1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Copyright (c) 2023-2024 Oracle.  All Rights Reserved.
4  * Author: Darrick J. Wong <djwong@kernel.org>
5  */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_buf.h"
9 #include "xfs_buf_mem.h"
10 #include "xfs_trace.h"
11 #include <linux/shmem_fs.h>
12 #include "xfs_log_format.h"
13 #include "xfs_trans.h"
14 #include "xfs_buf_item.h"
15 #include "xfs_error.h"
16 
17 /*
18  * Buffer Cache for In-Memory Files
19  * ================================
20  *
21  * Online fsck wants to create ephemeral ordered recordsets.  The existing
22  * btree infrastructure can do this, but we need the buffer cache to target
23  * memory instead of block devices.
24  *
25  * When CONFIG_TMPFS=y, shmemfs is enough of a filesystem to meet those
26  * requirements.  Therefore, the xmbuf mechanism uses an unlinked shmem file to
27  * store our staging data.  This file is not installed in the file descriptor
28  * table so that user programs cannot access the data, which means that the
 * xmbuf must be freed with xmbuf_free.
30  *
31  * xmbufs assume that the caller will handle all required concurrency
32  * management; standard vfs locks (freezer and inode) are not taken.  Reads
33  * and writes are satisfied directly from the page cache.
34  *
35  * The only supported block size is PAGE_SIZE, and we cannot use highmem.
36  */
37 
/*
 * shmem files used to back an in-memory buffer cache must not be exposed to
 * userspace.  Upper layers must coordinate access to the one handle returned
 * by the constructor, so establish a separate lock class for xmbufs to avoid
 * confusing lockdep.  xmbuf_alloc assigns this class to the i_rwsem of the
 * inode behind every backing file that it creates.
 */
static struct lock_class_key xmbuf_i_mutex_key;
45 
46 /*
47  * Allocate a buffer cache target for a memory-backed file and set up the
48  * buffer target.
49  */
50 int
xmbuf_alloc(struct xfs_mount * mp,const char * descr,struct xfs_buftarg ** btpp)51 xmbuf_alloc(
52 	struct xfs_mount	*mp,
53 	const char		*descr,
54 	struct xfs_buftarg	**btpp)
55 {
56 	struct file		*file;
57 	struct inode		*inode;
58 	struct xfs_buftarg	*btp;
59 	int			error;
60 
61 	btp = kzalloc(struct_size(btp, bt_cache, 1), GFP_KERNEL);
62 	if (!btp)
63 		return -ENOMEM;
64 
65 	file = shmem_kernel_file_setup(descr, 0, 0);
66 	if (IS_ERR(file)) {
67 		error = PTR_ERR(file);
68 		goto out_free_btp;
69 	}
70 	inode = file_inode(file);
71 
72 	/* private file, private locking */
73 	lockdep_set_class(&inode->i_rwsem, &xmbuf_i_mutex_key);
74 
75 	/*
76 	 * We don't want to bother with kmapping data during repair, so don't
77 	 * allow highmem pages to back this mapping.
78 	 */
79 	mapping_set_gfp_mask(inode->i_mapping, GFP_KERNEL);
80 
81 	/* ensure all writes are below EOF to avoid pagecache zeroing */
82 	i_size_write(inode, inode->i_sb->s_maxbytes);
83 
84 	error = xfs_buf_cache_init(btp->bt_cache);
85 	if (error)
86 		goto out_file;
87 
88 	/* Initialize buffer target */
89 	btp->bt_mount = mp;
90 	btp->bt_dev = (dev_t)-1U;
91 	btp->bt_bdev = NULL; /* in-memory buftargs have no bdev */
92 	btp->bt_file = file;
93 	btp->bt_meta_sectorsize = XMBUF_BLOCKSIZE;
94 	btp->bt_meta_sectormask = XMBUF_BLOCKSIZE - 1;
95 
96 	error = xfs_init_buftarg(btp, XMBUF_BLOCKSIZE, descr);
97 	if (error)
98 		goto out_bcache;
99 
100 	trace_xmbuf_create(btp);
101 
102 	*btpp = btp;
103 	return 0;
104 
105 out_bcache:
106 	xfs_buf_cache_destroy(btp->bt_cache);
107 out_file:
108 	fput(file);
109 out_free_btp:
110 	kfree(btp);
111 	return error;
112 }
113 
/*
 * Free a buffer cache target for a memory-backed buffer cache.
 *
 * @btp must be an in-memory buftarg whose bt_io_count sums to zero; both
 * conditions are asserted.  Teardown mirrors xmbuf_alloc in reverse: the
 * generic buftarg state goes first, then the buffer cache, then the reference
 * to the backing file, and finally the buftarg memory itself.  @btp must not
 * be used once this function returns.
 */
void
xmbuf_free(
	struct xfs_buftarg	*btp)
{
	ASSERT(xfs_buftarg_is_mem(btp));
	ASSERT(percpu_counter_sum(&btp->bt_io_count) == 0);

	trace_xmbuf_free(btp);

	xfs_destroy_buftarg(btp);
	xfs_buf_cache_destroy(btp->bt_cache);
	/* Drop the file reference before freeing the structure that holds it. */
	fput(btp->bt_file);
	kfree(btp);
}
129 
130 /* Directly map a shmem page into the buffer cache. */
131 int
xmbuf_map_page(struct xfs_buf * bp)132 xmbuf_map_page(
133 	struct xfs_buf		*bp)
134 {
135 	struct inode		*inode = file_inode(bp->b_target->bt_file);
136 	struct folio		*folio = NULL;
137 	struct page		*page;
138 	loff_t                  pos = BBTOB(xfs_buf_daddr(bp));
139 	int			error;
140 
141 	ASSERT(xfs_buftarg_is_mem(bp->b_target));
142 
143 	if (bp->b_map_count != 1)
144 		return -ENOMEM;
145 	if (BBTOB(bp->b_length) != XMBUF_BLOCKSIZE)
146 		return -ENOMEM;
147 	if (offset_in_page(pos) != 0) {
148 		ASSERT(offset_in_page(pos));
149 		return -ENOMEM;
150 	}
151 
152 	error = shmem_get_folio(inode, pos >> PAGE_SHIFT, 0, &folio, SGP_CACHE);
153 	if (error)
154 		return error;
155 
156 	if (filemap_check_wb_err(inode->i_mapping, 0)) {
157 		folio_unlock(folio);
158 		folio_put(folio);
159 		return -EIO;
160 	}
161 
162 	page = folio_file_page(folio, pos >> PAGE_SHIFT);
163 
164 	/*
165 	 * Mark the page dirty so that it won't be reclaimed once we drop the
166 	 * (potentially last) reference in xmbuf_unmap_page.
167 	 */
168 	set_page_dirty(page);
169 	unlock_page(page);
170 
171 	bp->b_addr = page_address(page);
172 	bp->b_pages = bp->b_page_array;
173 	bp->b_pages[0] = page;
174 	bp->b_page_count = 1;
175 	return 0;
176 }
177 
178 /* Unmap a shmem page that was mapped into the buffer cache. */
179 void
xmbuf_unmap_page(struct xfs_buf * bp)180 xmbuf_unmap_page(
181 	struct xfs_buf		*bp)
182 {
183 	struct page		*page = bp->b_pages[0];
184 
185 	ASSERT(xfs_buftarg_is_mem(bp->b_target));
186 
187 	put_page(page);
188 
189 	bp->b_addr = NULL;
190 	bp->b_pages[0] = NULL;
191 	bp->b_pages = NULL;
192 	bp->b_page_count = 0;
193 }
194 
195 /* Is this a valid daddr within the buftarg? */
196 bool
xmbuf_verify_daddr(struct xfs_buftarg * btp,xfs_daddr_t daddr)197 xmbuf_verify_daddr(
198 	struct xfs_buftarg	*btp,
199 	xfs_daddr_t		daddr)
200 {
201 	struct inode		*inode = file_inode(btp->bt_file);
202 
203 	ASSERT(xfs_buftarg_is_mem(btp));
204 
205 	return daddr < (inode->i_sb->s_maxbytes >> BBSHIFT);
206 }
207 
208 /* Discard the page backing this buffer. */
209 static void
xmbuf_stale(struct xfs_buf * bp)210 xmbuf_stale(
211 	struct xfs_buf		*bp)
212 {
213 	struct inode		*inode = file_inode(bp->b_target->bt_file);
214 	loff_t			pos;
215 
216 	ASSERT(xfs_buftarg_is_mem(bp->b_target));
217 
218 	pos = BBTOB(xfs_buf_daddr(bp));
219 	shmem_truncate_range(inode, pos, pos + BBTOB(bp->b_length) - 1);
220 }
221 
222 /*
223  * Finalize a buffer -- discard the backing page if it's stale, or run the
224  * write verifier to detect problems.
225  */
226 int
xmbuf_finalize(struct xfs_buf * bp)227 xmbuf_finalize(
228 	struct xfs_buf		*bp)
229 {
230 	xfs_failaddr_t		fa;
231 	int			error = 0;
232 
233 	if (bp->b_flags & XBF_STALE) {
234 		xmbuf_stale(bp);
235 		return 0;
236 	}
237 
238 	/*
239 	 * Although this btree is ephemeral, validate the buffer structure so
240 	 * that we can detect memory corruption errors and software bugs.
241 	 */
242 	fa = bp->b_ops->verify_struct(bp);
243 	if (fa) {
244 		error = -EFSCORRUPTED;
245 		xfs_verifier_error(bp, error, fa);
246 	}
247 
248 	return error;
249 }
250 
251 /*
252  * Detach this xmbuf buffer from the transaction by any means necessary.
253  * All buffers are direct-mapped, so they do not need bwrite.
254  */
255 void
xmbuf_trans_bdetach(struct xfs_trans * tp,struct xfs_buf * bp)256 xmbuf_trans_bdetach(
257 	struct xfs_trans	*tp,
258 	struct xfs_buf		*bp)
259 {
260 	struct xfs_buf_log_item	*bli = bp->b_log_item;
261 
262 	ASSERT(bli != NULL);
263 
264 	bli->bli_flags &= ~(XFS_BLI_DIRTY | XFS_BLI_ORDERED |
265 			    XFS_BLI_LOGGED | XFS_BLI_STALE);
266 	clear_bit(XFS_LI_DIRTY, &bli->bli_item.li_flags);
267 
268 	while (bp->b_log_item != NULL)
269 		xfs_trans_bdetach(tp, bp);
270 }
271