1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Copyright (C) 2018-2023 Oracle.  All Rights Reserved.
4  * Author: Darrick J. Wong <djwong@kernel.org>
5  */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_log_format.h"
11 #include "xfs_trans_resv.h"
12 #include "xfs_mount.h"
13 #include "scrub/scrub.h"
14 #include "scrub/xfile.h"
15 #include "scrub/xfarray.h"
16 #include "scrub/trace.h"
17 #include <linux/shmem_fs.h>
18 
/*
 * Swappable Temporary Memory
 * ==========================
 *
 * Online checking sometimes needs to be able to stage a large amount of data
 * in memory.  This information might not fit in the available memory and it
 * doesn't all need to be accessible at all times.  In other words, we want an
 * indexed data buffer to store data that can be paged out.
 *
 * When CONFIG_TMPFS=y, shmemfs is enough of a filesystem to meet those
 * requirements.  Therefore, the xfile mechanism uses an unlinked shmem file to
 * store our staging data.  This file is not installed in the file descriptor
 * table so that user programs cannot access the data, which means that the
 * xfile must be freed with xfile_destroy.
 *
 * xfiles assume that the caller will handle all required concurrency
 * management; standard vfs locks (freezer and inode) are not taken.  Reads
 * and writes are satisfied directly from the page cache.
 */
38 
/*
 * Lockdep class used for the i_rwsem of xfile inodes.
 *
 * xfiles must not be exposed to userspace and require upper layers to
 * coordinate access to the one handle returned by the constructor, so
 * establish a separate lock class for xfiles to avoid confusing lockdep.
 * xfile_create installs this class on the inode's i_rwsem; xfile_destroy
 * switches the lock back to the superblock type's i_mutex_key class before
 * the file reference is dropped.
 */
static struct lock_class_key xfile_i_mutex_key;
45 
46 /*
47  * Create an xfile of the given size.  The description will be used in the
48  * trace output.
49  */
50 int
xfile_create(const char * description,loff_t isize,struct xfile ** xfilep)51 xfile_create(
52 	const char		*description,
53 	loff_t			isize,
54 	struct xfile		**xfilep)
55 {
56 	struct inode		*inode;
57 	struct xfile		*xf;
58 	int			error;
59 
60 	xf = kmalloc(sizeof(struct xfile), XCHK_GFP_FLAGS);
61 	if (!xf)
62 		return -ENOMEM;
63 
64 	xf->file = shmem_kernel_file_setup(description, isize, VM_NORESERVE);
65 	if (IS_ERR(xf->file)) {
66 		error = PTR_ERR(xf->file);
67 		goto out_xfile;
68 	}
69 
70 	inode = file_inode(xf->file);
71 	lockdep_set_class(&inode->i_rwsem, &xfile_i_mutex_key);
72 
73 	/*
74 	 * We don't want to bother with kmapping data during repair, so don't
75 	 * allow highmem pages to back this mapping.
76 	 */
77 	mapping_set_gfp_mask(inode->i_mapping, GFP_KERNEL);
78 
79 	trace_xfile_create(xf);
80 
81 	*xfilep = xf;
82 	return 0;
83 out_xfile:
84 	kfree(xf);
85 	return error;
86 }
87 
88 /* Close the file and release all resources. */
89 void
xfile_destroy(struct xfile * xf)90 xfile_destroy(
91 	struct xfile		*xf)
92 {
93 	struct inode		*inode = file_inode(xf->file);
94 
95 	trace_xfile_destroy(xf);
96 
97 	lockdep_set_class(&inode->i_rwsem, &inode->i_sb->s_type->i_mutex_key);
98 	fput(xf->file);
99 	kfree(xf);
100 }
101 
102 /*
103  * Load an object.  Since we're treating this file as "memory", any error or
104  * short IO is treated as a failure to allocate memory.
105  */
106 int
xfile_load(struct xfile * xf,void * buf,size_t count,loff_t pos)107 xfile_load(
108 	struct xfile		*xf,
109 	void			*buf,
110 	size_t			count,
111 	loff_t			pos)
112 {
113 	struct inode		*inode = file_inode(xf->file);
114 	unsigned int		pflags;
115 
116 	if (count > MAX_RW_COUNT)
117 		return -ENOMEM;
118 	if (inode->i_sb->s_maxbytes - pos < count)
119 		return -ENOMEM;
120 
121 	trace_xfile_load(xf, pos, count);
122 
123 	pflags = memalloc_nofs_save();
124 	while (count > 0) {
125 		struct folio	*folio;
126 		unsigned int	len;
127 		unsigned int	offset;
128 
129 		if (shmem_get_folio(inode, pos >> PAGE_SHIFT, 0, &folio,
130 				SGP_READ) < 0)
131 			break;
132 		if (!folio) {
133 			/*
134 			 * No data stored at this offset, just zero the output
135 			 * buffer until the next page boundary.
136 			 */
137 			len = min_t(ssize_t, count,
138 				PAGE_SIZE - offset_in_page(pos));
139 			memset(buf, 0, len);
140 		} else {
141 			if (filemap_check_wb_err(inode->i_mapping, 0)) {
142 				folio_unlock(folio);
143 				folio_put(folio);
144 				break;
145 			}
146 
147 			offset = offset_in_folio(folio, pos);
148 			len = min_t(ssize_t, count, folio_size(folio) - offset);
149 			memcpy(buf, folio_address(folio) + offset, len);
150 
151 			folio_unlock(folio);
152 			folio_put(folio);
153 		}
154 		count -= len;
155 		pos += len;
156 		buf += len;
157 	}
158 	memalloc_nofs_restore(pflags);
159 
160 	if (count)
161 		return -ENOMEM;
162 	return 0;
163 }
164 
165 /*
166  * Store an object.  Since we're treating this file as "memory", any error or
167  * short IO is treated as a failure to allocate memory.
168  */
169 int
xfile_store(struct xfile * xf,const void * buf,size_t count,loff_t pos)170 xfile_store(
171 	struct xfile		*xf,
172 	const void		*buf,
173 	size_t			count,
174 	loff_t			pos)
175 {
176 	struct inode		*inode = file_inode(xf->file);
177 	unsigned int		pflags;
178 
179 	if (count > MAX_RW_COUNT)
180 		return -ENOMEM;
181 	if (inode->i_sb->s_maxbytes - pos < count)
182 		return -ENOMEM;
183 
184 	trace_xfile_store(xf, pos, count);
185 
186 	/*
187 	 * Increase the file size first so that shmem_get_folio(..., SGP_CACHE),
188 	 * actually allocates a folio instead of erroring out.
189 	 */
190 	if (pos + count > i_size_read(inode))
191 		i_size_write(inode, pos + count);
192 
193 	pflags = memalloc_nofs_save();
194 	while (count > 0) {
195 		struct folio	*folio;
196 		unsigned int	len;
197 		unsigned int	offset;
198 
199 		if (shmem_get_folio(inode, pos >> PAGE_SHIFT, 0, &folio,
200 				SGP_CACHE) < 0)
201 			break;
202 		if (filemap_check_wb_err(inode->i_mapping, 0)) {
203 			folio_unlock(folio);
204 			folio_put(folio);
205 			break;
206 		}
207 
208 		offset = offset_in_folio(folio, pos);
209 		len = min_t(ssize_t, count, folio_size(folio) - offset);
210 		memcpy(folio_address(folio) + offset, buf, len);
211 
212 		folio_mark_dirty(folio);
213 		folio_unlock(folio);
214 		folio_put(folio);
215 
216 		count -= len;
217 		pos += len;
218 		buf += len;
219 	}
220 	memalloc_nofs_restore(pflags);
221 
222 	if (count)
223 		return -ENOMEM;
224 	return 0;
225 }
226 
227 /* Find the next written area in the xfile data for a given offset. */
228 loff_t
xfile_seek_data(struct xfile * xf,loff_t pos)229 xfile_seek_data(
230 	struct xfile		*xf,
231 	loff_t			pos)
232 {
233 	loff_t			ret;
234 
235 	ret = vfs_llseek(xf->file, pos, SEEK_DATA);
236 	trace_xfile_seek_data(xf, pos, ret);
237 	return ret;
238 }
239 
240 /*
241  * Grab the (locked) folio for a memory object.  The object cannot span a folio
242  * boundary.  Returns the locked folio if successful, NULL if there was no
243  * folio or it didn't cover the range requested, or an ERR_PTR on failure.
244  */
245 struct folio *
xfile_get_folio(struct xfile * xf,loff_t pos,size_t len,unsigned int flags)246 xfile_get_folio(
247 	struct xfile		*xf,
248 	loff_t			pos,
249 	size_t			len,
250 	unsigned int		flags)
251 {
252 	struct inode		*inode = file_inode(xf->file);
253 	struct folio		*folio = NULL;
254 	unsigned int		pflags;
255 	int			error;
256 
257 	if (inode->i_sb->s_maxbytes - pos < len)
258 		return ERR_PTR(-ENOMEM);
259 
260 	trace_xfile_get_folio(xf, pos, len);
261 
262 	/*
263 	 * Increase the file size first so that shmem_get_folio(..., SGP_CACHE),
264 	 * actually allocates a folio instead of erroring out.
265 	 */
266 	if ((flags & XFILE_ALLOC) && pos + len > i_size_read(inode))
267 		i_size_write(inode, pos + len);
268 
269 	pflags = memalloc_nofs_save();
270 	error = shmem_get_folio(inode, pos >> PAGE_SHIFT, 0, &folio,
271 			(flags & XFILE_ALLOC) ? SGP_CACHE : SGP_READ);
272 	memalloc_nofs_restore(pflags);
273 	if (error)
274 		return ERR_PTR(error);
275 
276 	if (!folio)
277 		return NULL;
278 
279 	if (len > folio_size(folio) - offset_in_folio(folio, pos)) {
280 		folio_unlock(folio);
281 		folio_put(folio);
282 		return NULL;
283 	}
284 
285 	if (filemap_check_wb_err(inode->i_mapping, 0)) {
286 		folio_unlock(folio);
287 		folio_put(folio);
288 		return ERR_PTR(-EIO);
289 	}
290 
291 	/*
292 	 * Mark the folio dirty so that it won't be reclaimed once we drop the
293 	 * (potentially last) reference in xfile_put_folio.
294 	 */
295 	if (flags & XFILE_ALLOC)
296 		folio_mark_dirty(folio);
297 	return folio;
298 }
299 
300 /*
301  * Release the (locked) folio for a memory object.
302  */
303 void
xfile_put_folio(struct xfile * xf,struct folio * folio)304 xfile_put_folio(
305 	struct xfile		*xf,
306 	struct folio		*folio)
307 {
308 	trace_xfile_put_folio(xf, folio_pos(folio), folio_size(folio));
309 
310 	folio_unlock(folio);
311 	folio_put(folio);
312 }
313 
314 /* Discard the page cache that's backing a range of the xfile. */
315 void
xfile_discard(struct xfile * xf,loff_t pos,u64 count)316 xfile_discard(
317 	struct xfile		*xf,
318 	loff_t			pos,
319 	u64			count)
320 {
321 	trace_xfile_discard(xf, pos, count);
322 
323 	shmem_truncate_range(file_inode(xf->file), pos, pos + count - 1);
324 }
325