// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2018-2023 Oracle. All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "scrub/scrub.h"
#include "scrub/xfile.h"
#include "scrub/xfarray.h"
#include "scrub/trace.h"
#include <linux/shmem_fs.h>

/*
 * Swappable Temporary Memory
 * ==========================
 *
 * Online checking sometimes needs to be able to stage a large amount of data
 * in memory. This information might not fit in the available memory and it
 * doesn't all need to be accessible at all times. In other words, we want an
 * indexed data buffer to store data that can be paged out.
 *
 * When CONFIG_TMPFS=y, shmemfs is enough of a filesystem to meet those
 * requirements. Therefore, the xfile mechanism uses an unlinked shmem file to
 * store our staging data. This file is not installed in the file descriptor
 * table, so user programs cannot access the data, which means that the
 * xfile must be freed with xfile_destroy.
 *
 * xfiles assume that the caller will handle all required concurrency
 * management; standard vfs locks (freezer and inode) are not taken. Reads
 * and writes are satisfied directly from the page cache.
 */
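
/*
 * Illustrative usage sketch of a typical xfile lifecycle. The description
 * string, the "struct xrec" record type, and the zero offset are all
 * hypothetical:
 *
 *	struct xfile	*xf;
 *	struct xrec	rec;
 *	int		error;
 *
 *	error = xfile_create("example staging data", 0, &xf);
 *	if (error)
 *		return error;
 *	error = xfile_store(xf, &rec, sizeof(rec), 0);
 *	if (!error)
 *		error = xfile_load(xf, &rec, sizeof(rec), 0);
 *	xfile_destroy(xf);
 *	return error;
 */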

/*
 * xfiles must not be exposed to userspace and require upper layers to
 * coordinate access to the one handle returned by the constructor, so
 * establish a separate lock class for xfiles to avoid confusing lockdep.
 */
static struct lock_class_key xfile_i_mutex_key;

/*
 * Create an xfile of the given size. The description will be used in the
 * trace output.
 */
int
xfile_create(
	const char		*description,
	loff_t			isize,
	struct xfile		**xfilep)
{
	struct inode		*inode;
	struct xfile		*xf;
	int			error;

	xf = kmalloc(sizeof(struct xfile), XCHK_GFP_FLAGS);
	if (!xf)
		return -ENOMEM;

	xf->file = shmem_kernel_file_setup(description, isize, VM_NORESERVE);
	if (IS_ERR(xf->file)) {
		error = PTR_ERR(xf->file);
		goto out_xfile;
	}

	inode = file_inode(xf->file);
	lockdep_set_class(&inode->i_rwsem, &xfile_i_mutex_key);

	/*
	 * We don't want to bother with kmapping data during repair, so don't
	 * allow highmem pages to back this mapping.
	 */
	mapping_set_gfp_mask(inode->i_mapping, GFP_KERNEL);

	trace_xfile_create(xf);

	*xfilep = xf;
	return 0;
out_xfile:
	kfree(xf);
	return error;
}

/* Close the file and release all resources. */
void
xfile_destroy(
	struct xfile		*xf)
{
	struct inode		*inode = file_inode(xf->file);

	trace_xfile_destroy(xf);

	lockdep_set_class(&inode->i_rwsem, &inode->i_sb->s_type->i_mutex_key);
	fput(xf->file);
	kfree(xf);
}

/*
 * Load an object. Since we're treating this file as "memory", any error or
 * short IO is treated as a failure to allocate memory.
 */
int
xfile_load(
	struct xfile		*xf,
	void			*buf,
	size_t			count,
	loff_t			pos)
{
	struct inode		*inode = file_inode(xf->file);
	unsigned int		pflags;

	if (count > MAX_RW_COUNT)
		return -ENOMEM;
	if (inode->i_sb->s_maxbytes - pos < count)
		return -ENOMEM;

	trace_xfile_load(xf, pos, count);

	pflags = memalloc_nofs_save();
	while (count > 0) {
		struct folio	*folio;
		unsigned int	len;
		unsigned int	offset;

		if (shmem_get_folio(inode, pos >> PAGE_SHIFT, 0, &folio,
				SGP_READ) < 0)
			break;
		if (!folio) {
			/*
			 * No data stored at this offset, just zero the output
			 * buffer until the next page boundary.
			 */
			len = min_t(ssize_t, count,
					PAGE_SIZE - offset_in_page(pos));
			memset(buf, 0, len);
		} else {
			if (filemap_check_wb_err(inode->i_mapping, 0)) {
				folio_unlock(folio);
				folio_put(folio);
				break;
			}

			offset = offset_in_folio(folio, pos);
			len = min_t(ssize_t, count, folio_size(folio) - offset);
			memcpy(buf, folio_address(folio) + offset, len);

			folio_unlock(folio);
			folio_put(folio);
		}
		count -= len;
		pos += len;
		buf += len;
	}
	memalloc_nofs_restore(pflags);

	if (count)
		return -ENOMEM;
	return 0;
}
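
/*
 * Usage sketch: because unwritten ranges read back as zeroes, callers can
 * treat the xfile as a large zero-initialized array. The buffer and the 1MB
 * offset here are hypothetical.
 *
 *	char	data[16];
 *	int	error;
 *
 *	error = xfile_load(xf, data, sizeof(data), 1ULL << 20);
 *	if (error)
 *		return error;
 */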

/*
 * Store an object. Since we're treating this file as "memory", any error or
 * short IO is treated as a failure to allocate memory.
 */
int
xfile_store(
	struct xfile		*xf,
	const void		*buf,
	size_t			count,
	loff_t			pos)
{
	struct inode		*inode = file_inode(xf->file);
	unsigned int		pflags;

	if (count > MAX_RW_COUNT)
		return -ENOMEM;
	if (inode->i_sb->s_maxbytes - pos < count)
		return -ENOMEM;

	trace_xfile_store(xf, pos, count);

	/*
	 * Increase the file size first so that shmem_get_folio(..., SGP_CACHE)
	 * actually allocates a folio instead of erroring out.
	 */
	if (pos + count > i_size_read(inode))
		i_size_write(inode, pos + count);

	pflags = memalloc_nofs_save();
	while (count > 0) {
		struct folio	*folio;
		unsigned int	len;
		unsigned int	offset;

		if (shmem_get_folio(inode, pos >> PAGE_SHIFT, 0, &folio,
				SGP_CACHE) < 0)
			break;
		if (filemap_check_wb_err(inode->i_mapping, 0)) {
			folio_unlock(folio);
			folio_put(folio);
			break;
		}

		offset = offset_in_folio(folio, pos);
		len = min_t(ssize_t, count, folio_size(folio) - offset);
		memcpy(folio_address(folio) + offset, buf, len);

		folio_mark_dirty(folio);
		folio_unlock(folio);
		folio_put(folio);

		count -= len;
		pos += len;
		buf += len;
	}
	memalloc_nofs_restore(pflags);

	if (count)
		return -ENOMEM;
	return 0;
}
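
/*
 * Usage sketch: a caller staging fixed-size records (as xfarray does) can
 * compute the byte offset from a record index. "struct xrec", "idx", and
 * "rec" are hypothetical.
 *
 *	loff_t	pos = (loff_t)idx * sizeof(struct xrec);
 *	int	error;
 *
 *	error = xfile_store(xf, &rec, sizeof(struct xrec), pos);
 *	if (error)
 *		return error;
 */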

/* Find the next written area in the xfile data for a given offset. */
loff_t
xfile_seek_data(
	struct xfile		*xf,
	loff_t			pos)
{
	loff_t			ret;

	ret = vfs_llseek(xf->file, pos, SEEK_DATA);
	trace_xfile_seek_data(xf, pos, ret);
	return ret;
}
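
/*
 * Usage sketch: walk only the written regions of an xfile. vfs_llseek
 * returns -ENXIO once there is no data at or beyond pos, which ends the
 * loop; advancing to the next page boundary is just one possible stride.
 *
 *	loff_t	pos = 0;
 *
 *	while ((pos = xfile_seek_data(xf, pos)) >= 0) {
 *		...examine the data at pos...
 *		pos = round_up(pos + 1, PAGE_SIZE);
 *	}
 */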

/*
 * Grab the (locked) folio for a memory object. The object cannot span a folio
 * boundary. Returns the locked folio if successful, NULL if there was no
 * folio or it didn't cover the range requested, or an ERR_PTR on failure.
 */
struct folio *
xfile_get_folio(
	struct xfile		*xf,
	loff_t			pos,
	size_t			len,
	unsigned int		flags)
{
	struct inode		*inode = file_inode(xf->file);
	struct folio		*folio = NULL;
	unsigned int		pflags;
	int			error;

	if (inode->i_sb->s_maxbytes - pos < len)
		return ERR_PTR(-ENOMEM);

	trace_xfile_get_folio(xf, pos, len);

	/*
	 * Increase the file size first so that shmem_get_folio(..., SGP_CACHE)
	 * actually allocates a folio instead of erroring out.
	 */
	if ((flags & XFILE_ALLOC) && pos + len > i_size_read(inode))
		i_size_write(inode, pos + len);

	pflags = memalloc_nofs_save();
	error = shmem_get_folio(inode, pos >> PAGE_SHIFT, 0, &folio,
			(flags & XFILE_ALLOC) ? SGP_CACHE : SGP_READ);
	memalloc_nofs_restore(pflags);
	if (error)
		return ERR_PTR(error);

	if (!folio)
		return NULL;

	if (len > folio_size(folio) - offset_in_folio(folio, pos)) {
		folio_unlock(folio);
		folio_put(folio);
		return NULL;
	}

	if (filemap_check_wb_err(inode->i_mapping, 0)) {
		folio_unlock(folio);
		folio_put(folio);
		return ERR_PTR(-EIO);
	}

	/*
	 * Mark the folio dirty so that it won't be reclaimed once we drop the
	 * (potentially last) reference in xfile_put_folio.
	 */
	if (flags & XFILE_ALLOC)
		folio_mark_dirty(folio);
	return folio;
}
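
/*
 * Usage sketch: operate on an object in place instead of copying it with
 * xfile_load/xfile_store. A NULL return means no folio covered the requested
 * range; "struct xrec" and "pos" are hypothetical.
 *
 *	struct folio	*folio;
 *	struct xrec	*p;
 *
 *	folio = xfile_get_folio(xf, pos, sizeof(struct xrec), XFILE_ALLOC);
 *	if (IS_ERR(folio))
 *		return PTR_ERR(folio);
 *	if (!folio)
 *		return -ENOMEM;
 *	p = folio_address(folio) + offset_in_folio(folio, pos);
 *	...modify *p in place; XFILE_ALLOC already marked the folio dirty...
 *	xfile_put_folio(xf, folio);
 */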

/*
 * Release the (locked) folio for a memory object.
 */
void
xfile_put_folio(
	struct xfile		*xf,
	struct folio		*folio)
{
	trace_xfile_put_folio(xf, folio_pos(folio), folio_size(folio));

	folio_unlock(folio);
	folio_put(folio);
}

/* Discard the page cache that's backing a range of the xfile. */
void
xfile_discard(
	struct xfile		*xf,
	loff_t			pos,
	u64			count)
{
	trace_xfile_discard(xf, pos, count);

	shmem_truncate_range(file_inode(xf->file), pos, pos + count - 1);
}
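
/*
 * Usage sketch: once a staged range has been fully processed, callers can
 * punch it out so the memory is reclaimed before xfile_destroy. The 1MB
 * range here is hypothetical.
 *
 *	xfile_discard(xf, 0, 1ULL << 20);
 */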