1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * Common helpers for stackable filesystems and backing files.
4 *
5 * Forked from fs/overlayfs/file.c.
6 *
7 * Copyright (C) 2017 Red Hat, Inc.
8 * Copyright (C) 2023 CTERA Networks.
9 */
10
11 #include <linux/fs.h>
12 #include <linux/backing-file.h>
13 #include <linux/splice.h>
14 #include <linux/mm.h>
15
16 #include "internal.h"
17
18 /**
19 * backing_file_open - open a backing file for kernel internal use
20 * @user_path: path that the user reuqested to open
21 * @flags: open flags
22 * @real_path: path of the backing file
23 * @cred: credentials for open
24 *
25 * Open a backing file for a stackable filesystem (e.g., overlayfs).
26 * @user_path may be on the stackable filesystem and @real_path on the
27 * underlying filesystem. In this case, we want to be able to return the
28 * @user_path of the stackable filesystem. This is done by embedding the
29 * returned file into a container structure that also stores the stacked
30 * file's path, which can be retrieved using backing_file_user_path().
31 */
backing_file_open(const struct path * user_path,int flags,const struct path * real_path,const struct cred * cred)32 struct file *backing_file_open(const struct path *user_path, int flags,
33 const struct path *real_path,
34 const struct cred *cred)
35 {
36 struct file *f;
37 int error;
38
39 f = alloc_empty_backing_file(flags, cred);
40 if (IS_ERR(f))
41 return f;
42
43 path_get(user_path);
44 *backing_file_user_path(f) = *user_path;
45 error = vfs_open(real_path, f);
46 if (error) {
47 fput(f);
48 f = ERR_PTR(error);
49 }
50
51 return f;
52 }
53 EXPORT_SYMBOL_GPL(backing_file_open);
54
/**
 * backing_tmpfile_open - create and open a backing tmpfile
 * @user_path: path recorded as the user path of the returned file
 * @flags: open flags
 * @real_parentpath: parent path handed to vfs_tmpfile()
 * @mode: mode handed to vfs_tmpfile()
 * @cred: credentials for open
 *
 * Allocates an empty backing file, records a referenced copy of @user_path
 * in it and then calls vfs_tmpfile() on @real_parentpath, using the idmap
 * of @real_parentpath's mount.
 *
 * Return: the opened file on success, or an ERR_PTR() carrying the error
 * from alloc_empty_backing_file() or vfs_tmpfile() on failure.
 */
struct file *backing_tmpfile_open(const struct path *user_path, int flags,
				  const struct path *real_parentpath,
				  umode_t mode, const struct cred *cred)
{
	struct mnt_idmap *idmap = mnt_idmap(real_parentpath->mnt);
	struct file *backing;
	int err;

	backing = alloc_empty_backing_file(flags, cred);
	if (IS_ERR(backing))
		return backing;

	/* Take a reference on the user path and record it in the new file. */
	path_get(user_path);
	*backing_file_user_path(backing) = *user_path;

	err = vfs_tmpfile(idmap, real_parentpath, backing, mode);
	if (!err)
		return backing;

	/* Creation failed: drop our file reference and hand back the error. */
	fput(backing);
	return ERR_PTR(err);
}
EXPORT_SYMBOL(backing_tmpfile_open);
77
/*
 * Per-request state for an asynchronous read/write forwarded to a backing
 * file.  Allocated from backing_aio_cachep by backing_file_read_iter() and
 * backing_file_write_iter() and released by the final backing_aio_put().
 */
struct backing_aio {
	/* kiocb submitted to the backing file; cloned from @orig_iocb */
	struct kiocb iocb;
	/* reference count; the last backing_aio_put() frees the structure */
	refcount_t ref;
	/* the caller's kiocb that this request was created for */
	struct kiocb *orig_iocb;
	/* used for aio completion */
	void (*end_write)(struct file *, loff_t, ssize_t);
	/* work item used by backing_aio_queue_completion() to defer completion */
	struct work_struct work;
	/* result saved by backing_aio_queue_completion() for the work handler */
	long res;
};
87
/* Slab cache for struct backing_aio; created by backing_aio_init(). */
static struct kmem_cache *backing_aio_cachep;

/* The kiocb flag bits that iocb_to_rw_flags() keeps; all others are dropped. */
#define BACKING_IOCB_MASK \
	(IOCB_NOWAIT | IOCB_HIPRI | IOCB_DSYNC | IOCB_SYNC | IOCB_APPEND)
92
iocb_to_rw_flags(int flags)93 static rwf_t iocb_to_rw_flags(int flags)
94 {
95 return (__force rwf_t)(flags & BACKING_IOCB_MASK);
96 }
97
backing_aio_put(struct backing_aio * aio)98 static void backing_aio_put(struct backing_aio *aio)
99 {
100 if (refcount_dec_and_test(&aio->ref)) {
101 fput(aio->iocb.ki_filp);
102 kmem_cache_free(backing_aio_cachep, aio);
103 }
104 }
105
backing_aio_cleanup(struct backing_aio * aio,long res)106 static void backing_aio_cleanup(struct backing_aio *aio, long res)
107 {
108 struct kiocb *iocb = &aio->iocb;
109 struct kiocb *orig_iocb = aio->orig_iocb;
110
111 if (aio->end_write)
112 aio->end_write(orig_iocb->ki_filp, iocb->ki_pos, res);
113
114 orig_iocb->ki_pos = iocb->ki_pos;
115 backing_aio_put(aio);
116 }
117
backing_aio_rw_complete(struct kiocb * iocb,long res)118 static void backing_aio_rw_complete(struct kiocb *iocb, long res)
119 {
120 struct backing_aio *aio = container_of(iocb, struct backing_aio, iocb);
121 struct kiocb *orig_iocb = aio->orig_iocb;
122
123 if (iocb->ki_flags & IOCB_WRITE)
124 kiocb_end_write(iocb);
125
126 backing_aio_cleanup(aio, res);
127 orig_iocb->ki_complete(orig_iocb, res);
128 }
129
backing_aio_complete_work(struct work_struct * work)130 static void backing_aio_complete_work(struct work_struct *work)
131 {
132 struct backing_aio *aio = container_of(work, struct backing_aio, work);
133
134 backing_aio_rw_complete(&aio->iocb, aio->res);
135 }
136
backing_aio_queue_completion(struct kiocb * iocb,long res)137 static void backing_aio_queue_completion(struct kiocb *iocb, long res)
138 {
139 struct backing_aio *aio = container_of(iocb, struct backing_aio, iocb);
140
141 /*
142 * Punt to a work queue to serialize updates of mtime/size.
143 */
144 aio->res = res;
145 INIT_WORK(&aio->work, backing_aio_complete_work);
146 queue_work(file_inode(aio->orig_iocb->ki_filp)->i_sb->s_dio_done_wq,
147 &aio->work);
148 }
149
backing_aio_init_wq(struct kiocb * iocb)150 static int backing_aio_init_wq(struct kiocb *iocb)
151 {
152 struct super_block *sb = file_inode(iocb->ki_filp)->i_sb;
153
154 if (sb->s_dio_done_wq)
155 return 0;
156
157 return sb_init_dio_done_wq(sb);
158 }
159
160
backing_file_read_iter(struct file * file,struct iov_iter * iter,struct kiocb * iocb,int flags,struct backing_file_ctx * ctx)161 ssize_t backing_file_read_iter(struct file *file, struct iov_iter *iter,
162 struct kiocb *iocb, int flags,
163 struct backing_file_ctx *ctx)
164 {
165 struct backing_aio *aio = NULL;
166 const struct cred *old_cred;
167 ssize_t ret;
168
169 if (WARN_ON_ONCE(!(file->f_mode & FMODE_BACKING)))
170 return -EIO;
171
172 if (!iov_iter_count(iter))
173 return 0;
174
175 if (iocb->ki_flags & IOCB_DIRECT &&
176 !(file->f_mode & FMODE_CAN_ODIRECT))
177 return -EINVAL;
178
179 old_cred = override_creds(ctx->cred);
180 if (is_sync_kiocb(iocb)) {
181 rwf_t rwf = iocb_to_rw_flags(flags);
182
183 ret = vfs_iter_read(file, iter, &iocb->ki_pos, rwf);
184 } else {
185 ret = -ENOMEM;
186 aio = kmem_cache_zalloc(backing_aio_cachep, GFP_KERNEL);
187 if (!aio)
188 goto out;
189
190 aio->orig_iocb = iocb;
191 kiocb_clone(&aio->iocb, iocb, get_file(file));
192 aio->iocb.ki_complete = backing_aio_rw_complete;
193 refcount_set(&aio->ref, 2);
194 ret = vfs_iocb_iter_read(file, &aio->iocb, iter);
195 backing_aio_put(aio);
196 if (ret != -EIOCBQUEUED)
197 backing_aio_cleanup(aio, ret);
198 }
199 out:
200 revert_creds(old_cred);
201
202 if (ctx->accessed)
203 ctx->accessed(ctx->user_file);
204
205 return ret;
206 }
207 EXPORT_SYMBOL_GPL(backing_file_read_iter);
208
backing_file_write_iter(struct file * file,struct iov_iter * iter,struct kiocb * iocb,int flags,struct backing_file_ctx * ctx)209 ssize_t backing_file_write_iter(struct file *file, struct iov_iter *iter,
210 struct kiocb *iocb, int flags,
211 struct backing_file_ctx *ctx)
212 {
213 const struct cred *old_cred;
214 ssize_t ret;
215
216 if (WARN_ON_ONCE(!(file->f_mode & FMODE_BACKING)))
217 return -EIO;
218
219 if (!iov_iter_count(iter))
220 return 0;
221
222 ret = file_remove_privs(ctx->user_file);
223 if (ret)
224 return ret;
225
226 if (iocb->ki_flags & IOCB_DIRECT &&
227 !(file->f_mode & FMODE_CAN_ODIRECT))
228 return -EINVAL;
229
230 /*
231 * Stacked filesystems don't support deferred completions, don't copy
232 * this property in case it is set by the issuer.
233 */
234 flags &= ~IOCB_DIO_CALLER_COMP;
235
236 old_cred = override_creds(ctx->cred);
237 if (is_sync_kiocb(iocb)) {
238 rwf_t rwf = iocb_to_rw_flags(flags);
239
240 ret = vfs_iter_write(file, iter, &iocb->ki_pos, rwf);
241 if (ctx->end_write)
242 ctx->end_write(ctx->user_file, iocb->ki_pos, ret);
243 } else {
244 struct backing_aio *aio;
245
246 ret = backing_aio_init_wq(iocb);
247 if (ret)
248 goto out;
249
250 ret = -ENOMEM;
251 aio = kmem_cache_zalloc(backing_aio_cachep, GFP_KERNEL);
252 if (!aio)
253 goto out;
254
255 aio->orig_iocb = iocb;
256 aio->end_write = ctx->end_write;
257 kiocb_clone(&aio->iocb, iocb, get_file(file));
258 aio->iocb.ki_flags = flags;
259 aio->iocb.ki_complete = backing_aio_queue_completion;
260 refcount_set(&aio->ref, 2);
261 ret = vfs_iocb_iter_write(file, &aio->iocb, iter);
262 backing_aio_put(aio);
263 if (ret != -EIOCBQUEUED)
264 backing_aio_cleanup(aio, ret);
265 }
266 out:
267 revert_creds(old_cred);
268
269 return ret;
270 }
271 EXPORT_SYMBOL_GPL(backing_file_write_iter);
272
backing_file_splice_read(struct file * in,loff_t * ppos,struct pipe_inode_info * pipe,size_t len,unsigned int flags,struct backing_file_ctx * ctx)273 ssize_t backing_file_splice_read(struct file *in, loff_t *ppos,
274 struct pipe_inode_info *pipe, size_t len,
275 unsigned int flags,
276 struct backing_file_ctx *ctx)
277 {
278 const struct cred *old_cred;
279 ssize_t ret;
280
281 if (WARN_ON_ONCE(!(in->f_mode & FMODE_BACKING)))
282 return -EIO;
283
284 old_cred = override_creds(ctx->cred);
285 ret = vfs_splice_read(in, ppos, pipe, len, flags);
286 revert_creds(old_cred);
287
288 if (ctx->accessed)
289 ctx->accessed(ctx->user_file);
290
291 return ret;
292 }
293 EXPORT_SYMBOL_GPL(backing_file_splice_read);
294
backing_file_splice_write(struct pipe_inode_info * pipe,struct file * out,loff_t * ppos,size_t len,unsigned int flags,struct backing_file_ctx * ctx)295 ssize_t backing_file_splice_write(struct pipe_inode_info *pipe,
296 struct file *out, loff_t *ppos, size_t len,
297 unsigned int flags,
298 struct backing_file_ctx *ctx)
299 {
300 const struct cred *old_cred;
301 ssize_t ret;
302
303 if (WARN_ON_ONCE(!(out->f_mode & FMODE_BACKING)))
304 return -EIO;
305
306 if (!out->f_op->splice_write)
307 return -EINVAL;
308
309 ret = file_remove_privs(ctx->user_file);
310 if (ret)
311 return ret;
312
313 old_cred = override_creds(ctx->cred);
314 file_start_write(out);
315 ret = out->f_op->splice_write(pipe, out, ppos, len, flags);
316 file_end_write(out);
317 revert_creds(old_cred);
318
319 if (ctx->end_write)
320 ctx->end_write(ctx->user_file, ppos ? *ppos : 0, ret);
321
322 return ret;
323 }
324 EXPORT_SYMBOL_GPL(backing_file_splice_write);
325
backing_file_mmap(struct file * file,struct vm_area_struct * vma,struct backing_file_ctx * ctx)326 int backing_file_mmap(struct file *file, struct vm_area_struct *vma,
327 struct backing_file_ctx *ctx)
328 {
329 const struct cred *old_cred;
330 int ret;
331
332 if (WARN_ON_ONCE(!(file->f_mode & FMODE_BACKING)) ||
333 WARN_ON_ONCE(ctx->user_file != vma->vm_file))
334 return -EIO;
335
336 if (!file->f_op->mmap)
337 return -ENODEV;
338
339 vma_set_file(vma, file);
340
341 old_cred = override_creds(ctx->cred);
342 ret = call_mmap(vma->vm_file, vma);
343 revert_creds(old_cred);
344
345 if (ctx->accessed)
346 ctx->accessed(ctx->user_file);
347
348 return ret;
349 }
350 EXPORT_SYMBOL_GPL(backing_file_mmap);
351
backing_aio_init(void)352 static int __init backing_aio_init(void)
353 {
354 backing_aio_cachep = KMEM_CACHE(backing_aio, SLAB_HWCACHE_ALIGN);
355 if (!backing_aio_cachep)
356 return -ENOMEM;
357
358 return 0;
359 }
360 fs_initcall(backing_aio_init);
361