// SPDX-License-Identifier: GPL-2.0-only
/*
 * Common helpers for stackable filesystems and backing files.
 *
 * Forked from fs/overlayfs/file.c.
 *
 * Copyright (C) 2017 Red Hat, Inc.
 * Copyright (C) 2023 CTERA Networks.
 */

#include <linux/fs.h>
#include <linux/backing-file.h>
#include <linux/splice.h>
#include <linux/mm.h>

#include "internal.h"

/**
 * backing_file_open - open a backing file for kernel internal use
 * @user_path:	path that the user requested to open
 * @flags:	open flags
 * @real_path:	path of the backing file
 * @cred:	credentials for open
 *
 * Open a backing file for a stackable filesystem (e.g., overlayfs).
 * @user_path may be on the stackable filesystem and @real_path on the
 * underlying filesystem.  In this case, we want to be able to return the
 * @user_path of the stackable filesystem. This is done by embedding the
 * returned file into a container structure that also stores the stacked
 * file's path, which can be retrieved using backing_file_user_path().
 */
struct file *backing_file_open(const struct path *user_path, int flags,
			       const struct path *real_path,
			       const struct cred *cred)
{
	struct file *f;
	int error;

	f = alloc_empty_backing_file(flags, cred);
	if (IS_ERR(f))
		return f;

	path_get(user_path);
	*backing_file_user_path(f) = *user_path;
	error = vfs_open(real_path, f);
	if (error) {
		fput(f);
		f = ERR_PTR(error);
	}

	return f;
}
EXPORT_SYMBOL_GPL(backing_file_open);
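
/*
 * Example (illustrative sketch, not built): a stackable filesystem's
 * ->open() method could wrap the file on the underlying filesystem
 * like this, where stackfs_real_path() and stackfs_creds() stand in
 * for the filesystem's own (hypothetical) helpers:
 *
 *	static int stackfs_open(struct inode *inode, struct file *file)
 *	{
 *		struct path real_path;
 *		struct file *backing;
 *
 *		stackfs_real_path(file, &real_path);
 *		backing = backing_file_open(&file->f_path, file->f_flags,
 *					    &real_path,
 *					    stackfs_creds(inode->i_sb));
 *		if (IS_ERR(backing))
 *			return PTR_ERR(backing);
 *
 *		file->private_data = backing;
 *		return 0;
 *	}
 *
 * The stored @user_path can later be retrieved with
 * backing_file_user_path(), so consumers such as /proc/<pid>/maps can
 * show the stacked path rather than the backing path.
 */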

/*
 * Like backing_file_open(), but create and open an unnamed (O_TMPFILE
 * style) temporary file on the underlying filesystem under
 * @real_parentpath.  The stacked @user_path is stored in the same way
 * and can be retrieved with backing_file_user_path().
 */
struct file *backing_tmpfile_open(const struct path *user_path, int flags,
				  const struct path *real_parentpath,
				  umode_t mode, const struct cred *cred)
{
	struct mnt_idmap *real_idmap = mnt_idmap(real_parentpath->mnt);
	struct file *f;
	int error;

	f = alloc_empty_backing_file(flags, cred);
	if (IS_ERR(f))
		return f;

	path_get(user_path);
	*backing_file_user_path(f) = *user_path;
	error = vfs_tmpfile(real_idmap, real_parentpath, f, mode);
	if (error) {
		fput(f);
		f = ERR_PTR(error);
	}
	return f;
}
EXPORT_SYMBOL(backing_tmpfile_open);
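
/*
 * Illustrative sketch: a stackable filesystem could use this to create
 * an anonymous file on the backing filesystem, e.g. as a copy-up or
 * write-back target.  @user_parent, @real_parent and @cred are
 * hypothetical values owned by the caller:
 *
 *	struct file *tmp;
 *
 *	tmp = backing_tmpfile_open(&user_parent, O_WRONLY | O_LARGEFILE,
 *				   &real_parent, S_IFREG | 0600, cred);
 *	if (IS_ERR(tmp))
 *		return PTR_ERR(tmp);
 */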

struct backing_aio {
	struct kiocb iocb;
	refcount_t ref;
	struct kiocb *orig_iocb;
	/* used for aio completion */
	void (*end_write)(struct file *, loff_t, ssize_t);
	struct work_struct work;
	long res;
};

static struct kmem_cache *backing_aio_cachep;

/*
 * kiocb flags that are safe to forward from the original iocb to the
 * backing iocb.
 */
#define BACKING_IOCB_MASK \
	(IOCB_NOWAIT | IOCB_HIPRI | IOCB_DSYNC | IOCB_SYNC | IOCB_APPEND)

/* The low IOCB_* bits are defined to match RWF_*, so a masked cast suffices */
static rwf_t iocb_to_rw_flags(int flags)
{
	return (__force rwf_t)(flags & BACKING_IOCB_MASK);
}

/* Drop a reference; the final put releases the backing file and frees the aio */
static void backing_aio_put(struct backing_aio *aio)
{
	if (refcount_dec_and_test(&aio->ref)) {
		fput(aio->iocb.ki_filp);
		kmem_cache_free(backing_aio_cachep, aio);
	}
}

static void backing_aio_cleanup(struct backing_aio *aio, long res)
{
	struct kiocb *iocb = &aio->iocb;
	struct kiocb *orig_iocb = aio->orig_iocb;

	if (aio->end_write)
		aio->end_write(orig_iocb->ki_filp, iocb->ki_pos, res);

	orig_iocb->ki_pos = iocb->ki_pos;
	backing_aio_put(aio);
}

static void backing_aio_rw_complete(struct kiocb *iocb, long res)
{
	struct backing_aio *aio = container_of(iocb, struct backing_aio, iocb);
	struct kiocb *orig_iocb = aio->orig_iocb;

	if (iocb->ki_flags & IOCB_WRITE)
		kiocb_end_write(iocb);

	backing_aio_cleanup(aio, res);
	orig_iocb->ki_complete(orig_iocb, res);
}

static void backing_aio_complete_work(struct work_struct *work)
{
	struct backing_aio *aio = container_of(work, struct backing_aio, work);

	backing_aio_rw_complete(&aio->iocb, aio->res);
}

static void backing_aio_queue_completion(struct kiocb *iocb, long res)
{
	struct backing_aio *aio = container_of(iocb, struct backing_aio, iocb);

	/*
	 * Punt to a work queue to serialize updates of mtime/size.
	 */
	aio->res = res;
	INIT_WORK(&aio->work, backing_aio_complete_work);
	queue_work(file_inode(aio->orig_iocb->ki_filp)->i_sb->s_dio_done_wq,
		   &aio->work);
}

static int backing_aio_init_wq(struct kiocb *iocb)
{
	struct super_block *sb = file_inode(iocb->ki_filp)->i_sb;

	if (sb->s_dio_done_wq)
		return 0;

	return sb_init_dio_done_wq(sb);
}
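
/*
 * AIO flow in the helpers below: the original iocb is cloned onto a
 * backing_aio, submitted to the backing file, and completed either
 * directly (reads) or via the superblock's dio_done_wq (writes), so
 * that ki_pos and the caller's end_write update happen before the
 * original iocb is completed.
 */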

ssize_t backing_file_read_iter(struct file *file, struct iov_iter *iter,
			       struct kiocb *iocb, int flags,
			       struct backing_file_ctx *ctx)
{
	struct backing_aio *aio = NULL;
	const struct cred *old_cred;
	ssize_t ret;

	if (WARN_ON_ONCE(!(file->f_mode & FMODE_BACKING)))
		return -EIO;

	if (!iov_iter_count(iter))
		return 0;

	if (iocb->ki_flags & IOCB_DIRECT &&
	    !(file->f_mode & FMODE_CAN_ODIRECT))
		return -EINVAL;

	old_cred = override_creds(ctx->cred);
	if (is_sync_kiocb(iocb)) {
		rwf_t rwf = iocb_to_rw_flags(flags);

		ret = vfs_iter_read(file, iter, &iocb->ki_pos, rwf);
	} else {
		ret = -ENOMEM;
		aio = kmem_cache_zalloc(backing_aio_cachep, GFP_KERNEL);
		if (!aio)
			goto out;

		aio->orig_iocb = iocb;
		kiocb_clone(&aio->iocb, iocb, get_file(file));
		aio->iocb.ki_complete = backing_aio_rw_complete;
		/* One ref for the submitter, one for the completion path */
		refcount_set(&aio->ref, 2);
		ret = vfs_iocb_iter_read(file, &aio->iocb, iter);
		backing_aio_put(aio);
		if (ret != -EIOCBQUEUED)
			backing_aio_cleanup(aio, ret);
	}
out:
	revert_creds(old_cred);

	if (ctx->accessed)
		ctx->accessed(ctx->user_file);

	return ret;
}
EXPORT_SYMBOL_GPL(backing_file_read_iter);
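
/*
 * Illustrative sketch: a stacked filesystem's ->read_iter() typically
 * forwards to the backing file like this (ovl_read_iter() in
 * fs/overlayfs/file.c is an in-tree caller); realfile, stackfs_creds()
 * and stackfs_file_accessed() are hypothetical:
 *
 *	struct backing_file_ctx ctx = {
 *		.cred = stackfs_creds(file_inode(file)->i_sb),
 *		.user_file = file,
 *		.accessed = stackfs_file_accessed,
 *	};
 *
 *	return backing_file_read_iter(realfile, iter, iocb,
 *				      iocb->ki_flags, &ctx);
 */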

ssize_t backing_file_write_iter(struct file *file, struct iov_iter *iter,
				struct kiocb *iocb, int flags,
				struct backing_file_ctx *ctx)
{
	const struct cred *old_cred;
	ssize_t ret;

	if (WARN_ON_ONCE(!(file->f_mode & FMODE_BACKING)))
		return -EIO;

	if (!iov_iter_count(iter))
		return 0;

	ret = file_remove_privs(ctx->user_file);
	if (ret)
		return ret;

	if (iocb->ki_flags & IOCB_DIRECT &&
	    !(file->f_mode & FMODE_CAN_ODIRECT))
		return -EINVAL;

	/*
	 * Stacked filesystems don't support deferred completions, don't copy
	 * this property in case it is set by the issuer.
	 */
	flags &= ~IOCB_DIO_CALLER_COMP;

	old_cred = override_creds(ctx->cred);
	if (is_sync_kiocb(iocb)) {
		rwf_t rwf = iocb_to_rw_flags(flags);

		ret = vfs_iter_write(file, iter, &iocb->ki_pos, rwf);
		if (ctx->end_write)
			ctx->end_write(ctx->user_file, iocb->ki_pos, ret);
	} else {
		struct backing_aio *aio;

		ret = backing_aio_init_wq(iocb);
		if (ret)
			goto out;

		ret = -ENOMEM;
		aio = kmem_cache_zalloc(backing_aio_cachep, GFP_KERNEL);
		if (!aio)
			goto out;

		aio->orig_iocb = iocb;
		aio->end_write = ctx->end_write;
		kiocb_clone(&aio->iocb, iocb, get_file(file));
		aio->iocb.ki_flags = flags;
		aio->iocb.ki_complete = backing_aio_queue_completion;
		/* One ref for the submitter, one for the completion path */
		refcount_set(&aio->ref, 2);
		ret = vfs_iocb_iter_write(file, &aio->iocb, iter);
		backing_aio_put(aio);
		if (ret != -EIOCBQUEUED)
			backing_aio_cleanup(aio, ret);
	}
out:
	revert_creds(old_cred);

	return ret;
}
EXPORT_SYMBOL_GPL(backing_file_write_iter);
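
/*
 * Illustrative sketch: the write side mirrors the read side, with an
 * optional end_write callback that lets the stacked filesystem copy
 * size/mtime back from the backing inode after each write; the
 * stackfs_* names are hypothetical:
 *
 *	struct backing_file_ctx ctx = {
 *		.cred = stackfs_creds(file_inode(file)->i_sb),
 *		.user_file = file,
 *		.end_write = stackfs_copyattr,
 *	};
 *
 *	return backing_file_write_iter(realfile, iter, iocb,
 *				       iocb->ki_flags, &ctx);
 */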

ssize_t backing_file_splice_read(struct file *in, loff_t *ppos,
				 struct pipe_inode_info *pipe, size_t len,
				 unsigned int flags,
				 struct backing_file_ctx *ctx)
{
	const struct cred *old_cred;
	ssize_t ret;

	if (WARN_ON_ONCE(!(in->f_mode & FMODE_BACKING)))
		return -EIO;

	old_cred = override_creds(ctx->cred);
	ret = vfs_splice_read(in, ppos, pipe, len, flags);
	revert_creds(old_cred);

	if (ctx->accessed)
		ctx->accessed(ctx->user_file);

	return ret;
}
EXPORT_SYMBOL_GPL(backing_file_splice_read);

ssize_t backing_file_splice_write(struct pipe_inode_info *pipe,
				  struct file *out, loff_t *ppos, size_t len,
				  unsigned int flags,
				  struct backing_file_ctx *ctx)
{
	const struct cred *old_cred;
	ssize_t ret;

	if (WARN_ON_ONCE(!(out->f_mode & FMODE_BACKING)))
		return -EIO;

	if (!out->f_op->splice_write)
		return -EINVAL;

	ret = file_remove_privs(ctx->user_file);
	if (ret)
		return ret;

	old_cred = override_creds(ctx->cred);
	file_start_write(out);
	ret = out->f_op->splice_write(pipe, out, ppos, len, flags);
	file_end_write(out);
	revert_creds(old_cred);

	if (ctx->end_write)
		ctx->end_write(ctx->user_file, ppos ? *ppos : 0, ret);

	return ret;
}
EXPORT_SYMBOL_GPL(backing_file_splice_write);
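
/*
 * The splice helpers above follow the same pattern as the iter
 * helpers: credentials are overridden around the call into the
 * underlying filesystem, and the accessed/end_write hooks fire on the
 * stacked user file.  They are intended to back a stacked
 * ->splice_read()/->splice_write().
 */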

int backing_file_mmap(struct file *file, struct vm_area_struct *vma,
		      struct backing_file_ctx *ctx)
{
	const struct cred *old_cred;
	int ret;

	if (WARN_ON_ONCE(!(file->f_mode & FMODE_BACKING)) ||
	    WARN_ON_ONCE(ctx->user_file != vma->vm_file))
		return -EIO;

	if (!file->f_op->mmap)
		return -ENODEV;

	/* Switch the vma to map the backing file instead of the stacked file */
	vma_set_file(vma, file);

	old_cred = override_creds(ctx->cred);
	ret = call_mmap(vma->vm_file, vma);
	revert_creds(old_cred);

	if (ctx->accessed)
		ctx->accessed(ctx->user_file);

	return ret;
}
EXPORT_SYMBOL_GPL(backing_file_mmap);
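
/*
 * Illustrative sketch: a stacked ->mmap() simply hands its backing
 * file and vma over; after backing_file_mmap() the vma maps the
 * backing file, while the user path stored by backing_file_open()
 * remains available via backing_file_user_path().  realfile and
 * stackfs_creds() are hypothetical:
 *
 *	static int stackfs_mmap(struct file *file, struct vm_area_struct *vma)
 *	{
 *		struct file *realfile = file->private_data;
 *		struct backing_file_ctx ctx = {
 *			.cred = stackfs_creds(file_inode(file)->i_sb),
 *			.user_file = file,
 *		};
 *
 *		return backing_file_mmap(realfile, vma, &ctx);
 *	}
 */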

static int __init backing_aio_init(void)
{
	backing_aio_cachep = KMEM_CACHE(backing_aio, SLAB_HWCACHE_ALIGN);
	if (!backing_aio_cachep)
		return -ENOMEM;

	return 0;
}
fs_initcall(backing_aio_init);