/*
  FUSE: Filesystem in Userspace
  Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>

  This program can be distributed under the terms of the GNU GPL.
  See the file COPYING.
*/

#include "fuse_i.h"

#include <linux/init.h>
#include <linux/module.h>
#include <linux/poll.h>
#include <linux/sched/signal.h>
#include <linux/uio.h>
#include <linux/miscdevice.h>
#include <linux/pagemap.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/pipe_fs_i.h>
#include <linux/swap.h>
#include <linux/splice.h>
#include <linux/sched.h>

#define CREATE_TRACE_POINTS
#include "fuse_trace.h"

MODULE_ALIAS_MISCDEV(FUSE_MINOR);
MODULE_ALIAS("devname:fuse");

/* Ordinary requests have even IDs, while interrupt IDs are odd */
#define FUSE_INT_REQ_BIT (1ULL << 0)
#define FUSE_REQ_ID_STEP (1ULL << 1)

static struct kmem_cache *fuse_req_cachep;

static void end_requests(struct list_head *head);

static struct fuse_dev *fuse_get_dev(struct file *file)
{
	/*
	 * Lockless access is OK, because file->private_data is set
	 * once during mount and is valid until the file is released.
	 */
	return READ_ONCE(file->private_data);
}

static void fuse_request_init(struct fuse_mount *fm, struct fuse_req *req)
{
	INIT_LIST_HEAD(&req->list);
	INIT_LIST_HEAD(&req->intr_entry);
	init_waitqueue_head(&req->waitq);
	refcount_set(&req->count, 1);
	__set_bit(FR_PENDING, &req->flags);
	req->fm = fm;
}

static struct fuse_req *fuse_request_alloc(struct fuse_mount *fm, gfp_t flags)
{
	struct fuse_req *req = kmem_cache_zalloc(fuse_req_cachep, flags);
	if (req)
		fuse_request_init(fm, req);

	return req;
}

static void fuse_request_free(struct fuse_req *req)
{
	kmem_cache_free(fuse_req_cachep, req);
}

static void __fuse_get_request(struct fuse_req *req)
{
	refcount_inc(&req->count);
}

/* Must be called with > 1 refcount */
static void __fuse_put_request(struct fuse_req *req)
{
	refcount_dec(&req->count);
}

void fuse_set_initialized(struct fuse_conn *fc)
{
	/* Make sure stores before this are seen on another CPU */
	smp_wmb();
	fc->initialized = 1;
}

static bool fuse_block_alloc(struct fuse_conn *fc, bool for_background)
{
	return !fc->initialized || (for_background && fc->blocked);
}

static void fuse_drop_waiting(struct fuse_conn *fc)
{
	/*
	 * lockless check of fc->connected is okay, because atomic_dec_and_test()
	 * provides a memory barrier matched with the one in fuse_wait_aborted()
	 * to ensure no wake-up is missed.
	 */
	if (atomic_dec_and_test(&fc->num_waiting) &&
	    !READ_ONCE(fc->connected)) {
		/* wake up aborters */
		wake_up_all(&fc->blocked_waitq);
	}
}

static void fuse_put_request(struct fuse_req *req);

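/*
 * Allocate a request for a foreground or background operation and fill
 * in the credentials (pid/uid/gid) that will be sent to the userspace
 * server.  May block until the connection has been initialized, and for
 * background requests until fc->blocked is cleared.
 */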
static struct fuse_req *fuse_get_req(struct mnt_idmap *idmap,
				     struct fuse_mount *fm,
				     bool for_background)
{
	struct fuse_conn *fc = fm->fc;
	struct fuse_req *req;
	bool no_idmap = !fm->sb || (fm->sb->s_iflags & SB_I_NOIDMAP);
	kuid_t fsuid;
	kgid_t fsgid;
	int err;

	atomic_inc(&fc->num_waiting);

	if (fuse_block_alloc(fc, for_background)) {
		err = -EINTR;
		if (wait_event_killable_exclusive(fc->blocked_waitq,
				!fuse_block_alloc(fc, for_background)))
			goto out;
	}
	/* Matches smp_wmb() in fuse_set_initialized() */
	smp_rmb();

	err = -ENOTCONN;
	if (!fc->connected)
		goto out;

	err = -ECONNREFUSED;
	if (fc->conn_error)
		goto out;

	req = fuse_request_alloc(fm, GFP_KERNEL);
	err = -ENOMEM;
	if (!req) {
		if (for_background)
			wake_up(&fc->blocked_waitq);
		goto out;
	}

	req->in.h.pid = pid_nr_ns(task_pid(current), fc->pid_ns);

	__set_bit(FR_WAITING, &req->flags);
	if (for_background)
		__set_bit(FR_BACKGROUND, &req->flags);

	/*
	 * Keep the old behavior when idmappings support was not
	 * declared by a FUSE server.
	 *
	 * For those FUSE servers who support idmapped mounts,
	 * we send UID/GID only along with "inode creation"
	 * fuse requests, otherwise idmap == &invalid_mnt_idmap and
	 * req->in.h.{u,g}id will be equal to FUSE_INVALID_UIDGID.
	 */
	fsuid = no_idmap ? current_fsuid() : mapped_fsuid(idmap, fc->user_ns);
	fsgid = no_idmap ? current_fsgid() : mapped_fsgid(idmap, fc->user_ns);
	req->in.h.uid = from_kuid(fc->user_ns, fsuid);
	req->in.h.gid = from_kgid(fc->user_ns, fsgid);

	if (no_idmap && unlikely(req->in.h.uid == ((uid_t)-1) ||
				 req->in.h.gid == ((gid_t)-1))) {
		fuse_put_request(req);
		return ERR_PTR(-EOVERFLOW);
	}

	return req;

 out:
	fuse_drop_waiting(fc);
	return ERR_PTR(err);
}

static void fuse_put_request(struct fuse_req *req)
{
	struct fuse_conn *fc = req->fm->fc;

	if (refcount_dec_and_test(&req->count)) {
		if (test_bit(FR_BACKGROUND, &req->flags)) {
			/*
			 * We get here in the unlikely case that a background
			 * request was allocated but not sent
			 */
			spin_lock(&fc->bg_lock);
			if (!fc->blocked)
				wake_up(&fc->blocked_waitq);
			spin_unlock(&fc->bg_lock);
		}

		if (test_bit(FR_WAITING, &req->flags)) {
			__clear_bit(FR_WAITING, &req->flags);
			fuse_drop_waiting(fc);
		}

		fuse_request_free(req);
	}
}

unsigned int fuse_len_args(unsigned int numargs, struct fuse_arg *args)
{
	unsigned nbytes = 0;
	unsigned i;

	for (i = 0; i < numargs; i++)
		nbytes += args[i].size;

	return nbytes;
}
EXPORT_SYMBOL_GPL(fuse_len_args);

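/*
 * Allocate the next unique request ID.  IDs are incremented by
 * FUSE_REQ_ID_STEP (2) so that the low bit stays free for marking
 * interrupt requests (FUSE_INT_REQ_BIT).
 */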
static u64 fuse_get_unique_locked(struct fuse_iqueue *fiq)
{
	fiq->reqctr += FUSE_REQ_ID_STEP;
	return fiq->reqctr;
}

u64 fuse_get_unique(struct fuse_iqueue *fiq)
{
	u64 ret;

	spin_lock(&fiq->lock);
	ret = fuse_get_unique_locked(fiq);
	spin_unlock(&fiq->lock);

	return ret;
}
EXPORT_SYMBOL_GPL(fuse_get_unique);

static unsigned int fuse_req_hash(u64 unique)
{
	return hash_long(unique & ~FUSE_INT_REQ_BIT, FUSE_PQ_HASH_BITS);
}

/*
 * A new request is available, wake fiq->waitq
 */
static void fuse_dev_wake_and_unlock(struct fuse_iqueue *fiq)
__releases(fiq->lock)
{
	wake_up(&fiq->waitq);
	kill_fasync(&fiq->fasync, SIGIO, POLL_IN);
	spin_unlock(&fiq->lock);
}

static void fuse_dev_queue_forget(struct fuse_iqueue *fiq, struct fuse_forget_link *forget)
{
	spin_lock(&fiq->lock);
	if (fiq->connected) {
		fiq->forget_list_tail->next = forget;
		fiq->forget_list_tail = forget;
		fuse_dev_wake_and_unlock(fiq);
	} else {
		kfree(forget);
		spin_unlock(&fiq->lock);
	}
}

static void fuse_dev_queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req)
{
	spin_lock(&fiq->lock);
	if (list_empty(&req->intr_entry)) {
		list_add_tail(&req->intr_entry, &fiq->interrupts);
		/*
		 * Pairs with smp_mb() implied by test_and_set_bit()
		 * from fuse_request_end().
		 */
		smp_mb();
		if (test_bit(FR_FINISHED, &req->flags)) {
			list_del_init(&req->intr_entry);
			spin_unlock(&fiq->lock);
		} else {
			fuse_dev_wake_and_unlock(fiq);
		}
	} else {
		spin_unlock(&fiq->lock);
	}
}

static void fuse_dev_queue_req(struct fuse_iqueue *fiq, struct fuse_req *req)
{
	spin_lock(&fiq->lock);
	if (fiq->connected) {
		if (req->in.h.opcode != FUSE_NOTIFY_REPLY)
			req->in.h.unique = fuse_get_unique_locked(fiq);
		list_add_tail(&req->list, &fiq->pending);
		fuse_dev_wake_and_unlock(fiq);
	} else {
		spin_unlock(&fiq->lock);
		req->out.h.error = -ENOTCONN;
		clear_bit(FR_PENDING, &req->flags);
		fuse_request_end(req);
	}
}

const struct fuse_iqueue_ops fuse_dev_fiq_ops = {
	.send_forget	= fuse_dev_queue_forget,
	.send_interrupt	= fuse_dev_queue_interrupt,
	.send_req	= fuse_dev_queue_req,
};
EXPORT_SYMBOL_GPL(fuse_dev_fiq_ops);

static void fuse_send_one(struct fuse_iqueue *fiq, struct fuse_req *req)
{
	req->in.h.len = sizeof(struct fuse_in_header) +
		fuse_len_args(req->args->in_numargs,
			      (struct fuse_arg *) req->args->in_args);
	trace_fuse_request_send(req);
	fiq->ops->send_req(fiq, req);
}

void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
		       u64 nodeid, u64 nlookup)
{
	struct fuse_iqueue *fiq = &fc->iq;

	forget->forget_one.nodeid = nodeid;
	forget->forget_one.nlookup = nlookup;

	fiq->ops->send_forget(fiq, forget);
}

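/*
 * Move queued background requests to the input queue as long as the
 * number of active background requests stays below fc->max_background.
 * Called with fc->bg_lock held.
 */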
static void flush_bg_queue(struct fuse_conn *fc)
{
	struct fuse_iqueue *fiq = &fc->iq;

	while (fc->active_background < fc->max_background &&
	       !list_empty(&fc->bg_queue)) {
		struct fuse_req *req;

		req = list_first_entry(&fc->bg_queue, struct fuse_req, list);
		list_del(&req->list);
		fc->active_background++;
		fuse_send_one(fiq, req);
	}
}

/*
 * This function is called when a request is finished.  Either a reply
 * has arrived or it was aborted (and not yet sent) or some error
 * occurred during communication with userspace, or the device file
 * was closed.  The requester thread is woken up (if still waiting),
 * the 'end' callback is called if given, else the reference to the
 * request is released
 */
void fuse_request_end(struct fuse_req *req)
{
	struct fuse_mount *fm = req->fm;
	struct fuse_conn *fc = fm->fc;
	struct fuse_iqueue *fiq = &fc->iq;

	if (test_and_set_bit(FR_FINISHED, &req->flags))
		goto put_request;

	trace_fuse_request_end(req);
	/*
	 * test_and_set_bit() implies smp_mb() between bit
	 * changing and below FR_INTERRUPTED check. Pairs with
	 * smp_mb() from queue_interrupt().
	 */
	if (test_bit(FR_INTERRUPTED, &req->flags)) {
		spin_lock(&fiq->lock);
		list_del_init(&req->intr_entry);
		spin_unlock(&fiq->lock);
	}
	WARN_ON(test_bit(FR_PENDING, &req->flags));
	WARN_ON(test_bit(FR_SENT, &req->flags));
	if (test_bit(FR_BACKGROUND, &req->flags)) {
		spin_lock(&fc->bg_lock);
		clear_bit(FR_BACKGROUND, &req->flags);
		if (fc->num_background == fc->max_background) {
			fc->blocked = 0;
			wake_up(&fc->blocked_waitq);
		} else if (!fc->blocked) {
			/*
			 * Wake up next waiter, if any.  It's okay to use
			 * waitqueue_active(), as we've already synced up
			 * fc->blocked with waiters with the wake_up() call
			 * above.
			 */
			if (waitqueue_active(&fc->blocked_waitq))
				wake_up(&fc->blocked_waitq);
		}

		fc->num_background--;
		fc->active_background--;
		flush_bg_queue(fc);
		spin_unlock(&fc->bg_lock);
	} else {
		/* Wake up waiter sleeping in request_wait_answer() */
		wake_up(&req->waitq);
	}

	if (test_bit(FR_ASYNC, &req->flags))
		req->args->end(fm, req->args, req->out.h.error);
put_request:
	fuse_put_request(req);
}
EXPORT_SYMBOL_GPL(fuse_request_end);

static int queue_interrupt(struct fuse_req *req)
{
	struct fuse_iqueue *fiq = &req->fm->fc->iq;

	/* Check that FR_INTERRUPTED has been set before queuing an interrupt */
	if (unlikely(!test_bit(FR_INTERRUPTED, &req->flags)))
		return -EINVAL;

	fiq->ops->send_interrupt(fiq, req);

	return 0;
}

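/*
 * Wait for the request to be answered by the userspace server.  If the
 * wait is interrupted by a signal, ask the server to interrupt the
 * request; on a fatal signal, back the request out with -EINTR if it
 * has not yet been read by the server.
 */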
static void request_wait_answer(struct fuse_req *req)
{
	struct fuse_conn *fc = req->fm->fc;
	struct fuse_iqueue *fiq = &fc->iq;
	int err;

	if (!fc->no_interrupt) {
		/* Any signal may interrupt this */
		err = wait_event_interruptible(req->waitq,
					test_bit(FR_FINISHED, &req->flags));
		if (!err)
			return;

		set_bit(FR_INTERRUPTED, &req->flags);
		/* matches barrier in fuse_dev_do_read() */
		smp_mb__after_atomic();
		if (test_bit(FR_SENT, &req->flags))
			queue_interrupt(req);
	}

	if (!test_bit(FR_FORCE, &req->flags)) {
		/* Only fatal signals may interrupt this */
		err = wait_event_killable(req->waitq,
					test_bit(FR_FINISHED, &req->flags));
		if (!err)
			return;

		spin_lock(&fiq->lock);
		/* Request is not yet in userspace, bail out */
		if (test_bit(FR_PENDING, &req->flags)) {
			list_del(&req->list);
			spin_unlock(&fiq->lock);
			__fuse_put_request(req);
			req->out.h.error = -EINTR;
			return;
		}
		spin_unlock(&fiq->lock);
	}

	/*
	 * Either request is already in userspace, or it was forced.
	 * Wait it out.
	 */
	wait_event(req->waitq, test_bit(FR_FINISHED, &req->flags));
}

static void __fuse_request_send(struct fuse_req *req)
{
	struct fuse_iqueue *fiq = &req->fm->fc->iq;

	BUG_ON(test_bit(FR_BACKGROUND, &req->flags));

	/* acquire extra reference, since request is still needed after
	   fuse_request_end() */
	__fuse_get_request(req);
	fuse_send_one(fiq, req);

	request_wait_answer(req);
	/* Pairs with smp_wmb() in fuse_request_end() */
	smp_rmb();
}

static void fuse_adjust_compat(struct fuse_conn *fc, struct fuse_args *args)
{
	if (fc->minor < 4 && args->opcode == FUSE_STATFS)
		args->out_args[0].size = FUSE_COMPAT_STATFS_SIZE;

	if (fc->minor < 9) {
		switch (args->opcode) {
		case FUSE_LOOKUP:
		case FUSE_CREATE:
		case FUSE_MKNOD:
		case FUSE_MKDIR:
		case FUSE_SYMLINK:
		case FUSE_LINK:
			args->out_args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
			break;
		case FUSE_GETATTR:
		case FUSE_SETATTR:
			args->out_args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE;
			break;
		}
	}
	if (fc->minor < 12) {
		switch (args->opcode) {
		case FUSE_CREATE:
			args->in_args[0].size = sizeof(struct fuse_open_in);
			break;
		case FUSE_MKNOD:
			args->in_args[0].size = FUSE_COMPAT_MKNOD_IN_SIZE;
			break;
		}
	}
}

static void fuse_force_creds(struct fuse_req *req)
{
	struct fuse_conn *fc = req->fm->fc;

	if (!req->fm->sb || req->fm->sb->s_iflags & SB_I_NOIDMAP) {
		req->in.h.uid = from_kuid_munged(fc->user_ns, current_fsuid());
		req->in.h.gid = from_kgid_munged(fc->user_ns, current_fsgid());
	} else {
		req->in.h.uid = FUSE_INVALID_UIDGID;
		req->in.h.gid = FUSE_INVALID_UIDGID;
	}

	req->in.h.pid = pid_nr_ns(task_pid(current), fc->pid_ns);
}

static void fuse_args_to_req(struct fuse_req *req, struct fuse_args *args)
{
	req->in.h.opcode = args->opcode;
	req->in.h.nodeid = args->nodeid;
	req->args = args;
	if (args->is_ext)
		req->in.h.total_extlen = args->in_args[args->ext_idx].size / 8;
	if (args->end)
		__set_bit(FR_ASYNC, &req->flags);
}

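/*
 * Send a synchronous request and wait for the reply.  Returns the error
 * code from the reply header, or for variable-size replies the number of
 * bytes in the last output argument.
 */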
ssize_t __fuse_simple_request(struct mnt_idmap *idmap,
			      struct fuse_mount *fm,
			      struct fuse_args *args)
{
	struct fuse_conn *fc = fm->fc;
	struct fuse_req *req;
	ssize_t ret;

	if (args->force) {
		atomic_inc(&fc->num_waiting);
		req = fuse_request_alloc(fm, GFP_KERNEL | __GFP_NOFAIL);

		if (!args->nocreds)
			fuse_force_creds(req);

		__set_bit(FR_WAITING, &req->flags);
		__set_bit(FR_FORCE, &req->flags);
	} else {
		WARN_ON(args->nocreds);
		req = fuse_get_req(idmap, fm, false);
		if (IS_ERR(req))
			return PTR_ERR(req);
	}

	/* Needs to be done after fuse_get_req() so that fc->minor is valid */
	fuse_adjust_compat(fc, args);
	fuse_args_to_req(req, args);

	if (!args->noreply)
		__set_bit(FR_ISREPLY, &req->flags);
	__fuse_request_send(req);
	ret = req->out.h.error;
	if (!ret && args->out_argvar) {
		BUG_ON(args->out_numargs == 0);
		ret = args->out_args[args->out_numargs - 1].size;
	}
	fuse_put_request(req);

	return ret;
}

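/*
 * Put the request on the background queue and kick the queue.  Returns
 * false if the connection is already gone, in which case the caller must
 * drop the request.
 */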
static bool fuse_request_queue_background(struct fuse_req *req)
{
	struct fuse_mount *fm = req->fm;
	struct fuse_conn *fc = fm->fc;
	bool queued = false;

	WARN_ON(!test_bit(FR_BACKGROUND, &req->flags));
	if (!test_bit(FR_WAITING, &req->flags)) {
		__set_bit(FR_WAITING, &req->flags);
		atomic_inc(&fc->num_waiting);
	}
	__set_bit(FR_ISREPLY, &req->flags);
	spin_lock(&fc->bg_lock);
	if (likely(fc->connected)) {
		fc->num_background++;
		if (fc->num_background == fc->max_background)
			fc->blocked = 1;
		list_add_tail(&req->list, &fc->bg_queue);
		flush_bg_queue(fc);
		queued = true;
	}
	spin_unlock(&fc->bg_lock);

	return queued;
}

int fuse_simple_background(struct fuse_mount *fm, struct fuse_args *args,
			    gfp_t gfp_flags)
{
	struct fuse_req *req;

	if (args->force) {
		WARN_ON(!args->nocreds);
		req = fuse_request_alloc(fm, gfp_flags);
		if (!req)
			return -ENOMEM;
		__set_bit(FR_BACKGROUND, &req->flags);
	} else {
		WARN_ON(args->nocreds);
		req = fuse_get_req(&invalid_mnt_idmap, fm, true);
		if (IS_ERR(req))
			return PTR_ERR(req);
	}

	fuse_args_to_req(req, args);

	if (!fuse_request_queue_background(req)) {
		fuse_put_request(req);
		return -ENOTCONN;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(fuse_simple_background);

static int fuse_simple_notify_reply(struct fuse_mount *fm,
				    struct fuse_args *args, u64 unique)
{
	struct fuse_req *req;
	struct fuse_iqueue *fiq = &fm->fc->iq;

	req = fuse_get_req(&invalid_mnt_idmap, fm, false);
	if (IS_ERR(req))
		return PTR_ERR(req);

	__clear_bit(FR_ISREPLY, &req->flags);
	req->in.h.unique = unique;

	fuse_args_to_req(req, args);

	fuse_send_one(fiq, req);

	return 0;
}

/*
 * Lock the request.  Up to the next unlock_request() there mustn't be
 * anything that could cause a page-fault.  If the request was already
 * aborted bail out.
 */
static int lock_request(struct fuse_req *req)
{
	int err = 0;
	if (req) {
		spin_lock(&req->waitq.lock);
		if (test_bit(FR_ABORTED, &req->flags))
			err = -ENOENT;
		else
			set_bit(FR_LOCKED, &req->flags);
		spin_unlock(&req->waitq.lock);
	}
	return err;
}

/*
 * Unlock request.  If it was aborted while locked, caller is responsible
 * for unlocking and ending the request.
 */
static int unlock_request(struct fuse_req *req)
{
	int err = 0;
	if (req) {
		spin_lock(&req->waitq.lock);
		if (test_bit(FR_ABORTED, &req->flags))
			err = -ENOENT;
		else
			clear_bit(FR_LOCKED, &req->flags);
		spin_unlock(&req->waitq.lock);
	}
	return err;
}

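/*
 * State kept while copying a request to or from userspace.  The data is
 * transferred either through an iov_iter (read/write on the device) or
 * through a set of pipe buffers (splice), one page at a time.
 */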
struct fuse_copy_state {
	int write;
	struct fuse_req *req;
	struct iov_iter *iter;
	struct pipe_buffer *pipebufs;
	struct pipe_buffer *currbuf;
	struct pipe_inode_info *pipe;
	unsigned long nr_segs;
	struct page *pg;
	unsigned len;
	unsigned offset;
	unsigned move_pages:1;
};

static void fuse_copy_init(struct fuse_copy_state *cs, int write,
			   struct iov_iter *iter)
{
	memset(cs, 0, sizeof(*cs));
	cs->write = write;
	cs->iter = iter;
}

/* Unmap and put previous page of userspace buffer */
static void fuse_copy_finish(struct fuse_copy_state *cs)
{
	if (cs->currbuf) {
		struct pipe_buffer *buf = cs->currbuf;

		if (cs->write)
			buf->len = PAGE_SIZE - cs->len;
		cs->currbuf = NULL;
	} else if (cs->pg) {
		if (cs->write) {
			flush_dcache_page(cs->pg);
			set_page_dirty_lock(cs->pg);
		}
		put_page(cs->pg);
	}
	cs->pg = NULL;
}

/*
 * Get another pageful of userspace buffer, map it into kernel address
 * space, and lock the request
 */
static int fuse_copy_fill(struct fuse_copy_state *cs)
{
	struct page *page;
	int err;

	err = unlock_request(cs->req);
	if (err)
		return err;

	fuse_copy_finish(cs);
	if (cs->pipebufs) {
		struct pipe_buffer *buf = cs->pipebufs;

		if (!cs->write) {
			err = pipe_buf_confirm(cs->pipe, buf);
			if (err)
				return err;

			BUG_ON(!cs->nr_segs);
			cs->currbuf = buf;
			cs->pg = buf->page;
			cs->offset = buf->offset;
			cs->len = buf->len;
			cs->pipebufs++;
			cs->nr_segs--;
		} else {
			if (cs->nr_segs >= cs->pipe->max_usage)
				return -EIO;

			page = alloc_page(GFP_HIGHUSER);
			if (!page)
				return -ENOMEM;

			buf->page = page;
			buf->offset = 0;
			buf->len = 0;

			cs->currbuf = buf;
			cs->pg = page;
			cs->offset = 0;
			cs->len = PAGE_SIZE;
			cs->pipebufs++;
			cs->nr_segs++;
		}
	} else {
		size_t off;
		err = iov_iter_get_pages2(cs->iter, &page, PAGE_SIZE, 1, &off);
		if (err < 0)
			return err;
		BUG_ON(!err);
		cs->len = err;
		cs->offset = off;
		cs->pg = page;
	}

	return lock_request(cs->req);
}

/* Do as much copy to/from userspace buffer as we can */
static int fuse_copy_do(struct fuse_copy_state *cs, void **val, unsigned *size)
{
	unsigned ncpy = min(*size, cs->len);
	if (val) {
		void *pgaddr = kmap_local_page(cs->pg);
		void *buf = pgaddr + cs->offset;

		if (cs->write)
			memcpy(buf, *val, ncpy);
		else
			memcpy(*val, buf, ncpy);

		kunmap_local(pgaddr);
		*val += ncpy;
	}
	*size -= ncpy;
	cs->len -= ncpy;
	cs->offset += ncpy;
	return ncpy;
}

static int fuse_check_folio(struct folio *folio)
{
	if (folio_mapped(folio) ||
	    folio->mapping != NULL ||
	    (folio->flags & PAGE_FLAGS_CHECK_AT_PREP &
	     ~(1 << PG_locked |
	       1 << PG_referenced |
	       1 << PG_lru |
	       1 << PG_active |
	       1 << PG_workingset |
	       1 << PG_reclaim |
	       1 << PG_waiters |
	       LRU_GEN_MASK | LRU_REFS_MASK))) {
		dump_page(&folio->page, "fuse: trying to steal weird page");
		return 1;
	}
	return 0;
}

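/*
 * Try to steal the page backing a pipe buffer and move it into the page
 * cache in place of the request's page (zero-copy splice).  Returns 0 on
 * success, a positive value if the caller should fall back to copying,
 * or a negative error code.
 */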
static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
{
	int err;
	struct folio *oldfolio = page_folio(*pagep);
	struct folio *newfolio;
	struct pipe_buffer *buf = cs->pipebufs;

	folio_get(oldfolio);
	err = unlock_request(cs->req);
	if (err)
		goto out_put_old;

	fuse_copy_finish(cs);

	err = pipe_buf_confirm(cs->pipe, buf);
	if (err)
		goto out_put_old;

	BUG_ON(!cs->nr_segs);
	cs->currbuf = buf;
	cs->len = buf->len;
	cs->pipebufs++;
	cs->nr_segs--;

	if (cs->len != PAGE_SIZE)
		goto out_fallback;

	if (!pipe_buf_try_steal(cs->pipe, buf))
		goto out_fallback;

	newfolio = page_folio(buf->page);

	folio_clear_uptodate(newfolio);
	folio_clear_mappedtodisk(newfolio);

	if (fuse_check_folio(newfolio) != 0)
		goto out_fallback_unlock;

	/*
	 * This is a new and locked page, it shouldn't be mapped or
	 * have any special flags on it
	 */
	if (WARN_ON(folio_mapped(oldfolio)))
		goto out_fallback_unlock;
	if (WARN_ON(folio_has_private(oldfolio)))
		goto out_fallback_unlock;
	if (WARN_ON(folio_test_dirty(oldfolio) ||
				folio_test_writeback(oldfolio)))
		goto out_fallback_unlock;
	if (WARN_ON(folio_test_mlocked(oldfolio)))
		goto out_fallback_unlock;

	replace_page_cache_folio(oldfolio, newfolio);

	folio_get(newfolio);

	if (!(buf->flags & PIPE_BUF_FLAG_LRU))
		folio_add_lru(newfolio);

	/*
	 * Release while we have extra ref on stolen page.  Otherwise
	 * anon_pipe_buf_release() might think the page can be reused.
	 */
	pipe_buf_release(cs->pipe, buf);

	err = 0;
	spin_lock(&cs->req->waitq.lock);
	if (test_bit(FR_ABORTED, &cs->req->flags))
		err = -ENOENT;
	else
		*pagep = &newfolio->page;
	spin_unlock(&cs->req->waitq.lock);

	if (err) {
		folio_unlock(newfolio);
		folio_put(newfolio);
		goto out_put_old;
	}

	folio_unlock(oldfolio);
	/* Drop ref for ap->pages[] array */
	folio_put(oldfolio);
	cs->len = 0;

	err = 0;
out_put_old:
	/* Drop ref obtained in this function */
	folio_put(oldfolio);
	return err;

out_fallback_unlock:
	folio_unlock(newfolio);
out_fallback:
	cs->pg = buf->page;
	cs->offset = buf->offset;

	err = lock_request(cs->req);
	if (!err)
		err = 1;

	goto out_put_old;
}

static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page,
			 unsigned offset, unsigned count)
{
	struct pipe_buffer *buf;
	int err;

	if (cs->nr_segs >= cs->pipe->max_usage)
		return -EIO;

	get_page(page);
	err = unlock_request(cs->req);
	if (err) {
		put_page(page);
		return err;
	}

	fuse_copy_finish(cs);

	buf = cs->pipebufs;
	buf->page = page;
	buf->offset = offset;
	buf->len = count;

	cs->pipebufs++;
	cs->nr_segs++;
	cs->len = 0;

	return 0;
}

/*
 * Copy a page in the request to/from the userspace buffer.  Must be
 * done atomically
 */
static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
			  unsigned offset, unsigned count, int zeroing)
{
	int err;
	struct page *page = *pagep;

	if (page && zeroing && count < PAGE_SIZE)
		clear_highpage(page);

	while (count) {
		if (cs->write && cs->pipebufs && page) {
			/*
			 * Can't control lifetime of pipe buffers, so always
			 * copy user pages.
			 */
			if (cs->req->args->user_pages) {
				err = fuse_copy_fill(cs);
				if (err)
					return err;
			} else {
				return fuse_ref_page(cs, page, offset, count);
			}
		} else if (!cs->len) {
			if (cs->move_pages && page &&
			    offset == 0 && count == PAGE_SIZE) {
				err = fuse_try_move_page(cs, pagep);
				if (err <= 0)
					return err;
			} else {
				err = fuse_copy_fill(cs);
				if (err)
					return err;
			}
		}
		if (page) {
			void *mapaddr = kmap_local_page(page);
			void *buf = mapaddr + offset;
			offset += fuse_copy_do(cs, &buf, &count);
			kunmap_local(mapaddr);
		} else
			offset += fuse_copy_do(cs, NULL, &count);
	}
	if (page && !cs->write)
		flush_dcache_page(page);
	return 0;
}

/* Copy pages in the request to/from userspace buffer */
static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
			   int zeroing)
{
	unsigned i;
	struct fuse_req *req = cs->req;
	struct fuse_args_pages *ap = container_of(req->args, typeof(*ap), args);

	for (i = 0; i < ap->num_pages && (nbytes || zeroing); i++) {
		int err;
		unsigned int offset = ap->descs[i].offset;
		unsigned int count = min(nbytes, ap->descs[i].length);

		err = fuse_copy_page(cs, &ap->pages[i], offset, count, zeroing);
		if (err)
			return err;

		nbytes -= count;
	}
	return 0;
}

/* Copy a single argument in the request to/from userspace buffer */
static int fuse_copy_one(struct fuse_copy_state *cs, void *val, unsigned size)
{
	while (size) {
		if (!cs->len) {
			int err = fuse_copy_fill(cs);
			if (err)
				return err;
		}
		fuse_copy_do(cs, &val, &size);
	}
	return 0;
}

/* Copy request arguments to/from userspace buffer */
static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs,
			  unsigned argpages, struct fuse_arg *args,
			  int zeroing)
{
	int err = 0;
	unsigned i;

	for (i = 0; !err && i < numargs; i++) {
		struct fuse_arg *arg = &args[i];
		if (i == numargs - 1 && argpages)
			err = fuse_copy_pages(cs, arg->size, zeroing);
		else
			err = fuse_copy_one(cs, arg->value, arg->size);
	}
	return err;
}

static int forget_pending(struct fuse_iqueue *fiq)
{
	return fiq->forget_list_head.next != NULL;
}

static int request_pending(struct fuse_iqueue *fiq)
{
	return !list_empty(&fiq->pending) || !list_empty(&fiq->interrupts) ||
		forget_pending(fiq);
}

/*
 * Transfer an interrupt request to userspace
 *
 * Unlike other requests this is assembled on demand, without a need
 * to allocate a separate fuse_req structure.
 *
 * Called with fiq->lock held, releases it
 */
static int fuse_read_interrupt(struct fuse_iqueue *fiq,
			       struct fuse_copy_state *cs,
			       size_t nbytes, struct fuse_req *req)
__releases(fiq->lock)
{
	struct fuse_in_header ih;
	struct fuse_interrupt_in arg;
	unsigned reqsize = sizeof(ih) + sizeof(arg);
	int err;

	list_del_init(&req->intr_entry);
	memset(&ih, 0, sizeof(ih));
	memset(&arg, 0, sizeof(arg));
	ih.len = reqsize;
	ih.opcode = FUSE_INTERRUPT;
	ih.unique = (req->in.h.unique | FUSE_INT_REQ_BIT);
	arg.unique = req->in.h.unique;

	spin_unlock(&fiq->lock);
	if (nbytes < reqsize)
		return -EINVAL;

	err = fuse_copy_one(cs, &ih, sizeof(ih));
	if (!err)
		err = fuse_copy_one(cs, &arg, sizeof(arg));
	fuse_copy_finish(cs);

	return err ? err : reqsize;
}

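/*
 * Dequeue up to @max forget requests from the singly linked forget list.
 * Returns the head of the dequeued chain and stores the number of entries
 * in *countp (if not NULL).  Called with fiq->lock held.
 */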
static struct fuse_forget_link *fuse_dequeue_forget(struct fuse_iqueue *fiq,
						    unsigned int max,
						    unsigned int *countp)
{
	struct fuse_forget_link *head = fiq->forget_list_head.next;
	struct fuse_forget_link **newhead = &head;
	unsigned count;

	for (count = 0; *newhead != NULL && count < max; count++)
		newhead = &(*newhead)->next;

	fiq->forget_list_head.next = *newhead;
	*newhead = NULL;
	if (fiq->forget_list_head.next == NULL)
		fiq->forget_list_tail = &fiq->forget_list_head;

	if (countp != NULL)
		*countp = count;

	return head;
}

static int fuse_read_single_forget(struct fuse_iqueue *fiq,
				   struct fuse_copy_state *cs,
				   size_t nbytes)
__releases(fiq->lock)
{
	int err;
	struct fuse_forget_link *forget = fuse_dequeue_forget(fiq, 1, NULL);
	struct fuse_forget_in arg = {
		.nlookup = forget->forget_one.nlookup,
	};
	struct fuse_in_header ih = {
		.opcode = FUSE_FORGET,
		.nodeid = forget->forget_one.nodeid,
		.unique = fuse_get_unique_locked(fiq),
		.len = sizeof(ih) + sizeof(arg),
	};

	spin_unlock(&fiq->lock);
	kfree(forget);
	if (nbytes < ih.len)
		return -EINVAL;

	err = fuse_copy_one(cs, &ih, sizeof(ih));
	if (!err)
		err = fuse_copy_one(cs, &arg, sizeof(arg));
	fuse_copy_finish(cs);

	if (err)
		return err;

	return ih.len;
}

static int fuse_read_batch_forget(struct fuse_iqueue *fiq,
				   struct fuse_copy_state *cs, size_t nbytes)
__releases(fiq->lock)
{
	int err;
	unsigned max_forgets;
	unsigned count;
	struct fuse_forget_link *head;
	struct fuse_batch_forget_in arg = { .count = 0 };
	struct fuse_in_header ih = {
		.opcode = FUSE_BATCH_FORGET,
		.unique = fuse_get_unique_locked(fiq),
		.len = sizeof(ih) + sizeof(arg),
	};

	if (nbytes < ih.len) {
		spin_unlock(&fiq->lock);
		return -EINVAL;
	}

	max_forgets = (nbytes - ih.len) / sizeof(struct fuse_forget_one);
	head = fuse_dequeue_forget(fiq, max_forgets, &count);
	spin_unlock(&fiq->lock);

	arg.count = count;
	ih.len += count * sizeof(struct fuse_forget_one);
	err = fuse_copy_one(cs, &ih, sizeof(ih));
	if (!err)
		err = fuse_copy_one(cs, &arg, sizeof(arg));

	while (head) {
		struct fuse_forget_link *forget = head;

		if (!err) {
			err = fuse_copy_one(cs, &forget->forget_one,
					    sizeof(forget->forget_one));
		}
		head = forget->next;
		kfree(forget);
	}

	fuse_copy_finish(cs);

	if (err)
		return err;

	return ih.len;
}

static int fuse_read_forget(struct fuse_conn *fc, struct fuse_iqueue *fiq,
			    struct fuse_copy_state *cs,
			    size_t nbytes)
__releases(fiq->lock)
{
	if (fc->minor < 16 || fiq->forget_list_head.next->next == NULL)
		return fuse_read_single_forget(fiq, cs, nbytes);
	else
		return fuse_read_batch_forget(fiq, cs, nbytes);
}

/*
 * Read a single request into the userspace filesystem's buffer.  This
 * function waits until a request is available, then removes it from
 * the pending list and copies request data to userspace buffer.  If
 * no reply is needed (FORGET) or request has been aborted or there
 * was an error during the copying then it's finished by calling
 * fuse_request_end().  Otherwise add it to the processing list, and set
 * the 'sent' flag.
 */
static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file,
				struct fuse_copy_state *cs, size_t nbytes)
{
	ssize_t err;
	struct fuse_conn *fc = fud->fc;
	struct fuse_iqueue *fiq = &fc->iq;
	struct fuse_pqueue *fpq = &fud->pq;
	struct fuse_req *req;
	struct fuse_args *args;
	unsigned reqsize;
	unsigned int hash;

	/*
	 * Require sane minimum read buffer - that has capacity for fixed part
	 * of any request header + negotiated max_write room for data.
	 *
	 * Historically libfuse reserves 4K for fixed header room, but e.g.
	 * GlusterFS reserves only 80 bytes
	 *
	 *	= `sizeof(fuse_in_header) + sizeof(fuse_write_in)`
	 *
	 * which is the absolute minimum any sane filesystem should be using
	 * for header room.
	 */
	if (nbytes < max_t(size_t, FUSE_MIN_READ_BUFFER,
			   sizeof(struct fuse_in_header) +
			   sizeof(struct fuse_write_in) +
			   fc->max_write))
		return -EINVAL;

 restart:
	for (;;) {
		spin_lock(&fiq->lock);
		if (!fiq->connected || request_pending(fiq))
			break;
		spin_unlock(&fiq->lock);

		if (file->f_flags & O_NONBLOCK)
			return -EAGAIN;
		err = wait_event_interruptible_exclusive(fiq->waitq,
				!fiq->connected || request_pending(fiq));
		if (err)
			return err;
	}

	if (!fiq->connected) {
		err = fc->aborted ? -ECONNABORTED : -ENODEV;
		goto err_unlock;
	}

	if (!list_empty(&fiq->interrupts)) {
		req = list_entry(fiq->interrupts.next, struct fuse_req,
				 intr_entry);
		return fuse_read_interrupt(fiq, cs, nbytes, req);
	}

	if (forget_pending(fiq)) {
		if (list_empty(&fiq->pending) || fiq->forget_batch-- > 0)
			return fuse_read_forget(fc, fiq, cs, nbytes);

		if (fiq->forget_batch <= -8)
			fiq->forget_batch = 16;
	}

	req = list_entry(fiq->pending.next, struct fuse_req, list);
	clear_bit(FR_PENDING, &req->flags);
	list_del_init(&req->list);
	spin_unlock(&fiq->lock);

	args = req->args;
	reqsize = req->in.h.len;

	/* If request is too large, reply with an error and restart the read */
	if (nbytes < reqsize) {
		req->out.h.error = -EIO;
		/* SETXATTR is special, since it may contain too large data */
		if (args->opcode == FUSE_SETXATTR)
			req->out.h.error = -E2BIG;
		fuse_request_end(req);
		goto restart;
	}
	spin_lock(&fpq->lock);
	/*
	 * Must not put request on fpq->io queue after having been shut down by
	 * fuse_abort_conn()
	 */
	if (!fpq->connected) {
		req->out.h.error = err = -ECONNABORTED;
		goto out_end;

	}
	list_add(&req->list, &fpq->io);
	spin_unlock(&fpq->lock);
	cs->req = req;
	err = fuse_copy_one(cs, &req->in.h, sizeof(req->in.h));
	if (!err)
		err = fuse_copy_args(cs, args->in_numargs, args->in_pages,
				     (struct fuse_arg *) args->in_args, 0);
	fuse_copy_finish(cs);
	spin_lock(&fpq->lock);
	clear_bit(FR_LOCKED, &req->flags);
	if (!fpq->connected) {
		err = fc->aborted ? -ECONNABORTED : -ENODEV;
		goto out_end;
	}
	if (err) {
		req->out.h.error = -EIO;
		goto out_end;
	}
	if (!test_bit(FR_ISREPLY, &req->flags)) {
		err = reqsize;
		goto out_end;
	}
	hash = fuse_req_hash(req->in.h.unique);
	list_move_tail(&req->list, &fpq->processing[hash]);
	__fuse_get_request(req);
	set_bit(FR_SENT, &req->flags);
	spin_unlock(&fpq->lock);
	/* matches barrier in request_wait_answer() */
	smp_mb__after_atomic();
	if (test_bit(FR_INTERRUPTED, &req->flags))
		queue_interrupt(req);
	fuse_put_request(req);

	return reqsize;

out_end:
	if (!test_bit(FR_PRIVATE, &req->flags))
		list_del_init(&req->list);
	spin_unlock(&fpq->lock);
	fuse_request_end(req);
	return err;

 err_unlock:
	spin_unlock(&fiq->lock);
	return err;
}

static int fuse_dev_open(struct inode *inode, struct file *file)
{
	/*
	 * The fuse device's file's private_data is used to hold
	 * the fuse_conn(ection) when it is mounted, and is used to
	 * keep track of whether the file has been mounted already.
	 */
	file->private_data = NULL;
	return 0;
}

static ssize_t fuse_dev_read(struct kiocb *iocb, struct iov_iter *to)
{
	struct fuse_copy_state cs;
	struct file *file = iocb->ki_filp;
	struct fuse_dev *fud = fuse_get_dev(file);

	if (!fud)
		return -EPERM;

	if (!user_backed_iter(to))
		return -EINVAL;

	fuse_copy_init(&cs, 1, to);

	return fuse_dev_do_read(fud, file, &cs, iov_iter_count(to));
}

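/*
 * splice() from the fuse device: read a request into freshly allocated
 * pipe buffers so that request pages can be handed to the server without
 * copying them through a user buffer.
 */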
static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
				    struct pipe_inode_info *pipe,
				    size_t len, unsigned int flags)
{
	int total, ret;
	int page_nr = 0;
	struct pipe_buffer *bufs;
	struct fuse_copy_state cs;
	struct fuse_dev *fud = fuse_get_dev(in);

	if (!fud)
		return -EPERM;

	bufs = kvmalloc_array(pipe->max_usage, sizeof(struct pipe_buffer),
			      GFP_KERNEL);
	if (!bufs)
		return -ENOMEM;

	fuse_copy_init(&cs, 1, NULL);
	cs.pipebufs = bufs;
	cs.pipe = pipe;
	ret = fuse_dev_do_read(fud, in, &cs, len);
	if (ret < 0)
		goto out;

	if (pipe_occupancy(pipe->head, pipe->tail) + cs.nr_segs > pipe->max_usage) {
		ret = -EIO;
		goto out;
	}

	for (ret = total = 0; page_nr < cs.nr_segs; total += ret) {
		/*
		 * Need to be careful about this.  Having buf->ops in module
		 * code can Oops if the buffer persists after module unload.
		 */
		bufs[page_nr].ops = &nosteal_pipe_buf_ops;
		bufs[page_nr].flags = 0;
		ret = add_to_pipe(pipe, &bufs[page_nr++]);
		if (unlikely(ret < 0))
			break;
	}
	if (total)
		ret = total;
out:
	for (; page_nr < cs.nr_segs; page_nr++)
		put_page(bufs[page_nr].page);

	kvfree(bufs);
	return ret;
}

static int fuse_notify_poll(struct fuse_conn *fc, unsigned int size,
			    struct fuse_copy_state *cs)
{
	struct fuse_notify_poll_wakeup_out outarg;
	int err = -EINVAL;

	if (size != sizeof(outarg))
		goto err;

	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
	if (err)
		goto err;

	fuse_copy_finish(cs);
	return fuse_notify_poll_wakeup(fc, &outarg);

err:
	fuse_copy_finish(cs);
	return err;
}

static int fuse_notify_inval_inode(struct fuse_conn *fc, unsigned int size,
				   struct fuse_copy_state *cs)
{
	struct fuse_notify_inval_inode_out outarg;
	int err = -EINVAL;

	if (size != sizeof(outarg))
		goto err;

	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
	if (err)
		goto err;
	fuse_copy_finish(cs);

	down_read(&fc->killsb);
	err = fuse_reverse_inval_inode(fc, outarg.ino,
				       outarg.off, outarg.len);
	up_read(&fc->killsb);
	return err;

err:
	fuse_copy_finish(cs);
	return err;
}

static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size,
				   struct fuse_copy_state *cs)
{
	struct fuse_notify_inval_entry_out outarg;
	int err = -ENOMEM;
	char *buf;
	struct qstr name;

	buf = kzalloc(FUSE_NAME_MAX + 1, GFP_KERNEL);
	if (!buf)
		goto err;

	err = -EINVAL;
	if (size < sizeof(outarg))
		goto err;

	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
	if (err)
		goto err;

	err = -ENAMETOOLONG;
	if (outarg.namelen > FUSE_NAME_MAX)
		goto err;

	err = -EINVAL;
	if (size != sizeof(outarg) + outarg.namelen + 1)
		goto err;

	name.name = buf;
	name.len = outarg.namelen;
	err = fuse_copy_one(cs, buf, outarg.namelen + 1);
	if (err)
		goto err;
	fuse_copy_finish(cs);
	buf[outarg.namelen] = 0;

	down_read(&fc->killsb);
	err = fuse_reverse_inval_entry(fc, outarg.parent, 0, &name, outarg.flags);
	up_read(&fc->killsb);
	kfree(buf);
	return err;

err:
	kfree(buf);
	fuse_copy_finish(cs);
	return err;
}

static int fuse_notify_delete(struct fuse_conn *fc, unsigned int size,
			      struct fuse_copy_state *cs)
{
	struct fuse_notify_delete_out outarg;
	int err = -ENOMEM;
	char *buf;
	struct qstr name;

	buf = kzalloc(FUSE_NAME_MAX + 1, GFP_KERNEL);
	if (!buf)
		goto err;

	err = -EINVAL;
	if (size < sizeof(outarg))
		goto err;

	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
	if (err)
		goto err;

	err = -ENAMETOOLONG;
	if (outarg.namelen > FUSE_NAME_MAX)
		goto err;

	err = -EINVAL;
	if (size != sizeof(outarg) + outarg.namelen + 1)
		goto err;

	name.name = buf;
	name.len = outarg.namelen;
	err = fuse_copy_one(cs, buf, outarg.namelen + 1);
	if (err)
		goto err;
	fuse_copy_finish(cs);
	buf[outarg.namelen] = 0;

	down_read(&fc->killsb);
	err = fuse_reverse_inval_entry(fc, outarg.parent, outarg.child, &name, 0);
	up_read(&fc->killsb);
	kfree(buf);
	return err;

err:
	kfree(buf);
	fuse_copy_finish(cs);
	return err;
}

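/*
 * Handle FUSE_NOTIFY_STORE: the server pushes data for a given nodeid
 * directly into the inode's page cache, extending the file size if the
 * stored range goes past the current end of file.
 */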
static int fuse_notify_store(struct fuse_conn *fc, unsigned int size,
			     struct fuse_copy_state *cs)
{
	struct fuse_notify_store_out outarg;
	struct inode *inode;
	struct address_space *mapping;
	u64 nodeid;
	int err;
	pgoff_t index;
	unsigned int offset;
	unsigned int num;
	loff_t file_size;
	loff_t end;

	err = -EINVAL;
	if (size < sizeof(outarg))
		goto out_finish;

	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
	if (err)
		goto out_finish;

	err = -EINVAL;
	if (size - sizeof(outarg) != outarg.size)
		goto out_finish;

	nodeid = outarg.nodeid;

	down_read(&fc->killsb);

	err = -ENOENT;
	inode = fuse_ilookup(fc, nodeid, NULL);
	if (!inode)
		goto out_up_killsb;

	mapping = inode->i_mapping;
	index = outarg.offset >> PAGE_SHIFT;
	offset = outarg.offset & ~PAGE_MASK;
	file_size = i_size_read(inode);
	end = outarg.offset + outarg.size;
	if (end > file_size) {
		file_size = end;
		fuse_write_update_attr(inode, file_size, outarg.size);
	}

	num = outarg.size;
	while (num) {
		struct page *page;
		unsigned int this_num;

		err = -ENOMEM;
		page = find_or_create_page(mapping, index,
					   mapping_gfp_mask(mapping));
		if (!page)
			goto out_iput;

		this_num = min_t(unsigned, num, PAGE_SIZE - offset);
		err = fuse_copy_page(cs, &page, offset, this_num, 0);
		if (!PageUptodate(page) && !err && offset == 0 &&
		    (this_num == PAGE_SIZE || file_size == end)) {
			zero_user_segment(page, this_num, PAGE_SIZE);
			SetPageUptodate(page);
		}
		unlock_page(page);
		put_page(page);

		if (err)
			goto out_iput;

		num -= this_num;
		offset = 0;
		index++;
	}

	err = 0;

out_iput:
	iput(inode);
out_up_killsb:
	up_read(&fc->killsb);
out_finish:
	fuse_copy_finish(cs);
	return err;
}

struct fuse_retrieve_args {
	struct fuse_args_pages ap;
	struct fuse_notify_retrieve_in inarg;
};

static void fuse_retrieve_end(struct fuse_mount *fm, struct fuse_args *args,
			      int error)
{
	struct fuse_retrieve_args *ra =
		container_of(args, typeof(*ra), ap.args);

	release_pages(ra->ap.pages, ra->ap.num_pages);
	kfree(ra);
}

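/*
 * Handle FUSE_NOTIFY_RETRIEVE for one inode: collect the requested range
 * of pages from the page cache and send them back to the server as a
 * FUSE_NOTIFY_REPLY request.
 */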
static int fuse_retrieve(struct fuse_mount *fm, struct inode *inode,
			 struct fuse_notify_retrieve_out *outarg)
{
	int err;
	struct address_space *mapping = inode->i_mapping;
	pgoff_t index;
	loff_t file_size;
	unsigned int num;
	unsigned int offset;
	size_t total_len = 0;
	unsigned int num_pages;
	struct fuse_conn *fc = fm->fc;
	struct fuse_retrieve_args *ra;
	size_t args_size = sizeof(*ra);
	struct fuse_args_pages *ap;
	struct fuse_args *args;

	offset = outarg->offset & ~PAGE_MASK;
	file_size = i_size_read(inode);

	num = min(outarg->size, fc->max_write);
	if (outarg->offset > file_size)
		num = 0;
	else if (outarg->offset + num > file_size)
		num = file_size - outarg->offset;

	num_pages = (num + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
	num_pages = min(num_pages, fc->max_pages);

	args_size += num_pages * (sizeof(ap->pages[0]) + sizeof(ap->descs[0]));

	ra = kzalloc(args_size, GFP_KERNEL);
	if (!ra)
		return -ENOMEM;

	ap = &ra->ap;
	ap->pages = (void *) (ra + 1);
	ap->descs = (void *) (ap->pages + num_pages);

	args = &ap->args;
	args->nodeid = outarg->nodeid;
	args->opcode = FUSE_NOTIFY_REPLY;
	args->in_numargs = 2;
	args->in_pages = true;
	args->end = fuse_retrieve_end;

	index = outarg->offset >> PAGE_SHIFT;

	while (num && ap->num_pages < num_pages) {
		struct page *page;
		unsigned int this_num;

		page = find_get_page(mapping, index);
		if (!page)
			break;

		this_num = min_t(unsigned, num, PAGE_SIZE - offset);
		ap->pages[ap->num_pages] = page;
		ap->descs[ap->num_pages].offset = offset;
		ap->descs[ap->num_pages].length = this_num;
		ap->num_pages++;

		offset = 0;
		num -= this_num;
		total_len += this_num;
		index++;
	}
	ra->inarg.offset = outarg->offset;
	ra->inarg.size = total_len;
	args->in_args[0].size = sizeof(ra->inarg);
	args->in_args[0].value = &ra->inarg;
	args->in_args[1].size = total_len;

	err = fuse_simple_notify_reply(fm, args, outarg->notify_unique);
	if (err)
		fuse_retrieve_end(fm, args, err);

	return err;
}

static int fuse_notify_retrieve(struct fuse_conn *fc, unsigned int size,
				struct fuse_copy_state *cs)
{
	struct fuse_notify_retrieve_out outarg;
	struct fuse_mount *fm;
	struct inode *inode;
	u64 nodeid;
	int err;

	err = -EINVAL;
	if (size != sizeof(outarg))
		goto copy_finish;

	err = fuse_copy_one(cs, &outarg, sizeof(outarg));
	if (err)
		goto copy_finish;

	fuse_copy_finish(cs);

	down_read(&fc->killsb);
	err = -ENOENT;
	nodeid = outarg.nodeid;

	inode = fuse_ilookup(fc, nodeid, &fm);
	if (inode) {
		err = fuse_retrieve(fm, inode, &outarg);
		iput(inode);
	}
	up_read(&fc->killsb);

	return err;

copy_finish:
	fuse_copy_finish(cs);
	return err;
}

/*
 * Resend all requests on the processing queues.
 *
 * When a FUSE daemon panics and fails over, some in-flight requests may be
 * lost and never answered.  As a result, applications awaiting replies would
 * become stuck forever.  To address this, the new daemon can send a
 * FUSE_NOTIFY_RESEND notification to trigger resending of these pending
 * requests, ensuring they are properly processed again.
 *
 * Please note that this strategy is applicable only to idempotent requests or
 * if the FUSE daemon takes careful measures to avoid processing duplicated
 * non-idempotent requests.
 */
fuse_resend(struct fuse_conn * fc)1840  static void fuse_resend(struct fuse_conn *fc)
1841  {
1842  	struct fuse_dev *fud;
1843  	struct fuse_req *req, *next;
1844  	struct fuse_iqueue *fiq = &fc->iq;
1845  	LIST_HEAD(to_queue);
1846  	unsigned int i;
1847  
1848  	spin_lock(&fc->lock);
1849  	if (!fc->connected) {
1850  		spin_unlock(&fc->lock);
1851  		return;
1852  	}
1853  
1854  	list_for_each_entry(fud, &fc->devices, entry) {
1855  		struct fuse_pqueue *fpq = &fud->pq;
1856  
1857  		spin_lock(&fpq->lock);
1858  		for (i = 0; i < FUSE_PQ_HASH_SIZE; i++)
1859  			list_splice_tail_init(&fpq->processing[i], &to_queue);
1860  		spin_unlock(&fpq->lock);
1861  	}
1862  	spin_unlock(&fc->lock);
1863  
1864  	list_for_each_entry_safe(req, next, &to_queue, list) {
1865  		set_bit(FR_PENDING, &req->flags);
1866  		clear_bit(FR_SENT, &req->flags);
1867  		/* mark the request as a resend request */
1868  		req->in.h.unique |= FUSE_UNIQUE_RESEND;
1869  	}
1870  
1871  	spin_lock(&fiq->lock);
1872  	if (!fiq->connected) {
1873  		spin_unlock(&fiq->lock);
1874  		list_for_each_entry(req, &to_queue, list)
1875  			clear_bit(FR_PENDING, &req->flags);
1876  		end_requests(&to_queue);
1877  		return;
1878  	}
1879  	/* iq and pq requests are both oldest to newest */
1880  	list_splice(&to_queue, &fiq->pending);
1881  	fuse_dev_wake_and_unlock(fiq);
1882  }
1883  
1884  static int fuse_notify_resend(struct fuse_conn *fc)
1885  {
1886  	fuse_resend(fc);
1887  	return 0;
1888  }
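/*
 * A rough userspace sketch of how a restarted daemon could trigger the resend
 * path above.  A notification is a reply whose 'unique' field is zero and
 * whose 'error' field carries the notify code (see fuse_dev_do_write()), and
 * FUSE_NOTIFY_RESEND takes no payload, so a header-only write to the
 * /dev/fuse fd is enough.  The helper name and the error handling are
 * illustrative only; this is not part of the kernel build.
 */
#if 0	/* userspace example */
#include <linux/fuse.h>
#include <string.h>
#include <unistd.h>

/* Ask the kernel to requeue all in-flight requests of this connection. */
static int trigger_resend(int fuse_fd)
{
	struct fuse_out_header oh;

	memset(&oh, 0, sizeof(oh));
	oh.len    = sizeof(oh);			/* header only, no payload */
	oh.error  = FUSE_NOTIFY_RESEND;		/* unique == 0 marks a notification */
	oh.unique = 0;

	return write(fuse_fd, &oh, sizeof(oh)) == sizeof(oh) ? 0 : -1;
}
#endif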
1889  
1890  static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
1891  		       unsigned int size, struct fuse_copy_state *cs)
1892  {
1893  	/* Don't try to move pages (yet) */
1894  	cs->move_pages = 0;
1895  
1896  	switch (code) {
1897  	case FUSE_NOTIFY_POLL:
1898  		return fuse_notify_poll(fc, size, cs);
1899  
1900  	case FUSE_NOTIFY_INVAL_INODE:
1901  		return fuse_notify_inval_inode(fc, size, cs);
1902  
1903  	case FUSE_NOTIFY_INVAL_ENTRY:
1904  		return fuse_notify_inval_entry(fc, size, cs);
1905  
1906  	case FUSE_NOTIFY_STORE:
1907  		return fuse_notify_store(fc, size, cs);
1908  
1909  	case FUSE_NOTIFY_RETRIEVE:
1910  		return fuse_notify_retrieve(fc, size, cs);
1911  
1912  	case FUSE_NOTIFY_DELETE:
1913  		return fuse_notify_delete(fc, size, cs);
1914  
1915  	case FUSE_NOTIFY_RESEND:
1916  		return fuse_notify_resend(fc);
1917  
1918  	default:
1919  		fuse_copy_finish(cs);
1920  		return -EINVAL;
1921  	}
1922  }
1923  
1924  /* Look up request on processing list by unique ID */
1925  static struct fuse_req *request_find(struct fuse_pqueue *fpq, u64 unique)
1926  {
1927  	unsigned int hash = fuse_req_hash(unique);
1928  	struct fuse_req *req;
1929  
1930  	list_for_each_entry(req, &fpq->processing[hash], list) {
1931  		if (req->in.h.unique == unique)
1932  			return req;
1933  	}
1934  	return NULL;
1935  }
1936  
1937  static int copy_out_args(struct fuse_copy_state *cs, struct fuse_args *args,
1938  			 unsigned nbytes)
1939  {
1940  	unsigned reqsize = sizeof(struct fuse_out_header);
1941  
1942  	reqsize += fuse_len_args(args->out_numargs, args->out_args);
1943  
1944  	if (reqsize < nbytes || (reqsize > nbytes && !args->out_argvar))
1945  		return -EINVAL;
1946  	else if (reqsize > nbytes) {
1947  		struct fuse_arg *lastarg = &args->out_args[args->out_numargs-1];
1948  		unsigned diffsize = reqsize - nbytes;
1949  
1950  		if (diffsize > lastarg->size)
1951  			return -EINVAL;
1952  		lastarg->size -= diffsize;
1953  	}
1954  	return fuse_copy_args(cs, args->out_numargs, args->out_pages,
1955  			      args->out_args, args->page_zeroing);
1956  }
1957  
1958  /*
1959   * Write a single reply to a request.  First the header is copied from
1960   * the write buffer.  The request is then searched on the processing
1961   * list by the unique ID found in the header.  If found, the request is
1962   * removed from the list and the rest of the buffer is copied into it.
1963   * The request is then finished by calling fuse_request_end().
1964   */
1965  static ssize_t fuse_dev_do_write(struct fuse_dev *fud,
1966  				 struct fuse_copy_state *cs, size_t nbytes)
1967  {
1968  	int err;
1969  	struct fuse_conn *fc = fud->fc;
1970  	struct fuse_pqueue *fpq = &fud->pq;
1971  	struct fuse_req *req;
1972  	struct fuse_out_header oh;
1973  
1974  	err = -EINVAL;
1975  	if (nbytes < sizeof(struct fuse_out_header))
1976  		goto out;
1977  
1978  	err = fuse_copy_one(cs, &oh, sizeof(oh));
1979  	if (err)
1980  		goto copy_finish;
1981  
1982  	err = -EINVAL;
1983  	if (oh.len != nbytes)
1984  		goto copy_finish;
1985  
1986  	/*
1987  	 * A zero oh.unique indicates an unsolicited notification message,
1988  	 * in which case the error field carries the notification code.
1989  	 */
1990  	if (!oh.unique) {
1991  		err = fuse_notify(fc, oh.error, nbytes - sizeof(oh), cs);
1992  		goto out;
1993  	}
1994  
1995  	err = -EINVAL;
1996  	if (oh.error <= -512 || oh.error > 0)
1997  		goto copy_finish;
1998  
1999  	spin_lock(&fpq->lock);
2000  	req = NULL;
2001  	if (fpq->connected)
2002  		req = request_find(fpq, oh.unique & ~FUSE_INT_REQ_BIT);
2003  
2004  	err = -ENOENT;
2005  	if (!req) {
2006  		spin_unlock(&fpq->lock);
2007  		goto copy_finish;
2008  	}
2009  
2010  	/* Is it an interrupt reply ID? */
2011  	if (oh.unique & FUSE_INT_REQ_BIT) {
2012  		__fuse_get_request(req);
2013  		spin_unlock(&fpq->lock);
2014  
2015  		err = 0;
2016  		if (nbytes != sizeof(struct fuse_out_header))
2017  			err = -EINVAL;
2018  		else if (oh.error == -ENOSYS)
2019  			fc->no_interrupt = 1;
2020  		else if (oh.error == -EAGAIN)
2021  			err = queue_interrupt(req);
2022  
2023  		fuse_put_request(req);
2024  
2025  		goto copy_finish;
2026  	}
2027  
2028  	clear_bit(FR_SENT, &req->flags);
2029  	list_move(&req->list, &fpq->io);
2030  	req->out.h = oh;
2031  	set_bit(FR_LOCKED, &req->flags);
2032  	spin_unlock(&fpq->lock);
2033  	cs->req = req;
2034  	if (!req->args->page_replace)
2035  		cs->move_pages = 0;
2036  
2037  	if (oh.error)
2038  		err = nbytes != sizeof(oh) ? -EINVAL : 0;
2039  	else
2040  		err = copy_out_args(cs, req->args, nbytes);
2041  	fuse_copy_finish(cs);
2042  
2043  	spin_lock(&fpq->lock);
2044  	clear_bit(FR_LOCKED, &req->flags);
2045  	if (!fpq->connected)
2046  		err = -ENOENT;
2047  	else if (err)
2048  		req->out.h.error = -EIO;
2049  	if (!test_bit(FR_PRIVATE, &req->flags))
2050  		list_del_init(&req->list);
2051  	spin_unlock(&fpq->lock);
2052  
2053  	fuse_request_end(req);
2054  out:
2055  	return err ? err : nbytes;
2056  
2057  copy_finish:
2058  	fuse_copy_finish(cs);
2059  	goto out;
2060  }
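/*
 * For reference, a minimal sketch of the matching userspace side of
 * fuse_dev_do_write(): the daemon answers a request by writing a
 * struct fuse_out_header (with 'unique' copied from the request and 'len'
 * covering header plus payload) followed by the out-arguments, all in one
 * gathered write.  Error replies carry no payload.  Helper name and error
 * handling are illustrative only; this is not part of the kernel build.
 */
#if 0	/* userspace example */
#include <linux/fuse.h>
#include <stdint.h>
#include <string.h>
#include <sys/uio.h>

static int send_reply(int fuse_fd, uint64_t unique, int error,
		      const void *payload, size_t payload_len)
{
	struct fuse_out_header oh;
	struct iovec iov[2];
	int iovcnt = 1;

	memset(&oh, 0, sizeof(oh));
	oh.unique = unique;		/* copied from the request's fuse_in_header */
	oh.error  = error;		/* 0 on success, or a negated errno */
	oh.len    = sizeof(oh) + (error ? 0 : payload_len);

	iov[0].iov_base = &oh;
	iov[0].iov_len  = sizeof(oh);
	if (!error && payload_len) {
		iov[1].iov_base = (void *)payload;
		iov[1].iov_len  = payload_len;
		iovcnt = 2;
	}

	return writev(fuse_fd, iov, iovcnt) == (ssize_t)oh.len ? 0 : -1;
}
#endif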
2061  
2062  static ssize_t fuse_dev_write(struct kiocb *iocb, struct iov_iter *from)
2063  {
2064  	struct fuse_copy_state cs;
2065  	struct fuse_dev *fud = fuse_get_dev(iocb->ki_filp);
2066  
2067  	if (!fud)
2068  		return -EPERM;
2069  
2070  	if (!user_backed_iter(from))
2071  		return -EINVAL;
2072  
2073  	fuse_copy_init(&cs, 0, from);
2074  
2075  	return fuse_dev_do_write(fud, &cs, iov_iter_count(from));
2076  }
2077  
2078  static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
2079  				     struct file *out, loff_t *ppos,
2080  				     size_t len, unsigned int flags)
2081  {
2082  	unsigned int head, tail, mask, count;
2083  	unsigned nbuf;
2084  	unsigned idx;
2085  	struct pipe_buffer *bufs;
2086  	struct fuse_copy_state cs;
2087  	struct fuse_dev *fud;
2088  	size_t rem;
2089  	ssize_t ret;
2090  
2091  	fud = fuse_get_dev(out);
2092  	if (!fud)
2093  		return -EPERM;
2094  
2095  	pipe_lock(pipe);
2096  
2097  	head = pipe->head;
2098  	tail = pipe->tail;
2099  	mask = pipe->ring_size - 1;
2100  	count = head - tail;
2101  
2102  	bufs = kvmalloc_array(count, sizeof(struct pipe_buffer), GFP_KERNEL);
2103  	if (!bufs) {
2104  		pipe_unlock(pipe);
2105  		return -ENOMEM;
2106  	}
2107  
2108  	nbuf = 0;
2109  	rem = 0;
2110  	for (idx = tail; idx != head && rem < len; idx++)
2111  		rem += pipe->bufs[idx & mask].len;
2112  
2113  	ret = -EINVAL;
2114  	if (rem < len)
2115  		goto out_free;
2116  
2117  	rem = len;
2118  	while (rem) {
2119  		struct pipe_buffer *ibuf;
2120  		struct pipe_buffer *obuf;
2121  
2122  		if (WARN_ON(nbuf >= count || tail == head))
2123  			goto out_free;
2124  
2125  		ibuf = &pipe->bufs[tail & mask];
2126  		obuf = &bufs[nbuf];
2127  
2128  		if (rem >= ibuf->len) {
2129  			*obuf = *ibuf;
2130  			ibuf->ops = NULL;
2131  			tail++;
2132  			pipe->tail = tail;
2133  		} else {
2134  			if (!pipe_buf_get(pipe, ibuf))
2135  				goto out_free;
2136  
2137  			*obuf = *ibuf;
2138  			obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
2139  			obuf->len = rem;
2140  			ibuf->offset += obuf->len;
2141  			ibuf->len -= obuf->len;
2142  		}
2143  		nbuf++;
2144  		rem -= obuf->len;
2145  	}
2146  	pipe_unlock(pipe);
2147  
2148  	fuse_copy_init(&cs, 0, NULL);
2149  	cs.pipebufs = bufs;
2150  	cs.nr_segs = nbuf;
2151  	cs.pipe = pipe;
2152  
2153  	if (flags & SPLICE_F_MOVE)
2154  		cs.move_pages = 1;
2155  
2156  	ret = fuse_dev_do_write(fud, &cs, len);
2157  
2158  	pipe_lock(pipe);
2159  out_free:
2160  	for (idx = 0; idx < nbuf; idx++) {
2161  		struct pipe_buffer *buf = &bufs[idx];
2162  
2163  		if (buf->ops)
2164  			pipe_buf_release(pipe, buf);
2165  	}
2166  	pipe_unlock(pipe);
2167  
2168  	kvfree(bufs);
2169  	return ret;
2170  }
2171  
2172  static __poll_t fuse_dev_poll(struct file *file, poll_table *wait)
2173  {
2174  	__poll_t mask = EPOLLOUT | EPOLLWRNORM;
2175  	struct fuse_iqueue *fiq;
2176  	struct fuse_dev *fud = fuse_get_dev(file);
2177  
2178  	if (!fud)
2179  		return EPOLLERR;
2180  
2181  	fiq = &fud->fc->iq;
2182  	poll_wait(file, &fiq->waitq, wait);
2183  
2184  	spin_lock(&fiq->lock);
2185  	if (!fiq->connected)
2186  		mask = EPOLLERR;
2187  	else if (request_pending(fiq))
2188  		mask |= EPOLLIN | EPOLLRDNORM;
2189  	spin_unlock(&fiq->lock);
2190  
2191  	return mask;
2192  }
2193  
2194  /* Abort all requests on the given list (pending or processing) */
2195  static void end_requests(struct list_head *head)
2196  {
2197  	while (!list_empty(head)) {
2198  		struct fuse_req *req;
2199  		req = list_entry(head->next, struct fuse_req, list);
2200  		req->out.h.error = -ECONNABORTED;
2201  		clear_bit(FR_SENT, &req->flags);
2202  		list_del_init(&req->list);
2203  		fuse_request_end(req);
2204  	}
2205  }
2206  
2207  static void end_polls(struct fuse_conn *fc)
2208  {
2209  	struct rb_node *p;
2210  
2211  	p = rb_first(&fc->polled_files);
2212  
2213  	while (p) {
2214  		struct fuse_file *ff;
2215  		ff = rb_entry(p, struct fuse_file, polled_node);
2216  		wake_up_interruptible_all(&ff->poll_wait);
2217  
2218  		p = rb_next(p);
2219  	}
2220  }
2221  
2222  /*
2223   * Abort all requests.
2224   *
2225   * Emergency exit in case of a malicious or accidental deadlock, or just a hung
2226   * filesystem.
2227   *
2228   * The same effect is usually achievable through killing the filesystem daemon
2229   * and all users of the filesystem.  The exception is the combination of an
2230   * asynchronous request and the tricky deadlock (see
2231   * Documentation/filesystems/fuse.rst).
2232   *
2233   * Aborting requests under I/O goes as follows: 1: Separate out unlocked
2234   * requests; these should be finished off immediately.  Locked requests will
2235   * be finished after unlock; see unlock_request(). 2: Finish off the unlocked
2236   * requests.  It is possible that some request will finish before we can; this
2237   * is OK, since in that case the request is removed from the list before we
2238   * touch it.
2239   */
2240  void fuse_abort_conn(struct fuse_conn *fc)
2241  {
2242  	struct fuse_iqueue *fiq = &fc->iq;
2243  
2244  	spin_lock(&fc->lock);
2245  	if (fc->connected) {
2246  		struct fuse_dev *fud;
2247  		struct fuse_req *req, *next;
2248  		LIST_HEAD(to_end);
2249  		unsigned int i;
2250  
2251  		/* Background queuing checks fc->connected under bg_lock */
2252  		spin_lock(&fc->bg_lock);
2253  		fc->connected = 0;
2254  		spin_unlock(&fc->bg_lock);
2255  
2256  		fuse_set_initialized(fc);
2257  		list_for_each_entry(fud, &fc->devices, entry) {
2258  			struct fuse_pqueue *fpq = &fud->pq;
2259  
2260  			spin_lock(&fpq->lock);
2261  			fpq->connected = 0;
2262  			list_for_each_entry_safe(req, next, &fpq->io, list) {
2263  				req->out.h.error = -ECONNABORTED;
2264  				spin_lock(&req->waitq.lock);
2265  				set_bit(FR_ABORTED, &req->flags);
2266  				if (!test_bit(FR_LOCKED, &req->flags)) {
2267  					set_bit(FR_PRIVATE, &req->flags);
2268  					__fuse_get_request(req);
2269  					list_move(&req->list, &to_end);
2270  				}
2271  				spin_unlock(&req->waitq.lock);
2272  			}
2273  			for (i = 0; i < FUSE_PQ_HASH_SIZE; i++)
2274  				list_splice_tail_init(&fpq->processing[i],
2275  						      &to_end);
2276  			spin_unlock(&fpq->lock);
2277  		}
2278  		spin_lock(&fc->bg_lock);
2279  		fc->blocked = 0;
2280  		fc->max_background = UINT_MAX;
2281  		flush_bg_queue(fc);
2282  		spin_unlock(&fc->bg_lock);
2283  
2284  		spin_lock(&fiq->lock);
2285  		fiq->connected = 0;
2286  		list_for_each_entry(req, &fiq->pending, list)
2287  			clear_bit(FR_PENDING, &req->flags);
2288  		list_splice_tail_init(&fiq->pending, &to_end);
2289  		while (forget_pending(fiq))
2290  			kfree(fuse_dequeue_forget(fiq, 1, NULL));
2291  		wake_up_all(&fiq->waitq);
2292  		spin_unlock(&fiq->lock);
2293  		kill_fasync(&fiq->fasync, SIGIO, POLL_IN);
2294  		end_polls(fc);
2295  		wake_up_all(&fc->blocked_waitq);
2296  		spin_unlock(&fc->lock);
2297  
2298  		end_requests(&to_end);
2299  	} else {
2300  		spin_unlock(&fc->lock);
2301  	}
2302  }
2303  EXPORT_SYMBOL_GPL(fuse_abort_conn);
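/*
 * The usual way to reach fuse_abort_conn() from userspace is the fuse control
 * filesystem described in Documentation/filesystems/fuse.rst: writing to
 * /sys/fs/fuse/connections/<id>/abort aborts the matching connection, where
 * <id> is conventionally the minor number of the mount's anonymous device.
 * A rough sketch, assuming fusectl is mounted at its usual location; the
 * helper name is illustrative and not part of the kernel build.
 */
#if 0	/* userspace example */
#include <fcntl.h>
#include <stdio.h>
#include <sys/stat.h>
#include <sys/sysmacros.h>
#include <unistd.h>

static int abort_fuse_conn(const char *mountpoint)
{
	struct stat st;
	char path[64];
	int fd, ret;

	if (stat(mountpoint, &st) == -1)
		return -1;

	snprintf(path, sizeof(path), "/sys/fs/fuse/connections/%u/abort",
		 minor(st.st_dev));

	fd = open(path, O_WRONLY);
	if (fd == -1)
		return -1;

	ret = write(fd, "1", 1) == 1 ? 0 : -1;
	close(fd);
	return ret;
}
#endif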
2304  
2305  void fuse_wait_aborted(struct fuse_conn *fc)
2306  {
2307  	/* matches implicit memory barrier in fuse_drop_waiting() */
2308  	smp_mb();
2309  	wait_event(fc->blocked_waitq, atomic_read(&fc->num_waiting) == 0);
2310  }
2311  
2312  int fuse_dev_release(struct inode *inode, struct file *file)
2313  {
2314  	struct fuse_dev *fud = fuse_get_dev(file);
2315  
2316  	if (fud) {
2317  		struct fuse_conn *fc = fud->fc;
2318  		struct fuse_pqueue *fpq = &fud->pq;
2319  		LIST_HEAD(to_end);
2320  		unsigned int i;
2321  
2322  		spin_lock(&fpq->lock);
2323  		WARN_ON(!list_empty(&fpq->io));
2324  		for (i = 0; i < FUSE_PQ_HASH_SIZE; i++)
2325  			list_splice_init(&fpq->processing[i], &to_end);
2326  		spin_unlock(&fpq->lock);
2327  
2328  		end_requests(&to_end);
2329  
2330  		/* Are we the last open device? */
2331  		if (atomic_dec_and_test(&fc->dev_count)) {
2332  			WARN_ON(fc->iq.fasync != NULL);
2333  			fuse_abort_conn(fc);
2334  		}
2335  		fuse_dev_free(fud);
2336  	}
2337  	return 0;
2338  }
2339  EXPORT_SYMBOL_GPL(fuse_dev_release);
2340  
2341  static int fuse_dev_fasync(int fd, struct file *file, int on)
2342  {
2343  	struct fuse_dev *fud = fuse_get_dev(file);
2344  
2345  	if (!fud)
2346  		return -EPERM;
2347  
2348  	/* No locking - fasync_helper does its own locking */
2349  	return fasync_helper(fd, file, on, &fud->fc->iq.fasync);
2350  }
2351  
2352  static int fuse_device_clone(struct fuse_conn *fc, struct file *new)
2353  {
2354  	struct fuse_dev *fud;
2355  
2356  	if (new->private_data)
2357  		return -EINVAL;
2358  
2359  	fud = fuse_dev_alloc_install(fc);
2360  	if (!fud)
2361  		return -ENOMEM;
2362  
2363  	new->private_data = fud;
2364  	atomic_inc(&fc->dev_count);
2365  
2366  	return 0;
2367  }
2368  
2369  static long fuse_dev_ioctl_clone(struct file *file, __u32 __user *argp)
2370  {
2371  	int res;
2372  	int oldfd;
2373  	struct fuse_dev *fud = NULL;
2374  	struct fd f;
2375  
2376  	if (get_user(oldfd, argp))
2377  		return -EFAULT;
2378  
2379  	f = fdget(oldfd);
2380  	if (!fd_file(f))
2381  		return -EINVAL;
2382  
2383  	/*
2384  	 * Check against file->f_op because CUSE
2385  	 * uses the same ioctl handler.
2386  	 */
2387  	if (fd_file(f)->f_op == file->f_op)
2388  		fud = fuse_get_dev(fd_file(f));
2389  
2390  	res = -EINVAL;
2391  	if (fud) {
2392  		mutex_lock(&fuse_mutex);
2393  		res = fuse_device_clone(fud->fc, file);
2394  		mutex_unlock(&fuse_mutex);
2395  	}
2396  
2397  	fdput(f);
2398  	return res;
2399  }
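/*
 * Sketch of how a multi-threaded daemon typically uses FUSE_DEV_IOC_CLONE:
 * each worker opens its own /dev/fuse fd and attaches it to the existing
 * session by passing the session fd through the ioctl, so requests can be
 * read and answered on several fds in parallel.  Helper name and error
 * handling are illustrative only; this is not part of the kernel build.
 */
#if 0	/* userspace example */
#include <linux/fuse.h>
#include <fcntl.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <unistd.h>

/* Return a new device fd attached to the same connection as 'session_fd'. */
static int clone_fuse_fd(int session_fd)
{
	uint32_t oldfd = session_fd;
	int clonefd = open("/dev/fuse", O_RDWR | O_CLOEXEC);

	if (clonefd == -1)
		return -1;

	if (ioctl(clonefd, FUSE_DEV_IOC_CLONE, &oldfd) == -1) {
		close(clonefd);
		return -1;
	}
	return clonefd;
}
#endif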
2400  
2401  static long fuse_dev_ioctl_backing_open(struct file *file,
2402  					struct fuse_backing_map __user *argp)
2403  {
2404  	struct fuse_dev *fud = fuse_get_dev(file);
2405  	struct fuse_backing_map map;
2406  
2407  	if (!fud)
2408  		return -EPERM;
2409  
2410  	if (!IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
2411  		return -EOPNOTSUPP;
2412  
2413  	if (copy_from_user(&map, argp, sizeof(map)))
2414  		return -EFAULT;
2415  
2416  	return fuse_backing_open(fud->fc, &map);
2417  }
2418  
2419  static long fuse_dev_ioctl_backing_close(struct file *file, __u32 __user *argp)
2420  {
2421  	struct fuse_dev *fud = fuse_get_dev(file);
2422  	int backing_id;
2423  
2424  	if (!fud)
2425  		return -EPERM;
2426  
2427  	if (!IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
2428  		return -EOPNOTSUPP;
2429  
2430  	if (get_user(backing_id, argp))
2431  		return -EFAULT;
2432  
2433  	return fuse_backing_close(fud->fc, backing_id);
2434  }
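/*
 * Sketch of the userspace side of the two backing ioctls above, assuming a
 * kernel built with CONFIG_FUSE_PASSTHROUGH: the daemon registers an open
 * backing file and receives a backing id (a positive number), which it can
 * later reference from an open reply and eventually drop again with
 * FUSE_DEV_IOC_BACKING_CLOSE.  Field and helper names follow the uapi
 * <linux/fuse.h>; error handling is illustrative only and this is not part
 * of the kernel build.
 */
#if 0	/* userspace example */
#include <linux/fuse.h>
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>

/* Register 'backing_fd' with the connection; returns a backing id (> 0). */
static int backing_open(int dev_fd, int backing_fd)
{
	struct fuse_backing_map map;

	memset(&map, 0, sizeof(map));
	map.fd = backing_fd;

	return ioctl(dev_fd, FUSE_DEV_IOC_BACKING_OPEN, &map);
}

/* Release a previously registered backing id. */
static int backing_close(int dev_fd, uint32_t backing_id)
{
	return ioctl(dev_fd, FUSE_DEV_IOC_BACKING_CLOSE, &backing_id);
}
#endif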
2435  
2436  static long fuse_dev_ioctl(struct file *file, unsigned int cmd,
2437  			   unsigned long arg)
2438  {
2439  	void __user *argp = (void __user *)arg;
2440  
2441  	switch (cmd) {
2442  	case FUSE_DEV_IOC_CLONE:
2443  		return fuse_dev_ioctl_clone(file, argp);
2444  
2445  	case FUSE_DEV_IOC_BACKING_OPEN:
2446  		return fuse_dev_ioctl_backing_open(file, argp);
2447  
2448  	case FUSE_DEV_IOC_BACKING_CLOSE:
2449  		return fuse_dev_ioctl_backing_close(file, argp);
2450  
2451  	default:
2452  		return -ENOTTY;
2453  	}
2454  }
2455  
2456  const struct file_operations fuse_dev_operations = {
2457  	.owner		= THIS_MODULE,
2458  	.open		= fuse_dev_open,
2459  	.read_iter	= fuse_dev_read,
2460  	.splice_read	= fuse_dev_splice_read,
2461  	.write_iter	= fuse_dev_write,
2462  	.splice_write	= fuse_dev_splice_write,
2463  	.poll		= fuse_dev_poll,
2464  	.release	= fuse_dev_release,
2465  	.fasync		= fuse_dev_fasync,
2466  	.unlocked_ioctl = fuse_dev_ioctl,
2467  	.compat_ioctl   = compat_ptr_ioctl,
2468  };
2469  EXPORT_SYMBOL_GPL(fuse_dev_operations);
2470  
2471  static struct miscdevice fuse_miscdevice = {
2472  	.minor = FUSE_MINOR,
2473  	.name  = "fuse",
2474  	.fops = &fuse_dev_operations,
2475  };
2476  
2477  int __init fuse_dev_init(void)
2478  {
2479  	int err = -ENOMEM;
2480  	fuse_req_cachep = kmem_cache_create("fuse_request",
2481  					    sizeof(struct fuse_req),
2482  					    0, 0, NULL);
2483  	if (!fuse_req_cachep)
2484  		goto out;
2485  
2486  	err = misc_register(&fuse_miscdevice);
2487  	if (err)
2488  		goto out_cache_clean;
2489  
2490  	return 0;
2491  
2492   out_cache_clean:
2493  	kmem_cache_destroy(fuse_req_cachep);
2494   out:
2495  	return err;
2496  }
2497  
2498  void fuse_dev_cleanup(void)
2499  {
2500  	misc_deregister(&fuse_miscdevice);
2501  	kmem_cache_destroy(fuse_req_cachep);
2502  }
2503