1  // SPDX-License-Identifier: GPL-2.0
2  /*
3   * File operations used by nfsd. Some of these have been ripped from
4   * other parts of the kernel because they weren't exported, others
5   * are partial duplicates with added or changed functionality.
6   *
7   * Note that several functions dget() the dentry upon which they want
8   * to act, most notably those that create directory entries. Response
9   * dentry's are dput()'d if necessary in the release callback.
10   * So if you notice code paths that apparently fail to dput() the
11   * dentry, don't worry--they have been taken care of.
12   *
13   * Copyright (C) 1995-1999 Olaf Kirch <okir@monad.swb.de>
14   * Zerocpy NFS support (C) 2002 Hirokazu Takahashi <taka@valinux.co.jp>
15   */
16  
17  #include <linux/fs.h>
18  #include <linux/file.h>
19  #include <linux/splice.h>
20  #include <linux/falloc.h>
21  #include <linux/fcntl.h>
22  #include <linux/namei.h>
23  #include <linux/delay.h>
24  #include <linux/fsnotify.h>
25  #include <linux/posix_acl_xattr.h>
26  #include <linux/xattr.h>
27  #include <linux/jhash.h>
28  #include <linux/pagemap.h>
29  #include <linux/slab.h>
30  #include <linux/uaccess.h>
31  #include <linux/exportfs.h>
32  #include <linux/writeback.h>
33  #include <linux/security.h>
34  
35  #include "xdr3.h"
36  
37  #ifdef CONFIG_NFSD_V4
38  #include "../internal.h"
39  #include "acl.h"
40  #include "idmap.h"
41  #include "xdr4.h"
42  #endif /* CONFIG_NFSD_V4 */
43  
44  #include "nfsd.h"
45  #include "vfs.h"
46  #include "filecache.h"
47  #include "trace.h"
48  
49  #define NFSDDBG_FACILITY		NFSDDBG_FILEOP
50  
51  /**
52   * nfserrno - Map Linux errnos to NFS errnos
53   * @errno: POSIX(-ish) error code to be mapped
54   *
55   * Returns the appropriate (net-endian) nfserr_* (or nfs_ok if errno is 0). If
56   * it's an error we don't expect, log it once and return nfserr_io.
57   */
58  __be32
nfserrno(int errno)59  nfserrno (int errno)
60  {
61  	static struct {
62  		__be32	nfserr;
63  		int	syserr;
64  	} nfs_errtbl[] = {
65  		{ nfs_ok, 0 },
66  		{ nfserr_perm, -EPERM },
67  		{ nfserr_noent, -ENOENT },
68  		{ nfserr_io, -EIO },
69  		{ nfserr_nxio, -ENXIO },
70  		{ nfserr_fbig, -E2BIG },
71  		{ nfserr_stale, -EBADF },
72  		{ nfserr_acces, -EACCES },
73  		{ nfserr_exist, -EEXIST },
74  		{ nfserr_xdev, -EXDEV },
75  		{ nfserr_mlink, -EMLINK },
76  		{ nfserr_nodev, -ENODEV },
77  		{ nfserr_notdir, -ENOTDIR },
78  		{ nfserr_isdir, -EISDIR },
79  		{ nfserr_inval, -EINVAL },
80  		{ nfserr_fbig, -EFBIG },
81  		{ nfserr_nospc, -ENOSPC },
82  		{ nfserr_rofs, -EROFS },
83  		{ nfserr_mlink, -EMLINK },
84  		{ nfserr_nametoolong, -ENAMETOOLONG },
85  		{ nfserr_notempty, -ENOTEMPTY },
86  		{ nfserr_dquot, -EDQUOT },
87  		{ nfserr_stale, -ESTALE },
88  		{ nfserr_jukebox, -ETIMEDOUT },
89  		{ nfserr_jukebox, -ERESTARTSYS },
90  		{ nfserr_jukebox, -EAGAIN },
91  		{ nfserr_jukebox, -EWOULDBLOCK },
92  		{ nfserr_jukebox, -ENOMEM },
93  		{ nfserr_io, -ETXTBSY },
94  		{ nfserr_notsupp, -EOPNOTSUPP },
95  		{ nfserr_toosmall, -ETOOSMALL },
96  		{ nfserr_serverfault, -ESERVERFAULT },
97  		{ nfserr_serverfault, -ENFILE },
98  		{ nfserr_io, -EREMOTEIO },
99  		{ nfserr_stale, -EOPENSTALE },
100  		{ nfserr_io, -EUCLEAN },
101  		{ nfserr_perm, -ENOKEY },
102  		{ nfserr_no_grace, -ENOGRACE},
103  		{ nfserr_io, -EBADMSG },
104  	};
105  	int	i;
106  
107  	for (i = 0; i < ARRAY_SIZE(nfs_errtbl); i++) {
108  		if (nfs_errtbl[i].syserr == errno)
109  			return nfs_errtbl[i].nfserr;
110  	}
111  	WARN_ONCE(1, "nfsd: non-standard errno: %d\n", errno);
112  	return nfserr_io;
113  }
114  
115  /*
116   * Called from nfsd_lookup and encode_dirent. Check if we have crossed
117   * a mount point.
118   * Returns -EAGAIN or -ETIMEDOUT leaving *dpp and *expp unchanged,
119   *  or nfs_ok having possibly changed *dpp and *expp
120   */
121  int
nfsd_cross_mnt(struct svc_rqst * rqstp,struct dentry ** dpp,struct svc_export ** expp)122  nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
123  		        struct svc_export **expp)
124  {
125  	struct svc_export *exp = *expp, *exp2 = NULL;
126  	struct dentry *dentry = *dpp;
127  	struct path path = {.mnt = mntget(exp->ex_path.mnt),
128  			    .dentry = dget(dentry)};
129  	unsigned int follow_flags = 0;
130  	int err = 0;
131  
132  	if (exp->ex_flags & NFSEXP_CROSSMOUNT)
133  		follow_flags = LOOKUP_AUTOMOUNT;
134  
135  	err = follow_down(&path, follow_flags);
136  	if (err < 0)
137  		goto out;
138  	if (path.mnt == exp->ex_path.mnt && path.dentry == dentry &&
139  	    nfsd_mountpoint(dentry, exp) == 2) {
140  		/* This is only a mountpoint in some other namespace */
141  		path_put(&path);
142  		goto out;
143  	}
144  
145  	exp2 = rqst_exp_get_by_name(rqstp, &path);
146  	if (IS_ERR(exp2)) {
147  		err = PTR_ERR(exp2);
148  		/*
149  		 * We normally allow NFS clients to continue
150  		 * "underneath" a mountpoint that is not exported.
151  		 * The exception is V4ROOT, where no traversal is ever
152  		 * allowed without an explicit export of the new
153  		 * directory.
154  		 */
155  		if (err == -ENOENT && !(exp->ex_flags & NFSEXP_V4ROOT))
156  			err = 0;
157  		path_put(&path);
158  		goto out;
159  	}
160  	if (nfsd_v4client(rqstp) ||
161  		(exp->ex_flags & NFSEXP_CROSSMOUNT) || EX_NOHIDE(exp2)) {
162  		/* successfully crossed mount point */
163  		/*
164  		 * This is subtle: path.dentry is *not* on path.mnt
165  		 * at this point.  The only reason we are safe is that
166  		 * original mnt is pinned down by exp, so we should
167  		 * put path *before* putting exp
168  		 */
169  		*dpp = path.dentry;
170  		path.dentry = dentry;
171  		*expp = exp2;
172  		exp2 = exp;
173  	}
174  	path_put(&path);
175  	exp_put(exp2);
176  out:
177  	return err;
178  }
179  
follow_to_parent(struct path * path)180  static void follow_to_parent(struct path *path)
181  {
182  	struct dentry *dp;
183  
184  	while (path->dentry == path->mnt->mnt_root && follow_up(path))
185  		;
186  	dp = dget_parent(path->dentry);
187  	dput(path->dentry);
188  	path->dentry = dp;
189  }
190  
nfsd_lookup_parent(struct svc_rqst * rqstp,struct dentry * dparent,struct svc_export ** exp,struct dentry ** dentryp)191  static int nfsd_lookup_parent(struct svc_rqst *rqstp, struct dentry *dparent, struct svc_export **exp, struct dentry **dentryp)
192  {
193  	struct svc_export *exp2;
194  	struct path path = {.mnt = mntget((*exp)->ex_path.mnt),
195  			    .dentry = dget(dparent)};
196  
197  	follow_to_parent(&path);
198  
199  	exp2 = rqst_exp_parent(rqstp, &path);
200  	if (PTR_ERR(exp2) == -ENOENT) {
201  		*dentryp = dget(dparent);
202  	} else if (IS_ERR(exp2)) {
203  		path_put(&path);
204  		return PTR_ERR(exp2);
205  	} else {
206  		*dentryp = dget(path.dentry);
207  		exp_put(*exp);
208  		*exp = exp2;
209  	}
210  	path_put(&path);
211  	return 0;
212  }
213  
214  /*
215   * For nfsd purposes, we treat V4ROOT exports as though there was an
216   * export at *every* directory.
217   * We return:
218   * '1' if this dentry *must* be an export point,
219   * '2' if it might be, if there is really a mount here, and
220   * '0' if there is no chance of an export point here.
221   */
nfsd_mountpoint(struct dentry * dentry,struct svc_export * exp)222  int nfsd_mountpoint(struct dentry *dentry, struct svc_export *exp)
223  {
224  	if (!d_inode(dentry))
225  		return 0;
226  	if (exp->ex_flags & NFSEXP_V4ROOT)
227  		return 1;
228  	if (nfsd4_is_junction(dentry))
229  		return 1;
230  	if (d_managed(dentry))
231  		/*
232  		 * Might only be a mountpoint in a different namespace,
233  		 * but we need to check.
234  		 */
235  		return 2;
236  	return 0;
237  }
238  
239  __be32
nfsd_lookup_dentry(struct svc_rqst * rqstp,struct svc_fh * fhp,const char * name,unsigned int len,struct svc_export ** exp_ret,struct dentry ** dentry_ret)240  nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp,
241  		   const char *name, unsigned int len,
242  		   struct svc_export **exp_ret, struct dentry **dentry_ret)
243  {
244  	struct svc_export	*exp;
245  	struct dentry		*dparent;
246  	struct dentry		*dentry;
247  	int			host_err;
248  
249  	dprintk("nfsd: nfsd_lookup(fh %s, %.*s)\n", SVCFH_fmt(fhp), len,name);
250  
251  	dparent = fhp->fh_dentry;
252  	exp = exp_get(fhp->fh_export);
253  
254  	/* Lookup the name, but don't follow links */
255  	if (isdotent(name, len)) {
256  		if (len==1)
257  			dentry = dget(dparent);
258  		else if (dparent != exp->ex_path.dentry)
259  			dentry = dget_parent(dparent);
260  		else if (!EX_NOHIDE(exp) && !nfsd_v4client(rqstp))
261  			dentry = dget(dparent); /* .. == . just like at / */
262  		else {
263  			/* checking mountpoint crossing is very different when stepping up */
264  			host_err = nfsd_lookup_parent(rqstp, dparent, &exp, &dentry);
265  			if (host_err)
266  				goto out_nfserr;
267  		}
268  	} else {
269  		dentry = lookup_one_len_unlocked(name, dparent, len);
270  		host_err = PTR_ERR(dentry);
271  		if (IS_ERR(dentry))
272  			goto out_nfserr;
273  		if (nfsd_mountpoint(dentry, exp)) {
274  			host_err = nfsd_cross_mnt(rqstp, &dentry, &exp);
275  			if (host_err) {
276  				dput(dentry);
277  				goto out_nfserr;
278  			}
279  		}
280  	}
281  	*dentry_ret = dentry;
282  	*exp_ret = exp;
283  	return 0;
284  
285  out_nfserr:
286  	exp_put(exp);
287  	return nfserrno(host_err);
288  }
289  
290  /**
291   * nfsd_lookup - look up a single path component for nfsd
292   *
293   * @rqstp:   the request context
294   * @fhp:     the file handle of the directory
295   * @name:    the component name, or %NULL to look up parent
296   * @len:     length of name to examine
297   * @resfh:   pointer to pre-initialised filehandle to hold result.
298   *
299   * Look up one component of a pathname.
300   * N.B. After this call _both_ fhp and resfh need an fh_put
301   *
302   * If the lookup would cross a mountpoint, and the mounted filesystem
303   * is exported to the client with NFSEXP_NOHIDE, then the lookup is
304   * accepted as it stands and the mounted directory is
305   * returned. Otherwise the covered directory is returned.
306   * NOTE: this mountpoint crossing is not supported properly by all
307   *   clients and is explicitly disallowed for NFSv3
308   *
309   */
310  __be32
nfsd_lookup(struct svc_rqst * rqstp,struct svc_fh * fhp,const char * name,unsigned int len,struct svc_fh * resfh)311  nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name,
312  	    unsigned int len, struct svc_fh *resfh)
313  {
314  	struct svc_export	*exp;
315  	struct dentry		*dentry;
316  	__be32 err;
317  
318  	err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC);
319  	if (err)
320  		return err;
321  	err = nfsd_lookup_dentry(rqstp, fhp, name, len, &exp, &dentry);
322  	if (err)
323  		return err;
324  	err = check_nfsd_access(exp, rqstp);
325  	if (err)
326  		goto out;
327  	/*
328  	 * Note: we compose the file handle now, but as the
329  	 * dentry may be negative, it may need to be updated.
330  	 */
331  	err = fh_compose(resfh, exp, dentry, fhp);
332  	if (!err && d_really_is_negative(dentry))
333  		err = nfserr_noent;
334  out:
335  	dput(dentry);
336  	exp_put(exp);
337  	return err;
338  }
339  
340  static void
commit_reset_write_verifier(struct nfsd_net * nn,struct svc_rqst * rqstp,int err)341  commit_reset_write_verifier(struct nfsd_net *nn, struct svc_rqst *rqstp,
342  			    int err)
343  {
344  	switch (err) {
345  	case -EAGAIN:
346  	case -ESTALE:
347  		/*
348  		 * Neither of these are the result of a problem with
349  		 * durable storage, so avoid a write verifier reset.
350  		 */
351  		break;
352  	default:
353  		nfsd_reset_write_verifier(nn);
354  		trace_nfsd_writeverf_reset(nn, rqstp, err);
355  	}
356  }
357  
358  /*
359   * Commit metadata changes to stable storage.
360   */
361  static int
commit_inode_metadata(struct inode * inode)362  commit_inode_metadata(struct inode *inode)
363  {
364  	const struct export_operations *export_ops = inode->i_sb->s_export_op;
365  
366  	if (export_ops->commit_metadata)
367  		return export_ops->commit_metadata(inode);
368  	return sync_inode_metadata(inode, 1);
369  }
370  
371  static int
commit_metadata(struct svc_fh * fhp)372  commit_metadata(struct svc_fh *fhp)
373  {
374  	struct inode *inode = d_inode(fhp->fh_dentry);
375  
376  	if (!EX_ISSYNC(fhp->fh_export))
377  		return 0;
378  	return commit_inode_metadata(inode);
379  }
380  
381  /*
382   * Go over the attributes and take care of the small differences between
383   * NFS semantics and what Linux expects.
384   */
385  static void
nfsd_sanitize_attrs(struct inode * inode,struct iattr * iap)386  nfsd_sanitize_attrs(struct inode *inode, struct iattr *iap)
387  {
388  	/* Ignore mode updates on symlinks */
389  	if (S_ISLNK(inode->i_mode))
390  		iap->ia_valid &= ~ATTR_MODE;
391  
392  	/* sanitize the mode change */
393  	if (iap->ia_valid & ATTR_MODE) {
394  		iap->ia_mode &= S_IALLUGO;
395  		iap->ia_mode |= (inode->i_mode & ~S_IALLUGO);
396  	}
397  
398  	/* Revoke setuid/setgid on chown */
399  	if (!S_ISDIR(inode->i_mode) &&
400  	    ((iap->ia_valid & ATTR_UID) || (iap->ia_valid & ATTR_GID))) {
401  		iap->ia_valid |= ATTR_KILL_PRIV;
402  		if (iap->ia_valid & ATTR_MODE) {
403  			/* we're setting mode too, just clear the s*id bits */
404  			iap->ia_mode &= ~S_ISUID;
405  			if (iap->ia_mode & S_IXGRP)
406  				iap->ia_mode &= ~S_ISGID;
407  		} else {
408  			/* set ATTR_KILL_* bits and let VFS handle it */
409  			iap->ia_valid |= ATTR_KILL_SUID;
410  			iap->ia_valid |=
411  				setattr_should_drop_sgid(&nop_mnt_idmap, inode);
412  		}
413  	}
414  }
415  
416  static __be32
nfsd_get_write_access(struct svc_rqst * rqstp,struct svc_fh * fhp,struct iattr * iap)417  nfsd_get_write_access(struct svc_rqst *rqstp, struct svc_fh *fhp,
418  		struct iattr *iap)
419  {
420  	struct inode *inode = d_inode(fhp->fh_dentry);
421  
422  	if (iap->ia_size < inode->i_size) {
423  		__be32 err;
424  
425  		err = nfsd_permission(&rqstp->rq_cred,
426  				      fhp->fh_export, fhp->fh_dentry,
427  				      NFSD_MAY_TRUNC | NFSD_MAY_OWNER_OVERRIDE);
428  		if (err)
429  			return err;
430  	}
431  	return nfserrno(get_write_access(inode));
432  }
433  
__nfsd_setattr(struct dentry * dentry,struct iattr * iap)434  static int __nfsd_setattr(struct dentry *dentry, struct iattr *iap)
435  {
436  	int host_err;
437  
438  	if (iap->ia_valid & ATTR_SIZE) {
439  		/*
440  		 * RFC5661, Section 18.30.4:
441  		 *   Changing the size of a file with SETATTR indirectly
442  		 *   changes the time_modify and change attributes.
443  		 *
444  		 * (and similar for the older RFCs)
445  		 */
446  		struct iattr size_attr = {
447  			.ia_valid	= ATTR_SIZE | ATTR_CTIME | ATTR_MTIME,
448  			.ia_size	= iap->ia_size,
449  		};
450  
451  		if (iap->ia_size < 0)
452  			return -EFBIG;
453  
454  		host_err = notify_change(&nop_mnt_idmap, dentry, &size_attr, NULL);
455  		if (host_err)
456  			return host_err;
457  		iap->ia_valid &= ~ATTR_SIZE;
458  
459  		/*
460  		 * Avoid the additional setattr call below if the only other
461  		 * attribute that the client sends is the mtime, as we update
462  		 * it as part of the size change above.
463  		 */
464  		if ((iap->ia_valid & ~ATTR_MTIME) == 0)
465  			return 0;
466  	}
467  
468  	if (!iap->ia_valid)
469  		return 0;
470  
471  	iap->ia_valid |= ATTR_CTIME;
472  	return notify_change(&nop_mnt_idmap, dentry, iap, NULL);
473  }
474  
475  /**
476   * nfsd_setattr - Set various file attributes.
477   * @rqstp: controlling RPC transaction
478   * @fhp: filehandle of target
479   * @attr: attributes to set
480   * @guardtime: do not act if ctime.tv_sec does not match this timestamp
481   *
482   * This call may adjust the contents of @attr (in particular, this
483   * call may change the bits in the na_iattr.ia_valid field).
484   *
485   * Returns nfs_ok on success, otherwise an NFS status code is
486   * returned. Caller must release @fhp by calling fh_put in either
487   * case.
488   */
489  __be32
nfsd_setattr(struct svc_rqst * rqstp,struct svc_fh * fhp,struct nfsd_attrs * attr,const struct timespec64 * guardtime)490  nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
491  	     struct nfsd_attrs *attr, const struct timespec64 *guardtime)
492  {
493  	struct dentry	*dentry;
494  	struct inode	*inode;
495  	struct iattr	*iap = attr->na_iattr;
496  	int		accmode = NFSD_MAY_SATTR;
497  	umode_t		ftype = 0;
498  	__be32		err;
499  	int		host_err = 0;
500  	bool		get_write_count;
501  	bool		size_change = (iap->ia_valid & ATTR_SIZE);
502  	int		retries;
503  
504  	if (iap->ia_valid & ATTR_SIZE) {
505  		accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE;
506  		ftype = S_IFREG;
507  	}
508  
509  	/*
510  	 * If utimes(2) and friends are called with times not NULL, we should
511  	 * not set NFSD_MAY_WRITE bit. Otherwise fh_verify->nfsd_permission
512  	 * will return EACCES, when the caller's effective UID does not match
513  	 * the owner of the file, and the caller is not privileged. In this
514  	 * situation, we should return EPERM(notify_change will return this).
515  	 */
516  	if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME)) {
517  		accmode |= NFSD_MAY_OWNER_OVERRIDE;
518  		if (!(iap->ia_valid & (ATTR_ATIME_SET | ATTR_MTIME_SET)))
519  			accmode |= NFSD_MAY_WRITE;
520  	}
521  
522  	/* Callers that do fh_verify should do the fh_want_write: */
523  	get_write_count = !fhp->fh_dentry;
524  
525  	/* Get inode */
526  	err = fh_verify(rqstp, fhp, ftype, accmode);
527  	if (err)
528  		return err;
529  	if (get_write_count) {
530  		host_err = fh_want_write(fhp);
531  		if (host_err)
532  			goto out;
533  	}
534  
535  	dentry = fhp->fh_dentry;
536  	inode = d_inode(dentry);
537  
538  	nfsd_sanitize_attrs(inode, iap);
539  
540  	/*
541  	 * The size case is special, it changes the file in addition to the
542  	 * attributes, and file systems don't expect it to be mixed with
543  	 * "random" attribute changes.  We thus split out the size change
544  	 * into a separate call to ->setattr, and do the rest as a separate
545  	 * setattr call.
546  	 */
547  	if (size_change) {
548  		err = nfsd_get_write_access(rqstp, fhp, iap);
549  		if (err)
550  			return err;
551  	}
552  
553  	inode_lock(inode);
554  	err = fh_fill_pre_attrs(fhp);
555  	if (err)
556  		goto out_unlock;
557  
558  	if (guardtime) {
559  		struct timespec64 ctime = inode_get_ctime(inode);
560  		if ((u32)guardtime->tv_sec != (u32)ctime.tv_sec ||
561  		    guardtime->tv_nsec != ctime.tv_nsec) {
562  			err = nfserr_notsync;
563  			goto out_fill_attrs;
564  		}
565  	}
566  
567  	for (retries = 1;;) {
568  		struct iattr attrs;
569  
570  		/*
571  		 * notify_change() can alter its iattr argument, making
572  		 * @iap unsuitable for submission multiple times. Make a
573  		 * copy for every loop iteration.
574  		 */
575  		attrs = *iap;
576  		host_err = __nfsd_setattr(dentry, &attrs);
577  		if (host_err != -EAGAIN || !retries--)
578  			break;
579  		if (!nfsd_wait_for_delegreturn(rqstp, inode))
580  			break;
581  	}
582  	if (attr->na_seclabel && attr->na_seclabel->len)
583  		attr->na_labelerr = security_inode_setsecctx(dentry,
584  			attr->na_seclabel->data, attr->na_seclabel->len);
585  	if (IS_ENABLED(CONFIG_FS_POSIX_ACL) && attr->na_pacl)
586  		attr->na_aclerr = set_posix_acl(&nop_mnt_idmap,
587  						dentry, ACL_TYPE_ACCESS,
588  						attr->na_pacl);
589  	if (IS_ENABLED(CONFIG_FS_POSIX_ACL) &&
590  	    !attr->na_aclerr && attr->na_dpacl && S_ISDIR(inode->i_mode))
591  		attr->na_aclerr = set_posix_acl(&nop_mnt_idmap,
592  						dentry, ACL_TYPE_DEFAULT,
593  						attr->na_dpacl);
594  out_fill_attrs:
595  	/*
596  	 * RFC 1813 Section 3.3.2 does not mandate that an NFS server
597  	 * returns wcc_data for SETATTR. Some client implementations
598  	 * depend on receiving wcc_data, however, to sort out partial
599  	 * updates (eg., the client requested that size and mode be
600  	 * modified, but the server changed only the file mode).
601  	 */
602  	fh_fill_post_attrs(fhp);
603  out_unlock:
604  	inode_unlock(inode);
605  	if (size_change)
606  		put_write_access(inode);
607  out:
608  	if (!host_err)
609  		host_err = commit_metadata(fhp);
610  	return err != 0 ? err : nfserrno(host_err);
611  }
612  
613  #if defined(CONFIG_NFSD_V4)
614  /*
615   * NFS junction information is stored in an extended attribute.
616   */
617  #define NFSD_JUNCTION_XATTR_NAME	XATTR_TRUSTED_PREFIX "junction.nfs"
618  
619  /**
620   * nfsd4_is_junction - Test if an object could be an NFS junction
621   *
622   * @dentry: object to test
623   *
624   * Returns 1 if "dentry" appears to contain NFS junction information.
625   * Otherwise 0 is returned.
626   */
nfsd4_is_junction(struct dentry * dentry)627  int nfsd4_is_junction(struct dentry *dentry)
628  {
629  	struct inode *inode = d_inode(dentry);
630  
631  	if (inode == NULL)
632  		return 0;
633  	if (inode->i_mode & S_IXUGO)
634  		return 0;
635  	if (!(inode->i_mode & S_ISVTX))
636  		return 0;
637  	if (vfs_getxattr(&nop_mnt_idmap, dentry, NFSD_JUNCTION_XATTR_NAME,
638  			 NULL, 0) <= 0)
639  		return 0;
640  	return 1;
641  }
642  
nfsd4_get_cstate(struct svc_rqst * rqstp)643  static struct nfsd4_compound_state *nfsd4_get_cstate(struct svc_rqst *rqstp)
644  {
645  	return &((struct nfsd4_compoundres *)rqstp->rq_resp)->cstate;
646  }
647  
nfsd4_clone_file_range(struct svc_rqst * rqstp,struct nfsd_file * nf_src,u64 src_pos,struct nfsd_file * nf_dst,u64 dst_pos,u64 count,bool sync)648  __be32 nfsd4_clone_file_range(struct svc_rqst *rqstp,
649  		struct nfsd_file *nf_src, u64 src_pos,
650  		struct nfsd_file *nf_dst, u64 dst_pos,
651  		u64 count, bool sync)
652  {
653  	struct file *src = nf_src->nf_file;
654  	struct file *dst = nf_dst->nf_file;
655  	errseq_t since;
656  	loff_t cloned;
657  	__be32 ret = 0;
658  
659  	since = READ_ONCE(dst->f_wb_err);
660  	cloned = vfs_clone_file_range(src, src_pos, dst, dst_pos, count, 0);
661  	if (cloned < 0) {
662  		ret = nfserrno(cloned);
663  		goto out_err;
664  	}
665  	if (count && cloned != count) {
666  		ret = nfserrno(-EINVAL);
667  		goto out_err;
668  	}
669  	if (sync) {
670  		loff_t dst_end = count ? dst_pos + count - 1 : LLONG_MAX;
671  		int status = vfs_fsync_range(dst, dst_pos, dst_end, 0);
672  
673  		if (!status)
674  			status = filemap_check_wb_err(dst->f_mapping, since);
675  		if (!status)
676  			status = commit_inode_metadata(file_inode(src));
677  		if (status < 0) {
678  			struct nfsd_net *nn = net_generic(nf_dst->nf_net,
679  							  nfsd_net_id);
680  
681  			trace_nfsd_clone_file_range_err(rqstp,
682  					&nfsd4_get_cstate(rqstp)->save_fh,
683  					src_pos,
684  					&nfsd4_get_cstate(rqstp)->current_fh,
685  					dst_pos,
686  					count, status);
687  			commit_reset_write_verifier(nn, rqstp, status);
688  			ret = nfserrno(status);
689  		}
690  	}
691  out_err:
692  	return ret;
693  }
694  
nfsd_copy_file_range(struct file * src,u64 src_pos,struct file * dst,u64 dst_pos,u64 count)695  ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst,
696  			     u64 dst_pos, u64 count)
697  {
698  	ssize_t ret;
699  
700  	/*
701  	 * Limit copy to 4MB to prevent indefinitely blocking an nfsd
702  	 * thread and client rpc slot.  The choice of 4MB is somewhat
703  	 * arbitrary.  We might instead base this on r/wsize, or make it
704  	 * tunable, or use a time instead of a byte limit, or implement
705  	 * asynchronous copy.  In theory a client could also recognize a
706  	 * limit like this and pipeline multiple COPY requests.
707  	 */
708  	count = min_t(u64, count, 1 << 22);
709  	ret = vfs_copy_file_range(src, src_pos, dst, dst_pos, count, 0);
710  
711  	if (ret == -EOPNOTSUPP || ret == -EXDEV)
712  		ret = vfs_copy_file_range(src, src_pos, dst, dst_pos, count,
713  					  COPY_FILE_SPLICE);
714  	return ret;
715  }
716  
nfsd4_vfs_fallocate(struct svc_rqst * rqstp,struct svc_fh * fhp,struct file * file,loff_t offset,loff_t len,int flags)717  __be32 nfsd4_vfs_fallocate(struct svc_rqst *rqstp, struct svc_fh *fhp,
718  			   struct file *file, loff_t offset, loff_t len,
719  			   int flags)
720  {
721  	int error;
722  
723  	if (!S_ISREG(file_inode(file)->i_mode))
724  		return nfserr_inval;
725  
726  	error = vfs_fallocate(file, flags, offset, len);
727  	if (!error)
728  		error = commit_metadata(fhp);
729  
730  	return nfserrno(error);
731  }
732  #endif /* defined(CONFIG_NFSD_V4) */
733  
734  /*
735   * Check server access rights to a file system object
736   */
737  struct accessmap {
738  	u32		access;
739  	int		how;
740  };
741  static struct accessmap	nfs3_regaccess[] = {
742      {	NFS3_ACCESS_READ,	NFSD_MAY_READ			},
743      {	NFS3_ACCESS_EXECUTE,	NFSD_MAY_EXEC			},
744      {	NFS3_ACCESS_MODIFY,	NFSD_MAY_WRITE|NFSD_MAY_TRUNC	},
745      {	NFS3_ACCESS_EXTEND,	NFSD_MAY_WRITE			},
746  
747  #ifdef CONFIG_NFSD_V4
748      {	NFS4_ACCESS_XAREAD,	NFSD_MAY_READ			},
749      {	NFS4_ACCESS_XAWRITE,	NFSD_MAY_WRITE			},
750      {	NFS4_ACCESS_XALIST,	NFSD_MAY_READ			},
751  #endif
752  
753      {	0,			0				}
754  };
755  
756  static struct accessmap	nfs3_diraccess[] = {
757      {	NFS3_ACCESS_READ,	NFSD_MAY_READ			},
758      {	NFS3_ACCESS_LOOKUP,	NFSD_MAY_EXEC			},
759      {	NFS3_ACCESS_MODIFY,	NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC},
760      {	NFS3_ACCESS_EXTEND,	NFSD_MAY_EXEC|NFSD_MAY_WRITE	},
761      {	NFS3_ACCESS_DELETE,	NFSD_MAY_REMOVE			},
762  
763  #ifdef CONFIG_NFSD_V4
764      {	NFS4_ACCESS_XAREAD,	NFSD_MAY_READ			},
765      {	NFS4_ACCESS_XAWRITE,	NFSD_MAY_WRITE			},
766      {	NFS4_ACCESS_XALIST,	NFSD_MAY_READ			},
767  #endif
768  
769      {	0,			0				}
770  };
771  
772  static struct accessmap	nfs3_anyaccess[] = {
773  	/* Some clients - Solaris 2.6 at least, make an access call
774  	 * to the server to check for access for things like /dev/null
775  	 * (which really, the server doesn't care about).  So
776  	 * We provide simple access checking for them, looking
777  	 * mainly at mode bits, and we make sure to ignore read-only
778  	 * filesystem checks
779  	 */
780      {	NFS3_ACCESS_READ,	NFSD_MAY_READ			},
781      {	NFS3_ACCESS_EXECUTE,	NFSD_MAY_EXEC			},
782      {	NFS3_ACCESS_MODIFY,	NFSD_MAY_WRITE|NFSD_MAY_LOCAL_ACCESS	},
783      {	NFS3_ACCESS_EXTEND,	NFSD_MAY_WRITE|NFSD_MAY_LOCAL_ACCESS	},
784  
785      {	0,			0				}
786  };
787  
788  __be32
nfsd_access(struct svc_rqst * rqstp,struct svc_fh * fhp,u32 * access,u32 * supported)789  nfsd_access(struct svc_rqst *rqstp, struct svc_fh *fhp, u32 *access, u32 *supported)
790  {
791  	struct accessmap	*map;
792  	struct svc_export	*export;
793  	struct dentry		*dentry;
794  	u32			query, result = 0, sresult = 0;
795  	__be32			error;
796  
797  	error = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP);
798  	if (error)
799  		goto out;
800  
801  	export = fhp->fh_export;
802  	dentry = fhp->fh_dentry;
803  
804  	if (d_is_reg(dentry))
805  		map = nfs3_regaccess;
806  	else if (d_is_dir(dentry))
807  		map = nfs3_diraccess;
808  	else
809  		map = nfs3_anyaccess;
810  
811  
812  	query = *access;
813  	for  (; map->access; map++) {
814  		if (map->access & query) {
815  			__be32 err2;
816  
817  			sresult |= map->access;
818  
819  			err2 = nfsd_permission(&rqstp->rq_cred, export,
820  					       dentry, map->how);
821  			switch (err2) {
822  			case nfs_ok:
823  				result |= map->access;
824  				break;
825  
826  			/* the following error codes just mean the access was not allowed,
827  			 * rather than an error occurred */
828  			case nfserr_rofs:
829  			case nfserr_acces:
830  			case nfserr_perm:
831  				/* simply don't "or" in the access bit. */
832  				break;
833  			default:
834  				error = err2;
835  				goto out;
836  			}
837  		}
838  	}
839  	*access = result;
840  	if (supported)
841  		*supported = sresult;
842  
843   out:
844  	return error;
845  }
846  
nfsd_open_break_lease(struct inode * inode,int access)847  int nfsd_open_break_lease(struct inode *inode, int access)
848  {
849  	unsigned int mode;
850  
851  	if (access & NFSD_MAY_NOT_BREAK_LEASE)
852  		return 0;
853  	mode = (access & NFSD_MAY_WRITE) ? O_WRONLY : O_RDONLY;
854  	return break_lease(inode, mode | O_NONBLOCK);
855  }
856  
857  /*
858   * Open an existing file or directory.
859   * The may_flags argument indicates the type of open (read/write/lock)
860   * and additional flags.
861   * N.B. After this call fhp needs an fh_put
862   */
863  static int
__nfsd_open(struct svc_rqst * rqstp,struct svc_fh * fhp,umode_t type,int may_flags,struct file ** filp)864  __nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
865  			int may_flags, struct file **filp)
866  {
867  	struct path	path;
868  	struct inode	*inode;
869  	struct file	*file;
870  	int		flags = O_RDONLY|O_LARGEFILE;
871  	int		host_err = -EPERM;
872  
873  	path.mnt = fhp->fh_export->ex_path.mnt;
874  	path.dentry = fhp->fh_dentry;
875  	inode = d_inode(path.dentry);
876  
877  	if (IS_APPEND(inode) && (may_flags & NFSD_MAY_WRITE))
878  		goto out;
879  
880  	if (!inode->i_fop)
881  		goto out;
882  
883  	host_err = nfsd_open_break_lease(inode, may_flags);
884  	if (host_err) /* NOMEM or WOULDBLOCK */
885  		goto out;
886  
887  	if (may_flags & NFSD_MAY_WRITE) {
888  		if (may_flags & NFSD_MAY_READ)
889  			flags = O_RDWR|O_LARGEFILE;
890  		else
891  			flags = O_WRONLY|O_LARGEFILE;
892  	}
893  
894  	file = dentry_open(&path, flags, current_cred());
895  	if (IS_ERR(file)) {
896  		host_err = PTR_ERR(file);
897  		goto out;
898  	}
899  
900  	host_err = security_file_post_open(file, may_flags);
901  	if (host_err) {
902  		fput(file);
903  		goto out;
904  	}
905  
906  	*filp = file;
907  out:
908  	return host_err;
909  }
910  
911  __be32
nfsd_open(struct svc_rqst * rqstp,struct svc_fh * fhp,umode_t type,int may_flags,struct file ** filp)912  nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
913  		int may_flags, struct file **filp)
914  {
915  	__be32 err;
916  	int host_err;
917  	bool retried = false;
918  
919  	/*
920  	 * If we get here, then the client has already done an "open",
921  	 * and (hopefully) checked permission - so allow OWNER_OVERRIDE
922  	 * in case a chmod has now revoked permission.
923  	 *
924  	 * Arguably we should also allow the owner override for
925  	 * directories, but we never have and it doesn't seem to have
926  	 * caused anyone a problem.  If we were to change this, note
927  	 * also that our filldir callbacks would need a variant of
928  	 * lookup_one_len that doesn't check permissions.
929  	 */
930  	if (type == S_IFREG)
931  		may_flags |= NFSD_MAY_OWNER_OVERRIDE;
932  retry:
933  	err = fh_verify(rqstp, fhp, type, may_flags);
934  	if (!err) {
935  		host_err = __nfsd_open(rqstp, fhp, type, may_flags, filp);
936  		if (host_err == -EOPENSTALE && !retried) {
937  			retried = true;
938  			fh_put(fhp);
939  			goto retry;
940  		}
941  		err = nfserrno(host_err);
942  	}
943  	return err;
944  }
945  
946  /**
947   * nfsd_open_verified - Open a regular file for the filecache
948   * @rqstp: RPC request
949   * @fhp: NFS filehandle of the file to open
950   * @may_flags: internal permission flags
951   * @filp: OUT: open "struct file *"
952   *
953   * Returns zero on success, or a negative errno value.
954   */
955  int
nfsd_open_verified(struct svc_rqst * rqstp,struct svc_fh * fhp,int may_flags,struct file ** filp)956  nfsd_open_verified(struct svc_rqst *rqstp, struct svc_fh *fhp, int may_flags,
957  		   struct file **filp)
958  {
959  	return __nfsd_open(rqstp, fhp, S_IFREG, may_flags, filp);
960  }
961  
962  /*
963   * Grab and keep cached pages associated with a file in the svc_rqst
964   * so that they can be passed to the network sendmsg routines
965   * directly. They will be released after the sending has completed.
966   *
967   * Return values: Number of bytes consumed, or -EIO if there are no
968   * remaining pages in rqstp->rq_pages.
969   */
970  static int
nfsd_splice_actor(struct pipe_inode_info * pipe,struct pipe_buffer * buf,struct splice_desc * sd)971  nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
972  		  struct splice_desc *sd)
973  {
974  	struct svc_rqst *rqstp = sd->u.data;
975  	struct page *page = buf->page;	// may be a compound one
976  	unsigned offset = buf->offset;
977  	struct page *last_page;
978  
979  	last_page = page + (offset + sd->len - 1) / PAGE_SIZE;
980  	for (page += offset / PAGE_SIZE; page <= last_page; page++) {
981  		/*
982  		 * Skip page replacement when extending the contents of the
983  		 * current page.  But note that we may get two zero_pages in a
984  		 * row from shmem.
985  		 */
986  		if (page == *(rqstp->rq_next_page - 1) &&
987  		    offset_in_page(rqstp->rq_res.page_base +
988  				   rqstp->rq_res.page_len))
989  			continue;
990  		if (unlikely(!svc_rqst_replace_page(rqstp, page)))
991  			return -EIO;
992  	}
993  	if (rqstp->rq_res.page_len == 0)	// first call
994  		rqstp->rq_res.page_base = offset % PAGE_SIZE;
995  	rqstp->rq_res.page_len += sd->len;
996  	return sd->len;
997  }
998  
nfsd_direct_splice_actor(struct pipe_inode_info * pipe,struct splice_desc * sd)999  static int nfsd_direct_splice_actor(struct pipe_inode_info *pipe,
1000  				    struct splice_desc *sd)
1001  {
1002  	return __splice_from_pipe(pipe, sd, nfsd_splice_actor);
1003  }
1004  
nfsd_eof_on_read(struct file * file,loff_t offset,ssize_t len,size_t expected)1005  static u32 nfsd_eof_on_read(struct file *file, loff_t offset, ssize_t len,
1006  		size_t expected)
1007  {
1008  	if (expected != 0 && len == 0)
1009  		return 1;
1010  	if (offset+len >= i_size_read(file_inode(file)))
1011  		return 1;
1012  	return 0;
1013  }
1014  
nfsd_finish_read(struct svc_rqst * rqstp,struct svc_fh * fhp,struct file * file,loff_t offset,unsigned long * count,u32 * eof,ssize_t host_err)1015  static __be32 nfsd_finish_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
1016  			       struct file *file, loff_t offset,
1017  			       unsigned long *count, u32 *eof, ssize_t host_err)
1018  {
1019  	if (host_err >= 0) {
1020  		struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
1021  
1022  		nfsd_stats_io_read_add(nn, fhp->fh_export, host_err);
1023  		*eof = nfsd_eof_on_read(file, offset, host_err, *count);
1024  		*count = host_err;
1025  		fsnotify_access(file);
1026  		trace_nfsd_read_io_done(rqstp, fhp, offset, *count);
1027  		return 0;
1028  	} else {
1029  		trace_nfsd_read_err(rqstp, fhp, offset, host_err);
1030  		return nfserrno(host_err);
1031  	}
1032  }
1033  
1034  /**
1035   * nfsd_splice_read - Perform a VFS read using a splice pipe
1036   * @rqstp: RPC transaction context
1037   * @fhp: file handle of file to be read
1038   * @file: opened struct file of file to be read
1039   * @offset: starting byte offset
1040   * @count: IN: requested number of bytes; OUT: number of bytes read
1041   * @eof: OUT: set non-zero if operation reached the end of the file
1042   *
1043   * Returns nfs_ok on success, otherwise an nfserr stat value is
1044   * returned.
1045   */
nfsd_splice_read(struct svc_rqst * rqstp,struct svc_fh * fhp,struct file * file,loff_t offset,unsigned long * count,u32 * eof)1046  __be32 nfsd_splice_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
1047  			struct file *file, loff_t offset, unsigned long *count,
1048  			u32 *eof)
1049  {
1050  	struct splice_desc sd = {
1051  		.len		= 0,
1052  		.total_len	= *count,
1053  		.pos		= offset,
1054  		.u.data		= rqstp,
1055  	};
1056  	ssize_t host_err;
1057  
1058  	trace_nfsd_read_splice(rqstp, fhp, offset, *count);
1059  	host_err = rw_verify_area(READ, file, &offset, *count);
1060  	if (!host_err)
1061  		host_err = splice_direct_to_actor(file, &sd,
1062  						  nfsd_direct_splice_actor);
1063  	return nfsd_finish_read(rqstp, fhp, file, offset, count, eof, host_err);
1064  }
1065  
1066  /**
1067   * nfsd_iter_read - Perform a VFS read using an iterator
1068   * @rqstp: RPC transaction context
1069   * @fhp: file handle of file to be read
1070   * @file: opened struct file of file to be read
1071   * @offset: starting byte offset
1072   * @count: IN: requested number of bytes; OUT: number of bytes read
1073   * @base: offset in first page of read buffer
1074   * @eof: OUT: set non-zero if operation reached the end of the file
1075   *
1076   * Some filesystems or situations cannot use nfsd_splice_read. This
1077   * function is the slightly less-performant fallback for those cases.
1078   *
1079   * Returns nfs_ok on success, otherwise an nfserr stat value is
1080   * returned.
1081   */
nfsd_iter_read(struct svc_rqst * rqstp,struct svc_fh * fhp,struct file * file,loff_t offset,unsigned long * count,unsigned int base,u32 * eof)1082  __be32 nfsd_iter_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
1083  		      struct file *file, loff_t offset, unsigned long *count,
1084  		      unsigned int base, u32 *eof)
1085  {
1086  	unsigned long v, total;
1087  	struct iov_iter iter;
1088  	loff_t ppos = offset;
1089  	struct page *page;
1090  	ssize_t host_err;
1091  
1092  	v = 0;
1093  	total = *count;
1094  	while (total) {
1095  		page = *(rqstp->rq_next_page++);
1096  		rqstp->rq_vec[v].iov_base = page_address(page) + base;
1097  		rqstp->rq_vec[v].iov_len = min_t(size_t, total, PAGE_SIZE - base);
1098  		total -= rqstp->rq_vec[v].iov_len;
1099  		++v;
1100  		base = 0;
1101  	}
1102  	WARN_ON_ONCE(v > ARRAY_SIZE(rqstp->rq_vec));
1103  
1104  	trace_nfsd_read_vector(rqstp, fhp, offset, *count);
1105  	iov_iter_kvec(&iter, ITER_DEST, rqstp->rq_vec, v, *count);
1106  	host_err = vfs_iter_read(file, &iter, &ppos, 0);
1107  	return nfsd_finish_read(rqstp, fhp, file, offset, count, eof, host_err);
1108  }
1109  
1110  /*
1111   * Gathered writes: If another process is currently writing to the file,
1112   * there's a high chance this is another nfsd (triggered by a bulk write
1113   * from a client's biod). Rather than syncing the file with each write
1114   * request, we sleep for 10 msec.
1115   *
1116   * I don't know if this roughly approximates C. Juszak's idea of
1117   * gathered writes, but it's a nice and simple solution (IMHO), and it
1118   * seems to work:-)
1119   *
1120   * Note: we do this only in the NFSv2 case, since v3 and higher have a
1121   * better tool (separate unstable writes and commits) for solving this
1122   * problem.
1123   */
wait_for_concurrent_writes(struct file * file)1124  static int wait_for_concurrent_writes(struct file *file)
1125  {
1126  	struct inode *inode = file_inode(file);
1127  	static ino_t last_ino;
1128  	static dev_t last_dev;
1129  	int err = 0;
1130  
1131  	if (atomic_read(&inode->i_writecount) > 1
1132  	    || (last_ino == inode->i_ino && last_dev == inode->i_sb->s_dev)) {
1133  		dprintk("nfsd: write defer %d\n", task_pid_nr(current));
1134  		msleep(10);
1135  		dprintk("nfsd: write resume %d\n", task_pid_nr(current));
1136  	}
1137  
1138  	if (inode->i_state & I_DIRTY) {
1139  		dprintk("nfsd: write sync %d\n", task_pid_nr(current));
1140  		err = vfs_fsync(file, 0);
1141  	}
1142  	last_ino = inode->i_ino;
1143  	last_dev = inode->i_sb->s_dev;
1144  	return err;
1145  }
1146  
/**
 * nfsd_vfs_write - Write data to an already-opened file
 * @rqstp: RPC transaction context
 * @fhp: file handle of the file being written; supplies the export and
 *       the write-gathering setting
 * @nf: nfsd_file whose nf_file is the target of the write
 * @offset: starting byte offset
 * @vec: kvec array holding the data to write
 * @vlen: number of entries in @vec
 * @cnt: IN: number of bytes to write; OUT: number of bytes written
 *       (only updated if the write itself succeeds)
 * @stable: requested stability; treated as NFS_UNSTABLE unless the
 *          export is EX_ISSYNC
 * @verf: if non-NULL, filled in with the server's write verifier
 *
 * Returns nfs_ok on success, otherwise an nfserr stat value is
 * returned.
 */
__be32
nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
				loff_t offset, struct kvec *vec, int vlen,
				unsigned long *cnt, int stable,
				__be32 *verf)
{
	struct nfsd_net		*nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
	struct file		*file = nf->nf_file;
	struct super_block	*sb = file_inode(file)->i_sb;
	struct svc_export	*exp;
	struct iov_iter		iter;
	errseq_t		since;
	__be32			nfserr;
	int			host_err;
	loff_t			pos = offset;
	unsigned long		exp_op_flags = 0;
	/* Snapshot of the task flags, used to restore PF_LOCAL_THROTTLE. */
	unsigned int		pflags = current->flags;
	rwf_t			flags = 0;
	bool			restore_flags = false;

	trace_nfsd_write_opened(rqstp, fhp, offset, *cnt);

	/* s_export_op may be NULL; in that case no export-op flags apply. */
	if (sb->s_export_op)
		exp_op_flags = sb->s_export_op->flags;

	if (test_bit(RQ_LOCAL, &rqstp->rq_flags) &&
	    !(exp_op_flags & EXPORT_OP_REMOTE_FS)) {
		/*
		 * We want throttling in balance_dirty_pages()
		 * and shrink_inactive_list() to only consider
		 * the backingdev we are writing to, so that nfs to
		 * localhost doesn't cause nfsd to lock up due to all
		 * the client's dirty pages or its congested queue.
		 */
		current->flags |= PF_LOCAL_THROTTLE;
		restore_flags = true;
	}

	exp = fhp->fh_export;

	/* A stable write is only honoured on a sync export. */
	if (!EX_ISSYNC(exp))
		stable = NFS_UNSTABLE;

	/*
	 * Stable writes are issued synchronously, unless write gathering
	 * is in use: then the sync is left to
	 * wait_for_concurrent_writes() below.
	 */
	if (stable && !fhp->fh_use_wgather)
		flags |= RWF_SYNC;

	iov_iter_kvec(&iter, ITER_SOURCE, vec, vlen, *cnt);
	/*
	 * Sample the file's writeback error state before writing; it is
	 * handed to filemap_check_wb_err() once the write has completed.
	 */
	since = READ_ONCE(file->f_wb_err);
	/* The verifier is copied out before the write is attempted. */
	if (verf)
		nfsd_copy_write_verifier(verf, nn);
	host_err = vfs_iter_write(file, &iter, &pos, flags);
	if (host_err < 0) {
		commit_reset_write_verifier(nn, rqstp, host_err);
		goto out_nfserr;
	}
	*cnt = host_err;
	nfsd_stats_io_write_add(nn, exp, *cnt);
	fsnotify_modify(file);
	host_err = filemap_check_wb_err(file->f_mapping, since);
	if (host_err < 0)
		goto out_nfserr;

	if (stable && fhp->fh_use_wgather) {
		host_err = wait_for_concurrent_writes(file);
		if (host_err < 0)
			commit_reset_write_verifier(nn, rqstp, host_err);
	}

	/* Reached on success as well as failure; host_err tells which. */
out_nfserr:
	if (host_err >= 0) {
		trace_nfsd_write_io_done(rqstp, fhp, offset, *cnt);
		nfserr = nfs_ok;
	} else {
		trace_nfsd_write_err(rqstp, fhp, offset, host_err);
		nfserr = nfserrno(host_err);
	}
	/* Put PF_LOCAL_THROTTLE back to the state saved in pflags. */
	if (restore_flags)
		current_restore_flags(pflags, PF_LOCAL_THROTTLE);
	return nfserr;
}
1227  
1228  /**
1229   * nfsd_read_splice_ok - check if spliced reading is supported
1230   * @rqstp: RPC transaction context
1231   *
1232   * Return values:
1233   *   %true: nfsd_splice_read() may be used
1234   *   %false: nfsd_splice_read() must not be used
1235   *
1236   * NFS READ normally uses splice to send data in-place. However the
1237   * data in cache can change after the reply's MIC is computed but
1238   * before the RPC reply is sent. To prevent the client from
1239   * rejecting the server-computed MIC in this somewhat rare case, do
1240   * not use splice with the GSS integrity and privacy services.
1241   */
nfsd_read_splice_ok(struct svc_rqst * rqstp)1242  bool nfsd_read_splice_ok(struct svc_rqst *rqstp)
1243  {
1244  	switch (svc_auth_flavor(rqstp)) {
1245  	case RPC_AUTH_GSS_KRB5I:
1246  	case RPC_AUTH_GSS_KRB5P:
1247  		return false;
1248  	}
1249  	return true;
1250  }
1251  
1252  /**
1253   * nfsd_read - Read data from a file
1254   * @rqstp: RPC transaction context
1255   * @fhp: file handle of file to be read
1256   * @offset: starting byte offset
1257   * @count: IN: requested number of bytes; OUT: number of bytes read
1258   * @eof: OUT: set non-zero if operation reached the end of the file
1259   *
1260   * The caller must verify that there is enough space in @rqstp.rq_res
1261   * to perform this operation.
1262   *
1263   * N.B. After this call fhp needs an fh_put
1264   *
1265   * Returns nfs_ok on success, otherwise an nfserr stat value is
1266   * returned.
1267   */
nfsd_read(struct svc_rqst * rqstp,struct svc_fh * fhp,loff_t offset,unsigned long * count,u32 * eof)1268  __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
1269  		 loff_t offset, unsigned long *count, u32 *eof)
1270  {
1271  	struct nfsd_file	*nf;
1272  	struct file *file;
1273  	__be32 err;
1274  
1275  	trace_nfsd_read_start(rqstp, fhp, offset, *count);
1276  	err = nfsd_file_acquire_gc(rqstp, fhp, NFSD_MAY_READ, &nf);
1277  	if (err)
1278  		return err;
1279  
1280  	file = nf->nf_file;
1281  	if (file->f_op->splice_read && nfsd_read_splice_ok(rqstp))
1282  		err = nfsd_splice_read(rqstp, fhp, file, offset, count, eof);
1283  	else
1284  		err = nfsd_iter_read(rqstp, fhp, file, offset, count, 0, eof);
1285  
1286  	nfsd_file_put(nf);
1287  	trace_nfsd_read_done(rqstp, fhp, offset, *count);
1288  	return err;
1289  }
1290  
1291  /*
1292   * Write data to a file.
1293   * The stable flag requests synchronous writes.
1294   * N.B. After this call fhp needs an fh_put
1295   */
1296  __be32
nfsd_write(struct svc_rqst * rqstp,struct svc_fh * fhp,loff_t offset,struct kvec * vec,int vlen,unsigned long * cnt,int stable,__be32 * verf)1297  nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
1298  	   struct kvec *vec, int vlen, unsigned long *cnt, int stable,
1299  	   __be32 *verf)
1300  {
1301  	struct nfsd_file *nf;
1302  	__be32 err;
1303  
1304  	trace_nfsd_write_start(rqstp, fhp, offset, *cnt);
1305  
1306  	err = nfsd_file_acquire_gc(rqstp, fhp, NFSD_MAY_WRITE, &nf);
1307  	if (err)
1308  		goto out;
1309  
1310  	err = nfsd_vfs_write(rqstp, fhp, nf, offset, vec,
1311  			vlen, cnt, stable, verf);
1312  	nfsd_file_put(nf);
1313  out:
1314  	trace_nfsd_write_done(rqstp, fhp, offset, *cnt);
1315  	return err;
1316  }
1317  
1318  /**
1319   * nfsd_commit - Commit pending writes to stable storage
1320   * @rqstp: RPC request being processed
1321   * @fhp: NFS filehandle
1322   * @nf: target file
1323   * @offset: raw offset from beginning of file
1324   * @count: raw count of bytes to sync
1325   * @verf: filled in with the server's current write verifier
1326   *
1327   * Note: we guarantee that data that lies within the range specified
1328   * by the 'offset' and 'count' parameters will be synced. The server
1329   * is permitted to sync data that lies outside this range at the
1330   * same time.
1331   *
1332   * Unfortunately we cannot lock the file to make sure we return full WCC
1333   * data to the client, as locking happens lower down in the filesystem.
1334   *
1335   * Return values:
1336   *   An nfsstat value in network byte order.
1337   */
1338  __be32
nfsd_commit(struct svc_rqst * rqstp,struct svc_fh * fhp,struct nfsd_file * nf,u64 offset,u32 count,__be32 * verf)1339  nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
1340  	    u64 offset, u32 count, __be32 *verf)
1341  {
1342  	__be32			err = nfs_ok;
1343  	u64			maxbytes;
1344  	loff_t			start, end;
1345  	struct nfsd_net		*nn;
1346  
1347  	/*
1348  	 * Convert the client-provided (offset, count) range to a
1349  	 * (start, end) range. If the client-provided range falls
1350  	 * outside the maximum file size of the underlying FS,
1351  	 * clamp the sync range appropriately.
1352  	 */
1353  	start = 0;
1354  	end = LLONG_MAX;
1355  	maxbytes = (u64)fhp->fh_dentry->d_sb->s_maxbytes;
1356  	if (offset < maxbytes) {
1357  		start = offset;
1358  		if (count && (offset + count - 1 < maxbytes))
1359  			end = offset + count - 1;
1360  	}
1361  
1362  	nn = net_generic(nf->nf_net, nfsd_net_id);
1363  	if (EX_ISSYNC(fhp->fh_export)) {
1364  		errseq_t since = READ_ONCE(nf->nf_file->f_wb_err);
1365  		int err2;
1366  
1367  		err2 = vfs_fsync_range(nf->nf_file, start, end, 0);
1368  		switch (err2) {
1369  		case 0:
1370  			nfsd_copy_write_verifier(verf, nn);
1371  			err2 = filemap_check_wb_err(nf->nf_file->f_mapping,
1372  						    since);
1373  			err = nfserrno(err2);
1374  			break;
1375  		case -EINVAL:
1376  			err = nfserr_notsupp;
1377  			break;
1378  		default:
1379  			commit_reset_write_verifier(nn, rqstp, err2);
1380  			err = nfserrno(err2);
1381  		}
1382  	} else
1383  		nfsd_copy_write_verifier(verf, nn);
1384  
1385  	return err;
1386  }
1387  
1388  /**
1389   * nfsd_create_setattr - Set a created file's attributes
1390   * @rqstp: RPC transaction being executed
1391   * @fhp: NFS filehandle of parent directory
1392   * @resfhp: NFS filehandle of new object
1393   * @attrs: requested attributes of new object
1394   *
1395   * Returns nfs_ok on success, or an nfsstat in network byte order.
1396   */
1397  __be32
nfsd_create_setattr(struct svc_rqst * rqstp,struct svc_fh * fhp,struct svc_fh * resfhp,struct nfsd_attrs * attrs)1398  nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
1399  		    struct svc_fh *resfhp, struct nfsd_attrs *attrs)
1400  {
1401  	struct iattr *iap = attrs->na_iattr;
1402  	__be32 status;
1403  
1404  	/*
1405  	 * Mode has already been set by file creation.
1406  	 */
1407  	iap->ia_valid &= ~ATTR_MODE;
1408  
1409  	/*
1410  	 * Setting uid/gid works only for root.  Irix appears to
1411  	 * send along the gid on create when it tries to implement
1412  	 * setgid directories via NFS:
1413  	 */
1414  	if (!uid_eq(current_fsuid(), GLOBAL_ROOT_UID))
1415  		iap->ia_valid &= ~(ATTR_UID|ATTR_GID);
1416  
1417  	/*
1418  	 * Callers expect new file metadata to be committed even
1419  	 * if the attributes have not changed.
1420  	 */
1421  	if (nfsd_attrs_valid(attrs))
1422  		status = nfsd_setattr(rqstp, resfhp, attrs, NULL);
1423  	else
1424  		status = nfserrno(commit_metadata(resfhp));
1425  
1426  	/*
1427  	 * Transactional filesystems had a chance to commit changes
1428  	 * for both parent and child simultaneously making the
1429  	 * following commit_metadata a noop in many cases.
1430  	 */
1431  	if (!status)
1432  		status = nfserrno(commit_metadata(fhp));
1433  
1434  	/*
1435  	 * Update the new filehandle to pick up the new attributes.
1436  	 */
1437  	if (!status)
1438  		status = fh_update(resfhp);
1439  
1440  	return status;
1441  }
1442  
1443  /* HPUX client sometimes creates a file in mode 000, and sets size to 0.
1444   * setting size to 0 may fail for some specific file systems by the permission
1445   * checking which requires WRITE permission but the mode is 000.
1446   * we ignore the resizing(to 0) on the just new created file, since the size is
1447   * 0 after file created.
1448   *
1449   * call this only after vfs_create() is called.
1450   * */
1451  static void
nfsd_check_ignore_resizing(struct iattr * iap)1452  nfsd_check_ignore_resizing(struct iattr *iap)
1453  {
1454  	if ((iap->ia_valid & ATTR_SIZE) && (iap->ia_size == 0))
1455  		iap->ia_valid &= ~ATTR_SIZE;
1456  }
1457  
/**
 * nfsd_create_locked - Create a filesystem object in a locked directory
 * @rqstp: RPC transaction being executed
 * @fhp: NFS filehandle of parent directory
 * @attrs: requested attributes of new object; na_iattr->ia_mode and
 *         ia_valid may be modified
 * @type: file type to create: S_IFREG, S_IFDIR, S_IFCHR, S_IFBLK,
 *        S_IFIFO or S_IFSOCK
 * @rdev: device number, passed to vfs_mknod() for the special types
 * @resfhp: NFS filehandle of new object; its fh_dentry is the dentry
 *          that gets created
 *
 * The parent directory should already be locked.
 *
 * Returns nfs_ok on success, or an nfsstat in network byte order.
 */
__be32
nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp,
		   struct nfsd_attrs *attrs,
		   int type, dev_t rdev, struct svc_fh *resfhp)
{
	struct dentry	*dentry, *dchild;
	struct inode	*dirp;
	struct iattr	*iap = attrs->na_iattr;
	__be32		err;
	int		host_err;

	dentry = fhp->fh_dentry;
	dirp = d_inode(dentry);

	/* Our own reference on the child; dropped at the out: label. */
	dchild = dget(resfhp->fh_dentry);
	err = nfsd_permission(&rqstp->rq_cred, fhp->fh_export, dentry,
			      NFSD_MAY_CREATE);
	if (err)
		goto out;

	/*
	 * Start from mode 0 when the client supplied none, keep only the
	 * S_IALLUGO bits, and add the file type.
	 */
	if (!(iap->ia_valid & ATTR_MODE))
		iap->ia_mode = 0;
	iap->ia_mode = (iap->ia_mode & S_IALLUGO) | type;

	/* The umask is applied here only when IS_POSIXACL(dirp) is false. */
	if (!IS_POSIXACL(dirp))
		iap->ia_mode &= ~current_umask();

	err = 0;
	switch (type) {
	case S_IFREG:
		host_err = vfs_create(&nop_mnt_idmap, dirp, dchild,
				      iap->ia_mode, true);
		if (!host_err)
			nfsd_check_ignore_resizing(iap);
		break;
	case S_IFDIR:
		host_err = vfs_mkdir(&nop_mnt_idmap, dirp, dchild, iap->ia_mode);
		/*
		 * If mkdir succeeded but left dchild unhashed, look the
		 * name up again and switch both resfhp and dchild over to
		 * the dentry that the lookup returns.
		 */
		if (!host_err && unlikely(d_unhashed(dchild))) {
			struct dentry *d;
			d = lookup_one_len(dchild->d_name.name,
					   dchild->d_parent,
					   dchild->d_name.len);
			if (IS_ERR(d)) {
				/* Falls through to the host_err check below. */
				host_err = PTR_ERR(d);
				break;
			}
			if (unlikely(d_is_negative(d))) {
				dput(d);
				err = nfserr_serverfault;
				goto out;
			}
			/*
			 * resfhp gets its own reference on d; the
			 * reference returned by the lookup becomes the
			 * local dchild reference.
			 */
			dput(resfhp->fh_dentry);
			resfhp->fh_dentry = dget(d);
			err = fh_update(resfhp);
			dput(dchild);
			dchild = d;
			if (err)
				goto out;
		}
		break;
	case S_IFCHR:
	case S_IFBLK:
	case S_IFIFO:
	case S_IFSOCK:
		host_err = vfs_mknod(&nop_mnt_idmap, dirp, dchild,
				     iap->ia_mode, rdev);
		break;
	default:
		printk(KERN_WARNING "nfsd: bad file type %o in nfsd_create\n",
		       type);
		host_err = -EINVAL;
	}
	if (host_err < 0)
		goto out_nfserr;

	err = nfsd_create_setattr(rqstp, fhp, resfhp, attrs);

out:
	dput(dchild);
	return err;

	/* Map the host errno, then share the common exit above. */
out_nfserr:
	err = nfserrno(host_err);
	goto out;
}
1544  
1545  /*
1546   * Create a filesystem object (regular, directory, special).
1547   * Note that the parent directory is left locked.
1548   *
1549   * N.B. Every call to nfsd_create needs an fh_put for _both_ fhp and resfhp
1550   */
1551  __be32
nfsd_create(struct svc_rqst * rqstp,struct svc_fh * fhp,char * fname,int flen,struct nfsd_attrs * attrs,int type,dev_t rdev,struct svc_fh * resfhp)1552  nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
1553  	    char *fname, int flen, struct nfsd_attrs *attrs,
1554  	    int type, dev_t rdev, struct svc_fh *resfhp)
1555  {
1556  	struct dentry	*dentry, *dchild = NULL;
1557  	__be32		err;
1558  	int		host_err;
1559  
1560  	if (isdotent(fname, flen))
1561  		return nfserr_exist;
1562  
1563  	err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_NOP);
1564  	if (err)
1565  		return err;
1566  
1567  	dentry = fhp->fh_dentry;
1568  
1569  	host_err = fh_want_write(fhp);
1570  	if (host_err)
1571  		return nfserrno(host_err);
1572  
1573  	inode_lock_nested(dentry->d_inode, I_MUTEX_PARENT);
1574  	dchild = lookup_one_len(fname, dentry, flen);
1575  	host_err = PTR_ERR(dchild);
1576  	if (IS_ERR(dchild)) {
1577  		err = nfserrno(host_err);
1578  		goto out_unlock;
1579  	}
1580  	err = fh_compose(resfhp, fhp->fh_export, dchild, fhp);
1581  	/*
1582  	 * We unconditionally drop our ref to dchild as fh_compose will have
1583  	 * already grabbed its own ref for it.
1584  	 */
1585  	dput(dchild);
1586  	if (err)
1587  		goto out_unlock;
1588  	err = fh_fill_pre_attrs(fhp);
1589  	if (err != nfs_ok)
1590  		goto out_unlock;
1591  	err = nfsd_create_locked(rqstp, fhp, attrs, type, rdev, resfhp);
1592  	fh_fill_post_attrs(fhp);
1593  out_unlock:
1594  	inode_unlock(dentry->d_inode);
1595  	return err;
1596  }
1597  
1598  /*
1599   * Read a symlink. On entry, *lenp must contain the maximum path length that
1600   * fits into the buffer. On return, it contains the true length.
1601   * N.B. After this call fhp needs an fh_put
1602   */
1603  __be32
nfsd_readlink(struct svc_rqst * rqstp,struct svc_fh * fhp,char * buf,int * lenp)1604  nfsd_readlink(struct svc_rqst *rqstp, struct svc_fh *fhp, char *buf, int *lenp)
1605  {
1606  	__be32		err;
1607  	const char *link;
1608  	struct path path;
1609  	DEFINE_DELAYED_CALL(done);
1610  	int len;
1611  
1612  	err = fh_verify(rqstp, fhp, S_IFLNK, NFSD_MAY_NOP);
1613  	if (unlikely(err))
1614  		return err;
1615  
1616  	path.mnt = fhp->fh_export->ex_path.mnt;
1617  	path.dentry = fhp->fh_dentry;
1618  
1619  	if (unlikely(!d_is_symlink(path.dentry)))
1620  		return nfserr_inval;
1621  
1622  	touch_atime(&path);
1623  
1624  	link = vfs_get_link(path.dentry, &done);
1625  	if (IS_ERR(link))
1626  		return nfserrno(PTR_ERR(link));
1627  
1628  	len = strlen(link);
1629  	if (len < *lenp)
1630  		*lenp = len;
1631  	memcpy(buf, link, *lenp);
1632  	do_delayed_call(&done);
1633  	return 0;
1634  }
1635  
/**
 * nfsd_symlink - Create a symlink and look up its inode
 * @rqstp: RPC transaction being executed
 * @fhp: NFS filehandle of parent directory
 * @fname: filename of the new symlink
 * @flen: length of @fname
 * @path: content of the new symlink (NUL-terminated)
 * @attrs: requested attributes of new object
 * @resfhp: NFS filehandle of new object
 *
 * An empty @fname or empty @path yields nfserr_noent; a name for which
 * isdotent() is true yields nfserr_exist.
 *
 * N.B. After this call _both_ fhp and resfhp need an fh_put
 *
 * Returns nfs_ok on success, or an nfsstat in network byte order.
 */
__be32
nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
	     char *fname, int flen,
	     char *path, struct nfsd_attrs *attrs,
	     struct svc_fh *resfhp)
{
	struct dentry	*dentry, *dnew;
	/*
	 * cerr holds the fh_compose() result. It is assigned only on the
	 * path that reaches fh_compose(); the one jump that skips it
	 * arrives at out_unlock with err non-zero, so cerr is not read.
	 */
	__be32		err, cerr;
	int		host_err;

	err = nfserr_noent;
	if (!flen || path[0] == '\0')
		goto out;
	err = nfserr_exist;
	if (isdotent(fname, flen))
		goto out;

	err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE);
	if (err)
		goto out;

	host_err = fh_want_write(fhp);
	if (host_err) {
		err = nfserrno(host_err);
		goto out;
	}

	dentry = fhp->fh_dentry;
	inode_lock_nested(dentry->d_inode, I_MUTEX_PARENT);
	dnew = lookup_one_len(fname, dentry, flen);
	if (IS_ERR(dnew)) {
		err = nfserrno(PTR_ERR(dnew));
		/*
		 * Unlock by hand: there is no dentry to dput(), so we
		 * jump past out_unlock straight to out_drop_write.
		 */
		inode_unlock(dentry->d_inode);
		goto out_drop_write;
	}
	err = fh_fill_pre_attrs(fhp);
	if (err != nfs_ok)
		goto out_unlock;
	host_err = vfs_symlink(&nop_mnt_idmap, d_inode(dentry), dnew, path);
	err = nfserrno(host_err);
	/* resfhp is composed whether or not vfs_symlink() succeeded. */
	cerr = fh_compose(resfhp, fhp->fh_export, dnew, fhp);
	/* The result of nfsd_create_setattr() is not checked here. */
	if (!err)
		nfsd_create_setattr(rqstp, fhp, resfhp, attrs);
	fh_fill_post_attrs(fhp);
out_unlock:
	inode_unlock(dentry->d_inode);
	if (!err)
		err = nfserrno(commit_metadata(fhp));
	dput(dnew);
	/* If everything else worked, report any fh_compose() failure. */
	if (err==0) err = cerr;
out_drop_write:
	fh_drop_write(fhp);
out:
	return err;
}
1705  
/*
 * Create a hardlink
 *
 * Links the object named by @tfhp into the directory named by @ffhp
 * under @name (length @len). A directory target yields nfserr_isdir, an
 * empty name nfserr_perm, and a name for which isdotent() is true
 * nfserr_exist.
 *
 * N.B. After this call _both_ ffhp and tfhp need an fh_put
 */
__be32
nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
				char *name, int len, struct svc_fh *tfhp)
{
	struct dentry	*ddir, *dnew, *dold;
	struct inode	*dirp;
	__be32		err;
	int		host_err;

	err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_CREATE);
	if (err)
		goto out;
	err = fh_verify(rqstp, tfhp, 0, NFSD_MAY_NOP);
	if (err)
		goto out;
	err = nfserr_isdir;
	if (d_is_dir(tfhp->fh_dentry))
		goto out;
	err = nfserr_perm;
	if (!len)
		goto out;
	err = nfserr_exist;
	if (isdotent(name, len))
		goto out;

	/* Write access is taken via tfhp and dropped via tfhp below. */
	host_err = fh_want_write(tfhp);
	if (host_err) {
		err = nfserrno(host_err);
		goto out;
	}

	/* The lock, in contrast, is on the directory named by ffhp. */
	ddir = ffhp->fh_dentry;
	dirp = d_inode(ddir);
	inode_lock_nested(dirp, I_MUTEX_PARENT);

	dnew = lookup_one_len(name, ddir, len);
	if (IS_ERR(dnew)) {
		err = nfserrno(PTR_ERR(dnew));
		goto out_unlock;
	}

	dold = tfhp->fh_dentry;

	err = nfserr_noent;
	if (d_really_is_negative(dold))
		goto out_dput;
	err = fh_fill_pre_attrs(ffhp);
	if (err != nfs_ok)
		goto out_dput;
	host_err = vfs_link(dold, &nop_mnt_idmap, dirp, dnew, NULL);
	fh_fill_post_attrs(ffhp);
	/* The directory is unlocked before any metadata is committed. */
	inode_unlock(dirp);
	if (!host_err) {
		err = nfserrno(commit_metadata(ffhp));
		if (!err)
			err = nfserrno(commit_metadata(tfhp));
	} else {
		err = nfserrno(host_err);
	}
	dput(dnew);
out_drop_write:
	fh_drop_write(tfhp);
out:
	return err;

	/*
	 * Error exits taken while the directory is still locked; they
	 * rejoin the common path at out_drop_write.
	 */
out_dput:
	dput(dnew);
out_unlock:
	inode_unlock(dirp);
	goto out_drop_write;
}
1781  
1782  static void
nfsd_close_cached_files(struct dentry * dentry)1783  nfsd_close_cached_files(struct dentry *dentry)
1784  {
1785  	struct inode *inode = d_inode(dentry);
1786  
1787  	if (inode && S_ISREG(inode->i_mode))
1788  		nfsd_file_close_inode_sync(inode);
1789  }
1790  
1791  static bool
nfsd_has_cached_files(struct dentry * dentry)1792  nfsd_has_cached_files(struct dentry *dentry)
1793  {
1794  	bool		ret = false;
1795  	struct inode *inode = d_inode(dentry);
1796  
1797  	if (inode && S_ISREG(inode->i_mode))
1798  		ret = nfsd_file_is_cached(inode);
1799  	return ret;
1800  }
1801  
/*
 * Rename a file
 *
 * Renames @fname (length @flen) in the directory named by @ffhp to
 * @tname (length @tlen) in the directory named by @tfhp. Empty names and
 * names for which isdotent() is true yield nfserr_perm. The two
 * filehandles must belong to exports with the same mount and the same
 * export root dentry, otherwise nfserr_xdev is returned.
 *
 * N.B. After this call _both_ ffhp and tfhp need an fh_put
 */
__be32
nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
			    struct svc_fh *tfhp, char *tname, int tlen)
{
	struct dentry	*fdentry, *tdentry, *odentry, *ndentry, *trap;
	struct inode	*fdir, *tdir;
	__be32		err;
	int		host_err;
	/* Set when the rename must be retried after closing cached files. */
	bool		close_cached = false;

	err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_REMOVE);
	if (err)
		goto out;
	err = fh_verify(rqstp, tfhp, S_IFDIR, NFSD_MAY_CREATE);
	if (err)
		goto out;

	fdentry = ffhp->fh_dentry;
	fdir = d_inode(fdentry);

	tdentry = tfhp->fh_dentry;
	tdir = d_inode(tdentry);

	err = nfserr_perm;
	if (!flen || isdotent(fname, flen) || !tlen || isdotent(tname, tlen))
		goto out;

	err = nfserr_xdev;
	if (ffhp->fh_export->ex_path.mnt != tfhp->fh_export->ex_path.mnt)
		goto out;
	if (ffhp->fh_export->ex_path.dentry != tfhp->fh_export->ex_path.dentry)
		goto out;

	/* Re-entered once cached files on the target have been closed. */
retry:
	host_err = fh_want_write(ffhp);
	if (host_err) {
		err = nfserrno(host_err);
		goto out;
	}

	/* Any lock_rename() failure is reported as nfserr_xdev. */
	trap = lock_rename(tdentry, fdentry);
	if (IS_ERR(trap)) {
		err = nfserr_xdev;
		goto out_want_write;
	}
	err = fh_fill_pre_attrs(ffhp);
	if (err != nfs_ok)
		goto out_unlock;
	err = fh_fill_pre_attrs(tfhp);
	if (err != nfs_ok)
		goto out_unlock;

	odentry = lookup_one_len(fname, fdentry, flen);
	host_err = PTR_ERR(odentry);
	if (IS_ERR(odentry))
		goto out_nfserr;

	/* The source must exist and must not be the lock_rename() trap. */
	host_err = -ENOENT;
	if (d_really_is_negative(odentry))
		goto out_dput_old;
	host_err = -EINVAL;
	if (odentry == trap)
		goto out_dput_old;

	ndentry = lookup_one_len(tname, tdentry, tlen);
	host_err = PTR_ERR(ndentry);
	if (IS_ERR(ndentry))
		goto out_dput_old;
	/* Nor may the target be the trap. */
	host_err = -ENOTEMPTY;
	if (ndentry == trap)
		goto out_dput_new;

	if ((ndentry->d_sb->s_export_op->flags & EXPORT_OP_CLOSE_BEFORE_UNLINK) &&
	    nfsd_has_cached_files(ndentry)) {
		/*
		 * Skip out_dput_new on purpose: the ndentry reference is
		 * still needed after the locks are released, and is
		 * dropped there.
		 */
		close_cached = true;
		goto out_dput_old;
	} else {
		struct renamedata rd = {
			.old_mnt_idmap	= &nop_mnt_idmap,
			.old_dir	= fdir,
			.old_dentry	= odentry,
			.new_mnt_idmap	= &nop_mnt_idmap,
			.new_dir	= tdir,
			.new_dentry	= ndentry,
		};
		int retries;

		/*
		 * On -EAGAIN, vfs_rename() is retried at most once, and
		 * only if nfsd_wait_for_delegreturn() returns non-zero.
		 */
		for (retries = 1;;) {
			host_err = vfs_rename(&rd);
			if (host_err != -EAGAIN || !retries--)
				break;
			if (!nfsd_wait_for_delegreturn(rqstp, d_inode(odentry)))
				break;
		}
		if (!host_err) {
			host_err = commit_metadata(tfhp);
			if (!host_err)
				host_err = commit_metadata(ffhp);
		}
	}
 out_dput_new:
	dput(ndentry);
 out_dput_old:
	dput(odentry);
 out_nfserr:
	err = nfserrno(host_err);

	/* Post-op attributes are not filled when a retry is pending. */
	if (!close_cached) {
		fh_fill_post_attrs(ffhp);
		fh_fill_post_attrs(tfhp);
	}
out_unlock:
	unlock_rename(tdentry, fdentry);
out_want_write:
	fh_drop_write(ffhp);

	/*
	 * If the target dentry has cached open files, then we need to
	 * try to close them prior to doing the rename.  Final fput
	 * shouldn't be done with locks held however, so we delay it
	 * until this point and then reattempt the whole shebang.
	 */
	if (close_cached) {
		close_cached = false;
		nfsd_close_cached_files(ndentry);
		dput(ndentry);
		goto retry;
	}
out:
	return err;
}
1937  
1938  /*
1939   * Unlink a file or directory
1940   * N.B. After this call fhp needs an fh_put
1941   */
1942  __be32
nfsd_unlink(struct svc_rqst * rqstp,struct svc_fh * fhp,int type,char * fname,int flen)1943  nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
1944  				char *fname, int flen)
1945  {
1946  	struct dentry	*dentry, *rdentry;
1947  	struct inode	*dirp;
1948  	struct inode	*rinode;
1949  	__be32		err;
1950  	int		host_err;
1951  
1952  	err = nfserr_acces;
1953  	if (!flen || isdotent(fname, flen))
1954  		goto out;
1955  	err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_REMOVE);
1956  	if (err)
1957  		goto out;
1958  
1959  	host_err = fh_want_write(fhp);
1960  	if (host_err)
1961  		goto out_nfserr;
1962  
1963  	dentry = fhp->fh_dentry;
1964  	dirp = d_inode(dentry);
1965  	inode_lock_nested(dirp, I_MUTEX_PARENT);
1966  
1967  	rdentry = lookup_one_len(fname, dentry, flen);
1968  	host_err = PTR_ERR(rdentry);
1969  	if (IS_ERR(rdentry))
1970  		goto out_unlock;
1971  
1972  	if (d_really_is_negative(rdentry)) {
1973  		dput(rdentry);
1974  		host_err = -ENOENT;
1975  		goto out_unlock;
1976  	}
1977  	rinode = d_inode(rdentry);
1978  	err = fh_fill_pre_attrs(fhp);
1979  	if (err != nfs_ok)
1980  		goto out_unlock;
1981  
1982  	ihold(rinode);
1983  	if (!type)
1984  		type = d_inode(rdentry)->i_mode & S_IFMT;
1985  
1986  	if (type != S_IFDIR) {
1987  		int retries;
1988  
1989  		if (rdentry->d_sb->s_export_op->flags & EXPORT_OP_CLOSE_BEFORE_UNLINK)
1990  			nfsd_close_cached_files(rdentry);
1991  
1992  		for (retries = 1;;) {
1993  			host_err = vfs_unlink(&nop_mnt_idmap, dirp, rdentry, NULL);
1994  			if (host_err != -EAGAIN || !retries--)
1995  				break;
1996  			if (!nfsd_wait_for_delegreturn(rqstp, rinode))
1997  				break;
1998  		}
1999  	} else {
2000  		host_err = vfs_rmdir(&nop_mnt_idmap, dirp, rdentry);
2001  	}
2002  	fh_fill_post_attrs(fhp);
2003  
2004  	inode_unlock(dirp);
2005  	if (!host_err)
2006  		host_err = commit_metadata(fhp);
2007  	dput(rdentry);
2008  	iput(rinode);    /* truncate the inode here */
2009  
2010  out_drop_write:
2011  	fh_drop_write(fhp);
2012  out_nfserr:
2013  	if (host_err == -EBUSY) {
2014  		/* name is mounted-on. There is no perfect
2015  		 * error status.
2016  		 */
2017  		err = nfserr_file_open;
2018  	} else {
2019  		err = nfserrno(host_err);
2020  	}
2021  out:
2022  	return err;
2023  out_unlock:
2024  	inode_unlock(dirp);
2025  	goto out_drop_write;
2026  }
2027  
/*
 * We do this buffering because we must not call back into the file
 * system's ->lookup() method from the filldir callback. That may well
 * deadlock a number of file systems.
 *
 * This is based heavily on the implementation of same in XFS.
 *
 * One buffered_dirent is stored per directory entry. Records are packed
 * back to back, each padded so that its total size is a multiple of
 * sizeof(u64).
 */
struct buffered_dirent {
	u64		ino;		/* inode number passed to the actor */
	loff_t		offset;		/* offset passed to the actor */
	int		namlen;		/* number of bytes stored in name[] */
	unsigned int	d_type;		/* d_type passed to the actor */
	char		name[];		/* namlen bytes; no NUL is appended */
};
2042  
/* Per-call state shared between nfsd_buffered_readdir() and its actor. */
struct readdir_data {
	struct dir_context ctx;		/* handed to iterate_dir() */
	char		*dirent;	/* single page holding buffered_dirents */
	size_t		used;		/* bytes of @dirent filled so far */
	int		full;		/* set to 1 when an entry did not fit */
};
2049  
nfsd_buffered_filldir(struct dir_context * ctx,const char * name,int namlen,loff_t offset,u64 ino,unsigned int d_type)2050  static bool nfsd_buffered_filldir(struct dir_context *ctx, const char *name,
2051  				 int namlen, loff_t offset, u64 ino,
2052  				 unsigned int d_type)
2053  {
2054  	struct readdir_data *buf =
2055  		container_of(ctx, struct readdir_data, ctx);
2056  	struct buffered_dirent *de = (void *)(buf->dirent + buf->used);
2057  	unsigned int reclen;
2058  
2059  	reclen = ALIGN(sizeof(struct buffered_dirent) + namlen, sizeof(u64));
2060  	if (buf->used + reclen > PAGE_SIZE) {
2061  		buf->full = 1;
2062  		return false;
2063  	}
2064  
2065  	de->namlen = namlen;
2066  	de->offset = offset;
2067  	de->ino = ino;
2068  	de->d_type = d_type;
2069  	memcpy(de->name, name, namlen);
2070  	buf->used += reclen;
2071  
2072  	return true;
2073  }
2074  
nfsd_buffered_readdir(struct file * file,struct svc_fh * fhp,nfsd_filldir_t func,struct readdir_cd * cdp,loff_t * offsetp)2075  static __be32 nfsd_buffered_readdir(struct file *file, struct svc_fh *fhp,
2076  				    nfsd_filldir_t func, struct readdir_cd *cdp,
2077  				    loff_t *offsetp)
2078  {
2079  	struct buffered_dirent *de;
2080  	int host_err;
2081  	int size;
2082  	loff_t offset;
2083  	struct readdir_data buf = {
2084  		.ctx.actor = nfsd_buffered_filldir,
2085  		.dirent = (void *)__get_free_page(GFP_KERNEL)
2086  	};
2087  
2088  	if (!buf.dirent)
2089  		return nfserrno(-ENOMEM);
2090  
2091  	offset = *offsetp;
2092  
2093  	while (1) {
2094  		unsigned int reclen;
2095  
2096  		cdp->err = nfserr_eof; /* will be cleared on successful read */
2097  		buf.used = 0;
2098  		buf.full = 0;
2099  
2100  		host_err = iterate_dir(file, &buf.ctx);
2101  		if (buf.full)
2102  			host_err = 0;
2103  
2104  		if (host_err < 0)
2105  			break;
2106  
2107  		size = buf.used;
2108  
2109  		if (!size)
2110  			break;
2111  
2112  		de = (struct buffered_dirent *)buf.dirent;
2113  		while (size > 0) {
2114  			offset = de->offset;
2115  
2116  			if (func(cdp, de->name, de->namlen, de->offset,
2117  				 de->ino, de->d_type))
2118  				break;
2119  
2120  			if (cdp->err != nfs_ok)
2121  				break;
2122  
2123  			trace_nfsd_dirent(fhp, de->ino, de->name, de->namlen);
2124  
2125  			reclen = ALIGN(sizeof(*de) + de->namlen,
2126  				       sizeof(u64));
2127  			size -= reclen;
2128  			de = (struct buffered_dirent *)((char *)de + reclen);
2129  		}
2130  		if (size > 0) /* We bailed out early */
2131  			break;
2132  
2133  		offset = vfs_llseek(file, 0, SEEK_CUR);
2134  	}
2135  
2136  	free_page((unsigned long)(buf.dirent));
2137  
2138  	if (host_err)
2139  		return nfserrno(host_err);
2140  
2141  	*offsetp = offset;
2142  	return cdp->err;
2143  }
2144  
2145  /**
2146   * nfsd_readdir - Read entries from a directory
2147   * @rqstp: RPC transaction context
2148   * @fhp: NFS file handle of directory to be read
2149   * @offsetp: OUT: seek offset of final entry that was read
2150   * @cdp: OUT: an eof error value
2151   * @func: entry filler actor
2152   *
2153   * This implementation ignores the NFSv3/4 verifier cookie.
2154   *
2155   * NB: normal system calls hold file->f_pos_lock when calling
2156   * ->iterate_shared and ->llseek, but nfsd_readdir() does not.
2157   * Because the struct file acquired here is not visible to other
2158   * threads, it's internal state does not need mutex protection.
2159   *
2160   * Returns nfs_ok on success, otherwise an nfsstat code is
2161   * returned.
2162   */
2163  __be32
nfsd_readdir(struct svc_rqst * rqstp,struct svc_fh * fhp,loff_t * offsetp,struct readdir_cd * cdp,nfsd_filldir_t func)2164  nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp,
2165  	     struct readdir_cd *cdp, nfsd_filldir_t func)
2166  {
2167  	__be32		err;
2168  	struct file	*file;
2169  	loff_t		offset = *offsetp;
2170  	int             may_flags = NFSD_MAY_READ;
2171  
2172  	err = nfsd_open(rqstp, fhp, S_IFDIR, may_flags, &file);
2173  	if (err)
2174  		goto out;
2175  
2176  	if (fhp->fh_64bit_cookies)
2177  		file->f_mode |= FMODE_64BITHASH;
2178  	else
2179  		file->f_mode |= FMODE_32BITHASH;
2180  
2181  	offset = vfs_llseek(file, offset, SEEK_SET);
2182  	if (offset < 0) {
2183  		err = nfserrno((int)offset);
2184  		goto out_close;
2185  	}
2186  
2187  	err = nfsd_buffered_readdir(file, fhp, func, cdp, offsetp);
2188  
2189  	if (err == nfserr_eof || err == nfserr_toosmall)
2190  		err = nfs_ok; /* can still be found in ->err */
2191  out_close:
2192  	nfsd_filp_close(file);
2193  out:
2194  	return err;
2195  }
2196  
/**
 * nfsd_filp_close - close a file synchronously
 * @fp: the file to close
 *
 * nfsd_filp_close() is similar in behaviour to filp_close().
 * The difference is that if this is the final close on the
 * file, then that finalisation happens immediately, rather than
 * being handed over to a work_queue, as is the case for
 * filp_close().
 * When a user-space process closes a file (even when using
 * filp_close()) the finalisation happens before returning to
 * userspace, so it is effectively synchronous.  When a kernel thread
 * uses filp_close(), on the other hand, the handling is completely
 * asynchronous.  This means that any cost imposed by that finalisation
 * is not imposed on the nfsd thread, and nfsd could potentially
 * close files more quickly than the work queue finalises the close,
 * which would lead to unbounded growth in the queue.
 *
 * In some contexts it is not safe to synchronously wait for
 * close finalisation (see comment for __fput_sync()), but nfsd
 * does not match those contexts.  In particular it does not, at the
 * time that this function is called, hold any locks and no finalisation
 * of any file, socket, or device driver would have any cause to wait
 * for nfsd to make progress.
 */
void nfsd_filp_close(struct file *fp)
{
	/*
	 * NOTE(review): the extra reference taken here presumably keeps
	 * filp_close() from dropping the last reference, so that the final
	 * put is the synchronous __fput_sync() below - confirm against
	 * fs/file_table.c.
	 */
	get_file(fp);
	filp_close(fp, NULL);
	__fput_sync(fp);
}
2228  
2229  /*
2230   * Get file system stats
2231   * N.B. After this call fhp needs an fh_put
2232   */
2233  __be32
nfsd_statfs(struct svc_rqst * rqstp,struct svc_fh * fhp,struct kstatfs * stat,int access)2234  nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kstatfs *stat, int access)
2235  {
2236  	__be32 err;
2237  
2238  	err = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP | access);
2239  	if (!err) {
2240  		struct path path = {
2241  			.mnt	= fhp->fh_export->ex_path.mnt,
2242  			.dentry	= fhp->fh_dentry,
2243  		};
2244  		if (vfs_statfs(&path, stat))
2245  			err = nfserr_io;
2246  	}
2247  	return err;
2248  }
2249  
exp_rdonly(struct svc_cred * cred,struct svc_export * exp)2250  static int exp_rdonly(struct svc_cred *cred, struct svc_export *exp)
2251  {
2252  	return nfsexp_flags(cred, exp) & NFSEXP_READONLY;
2253  }
2254  
2255  #ifdef CONFIG_NFSD_V4
2256  /*
2257   * Helper function to translate error numbers. In the case of xattr operations,
2258   * some error codes need to be translated outside of the standard translations.
2259   *
2260   * ENODATA needs to be translated to nfserr_noxattr.
2261   * E2BIG to nfserr_xattr2big.
2262   *
2263   * Additionally, vfs_listxattr can return -ERANGE. This means that the
2264   * file has too many extended attributes to retrieve inside an
2265   * XATTR_LIST_MAX sized buffer. This is a bug in the xattr implementation:
2266   * filesystems will allow the adding of extended attributes until they hit
2267   * their own internal limit. This limit may be larger than XATTR_LIST_MAX.
2268   * So, at that point, the attributes are present and valid, but can't
2269   * be retrieved using listxattr, since the upper level xattr code enforces
2270   * the XATTR_LIST_MAX limit.
2271   *
2272   * This bug means that we need to deal with listxattr returning -ERANGE. The
2273   * best mapping is to return TOOSMALL.
2274   */
2275  static __be32
nfsd_xattr_errno(int err)2276  nfsd_xattr_errno(int err)
2277  {
2278  	switch (err) {
2279  	case -ENODATA:
2280  		return nfserr_noxattr;
2281  	case -E2BIG:
2282  		return nfserr_xattr2big;
2283  	case -ERANGE:
2284  		return nfserr_toosmall;
2285  	}
2286  	return nfserrno(err);
2287  }
2288  
2289  /*
2290   * Retrieve the specified user extended attribute. To avoid always
2291   * having to allocate the maximum size (since we are not getting
2292   * a maximum size from the RPC), do a probe + alloc. Hold a reader
2293   * lock on i_rwsem to prevent the extended attribute from changing
2294   * size while we're doing this.
2295   */
2296  __be32
nfsd_getxattr(struct svc_rqst * rqstp,struct svc_fh * fhp,char * name,void ** bufp,int * lenp)2297  nfsd_getxattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name,
2298  	      void **bufp, int *lenp)
2299  {
2300  	ssize_t len;
2301  	__be32 err;
2302  	char *buf;
2303  	struct inode *inode;
2304  	struct dentry *dentry;
2305  
2306  	err = fh_verify(rqstp, fhp, 0, NFSD_MAY_READ);
2307  	if (err)
2308  		return err;
2309  
2310  	err = nfs_ok;
2311  	dentry = fhp->fh_dentry;
2312  	inode = d_inode(dentry);
2313  
2314  	inode_lock_shared(inode);
2315  
2316  	len = vfs_getxattr(&nop_mnt_idmap, dentry, name, NULL, 0);
2317  
2318  	/*
2319  	 * Zero-length attribute, just return.
2320  	 */
2321  	if (len == 0) {
2322  		*bufp = NULL;
2323  		*lenp = 0;
2324  		goto out;
2325  	}
2326  
2327  	if (len < 0) {
2328  		err = nfsd_xattr_errno(len);
2329  		goto out;
2330  	}
2331  
2332  	if (len > *lenp) {
2333  		err = nfserr_toosmall;
2334  		goto out;
2335  	}
2336  
2337  	buf = kvmalloc(len, GFP_KERNEL);
2338  	if (buf == NULL) {
2339  		err = nfserr_jukebox;
2340  		goto out;
2341  	}
2342  
2343  	len = vfs_getxattr(&nop_mnt_idmap, dentry, name, buf, len);
2344  	if (len <= 0) {
2345  		kvfree(buf);
2346  		buf = NULL;
2347  		err = nfsd_xattr_errno(len);
2348  	}
2349  
2350  	*lenp = len;
2351  	*bufp = buf;
2352  
2353  out:
2354  	inode_unlock_shared(inode);
2355  
2356  	return err;
2357  }
2358  
2359  /*
2360   * Retrieve the xattr names. Since we can't know how many are
2361   * user extended attributes, we must get all attributes here,
2362   * and have the XDR encode filter out the "user." ones.
2363   *
2364   * While this could always just allocate an XATTR_LIST_MAX
2365   * buffer, that's a waste, so do a probe + allocate. To
2366   * avoid any changes between the probe and allocate, wrap
2367   * this in inode_lock.
2368   */
2369  __be32
nfsd_listxattr(struct svc_rqst * rqstp,struct svc_fh * fhp,char ** bufp,int * lenp)2370  nfsd_listxattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char **bufp,
2371  	       int *lenp)
2372  {
2373  	ssize_t len;
2374  	__be32 err;
2375  	char *buf;
2376  	struct inode *inode;
2377  	struct dentry *dentry;
2378  
2379  	err = fh_verify(rqstp, fhp, 0, NFSD_MAY_READ);
2380  	if (err)
2381  		return err;
2382  
2383  	dentry = fhp->fh_dentry;
2384  	inode = d_inode(dentry);
2385  	*lenp = 0;
2386  
2387  	inode_lock_shared(inode);
2388  
2389  	len = vfs_listxattr(dentry, NULL, 0);
2390  	if (len <= 0) {
2391  		err = nfsd_xattr_errno(len);
2392  		goto out;
2393  	}
2394  
2395  	if (len > XATTR_LIST_MAX) {
2396  		err = nfserr_xattr2big;
2397  		goto out;
2398  	}
2399  
2400  	buf = kvmalloc(len, GFP_KERNEL);
2401  	if (buf == NULL) {
2402  		err = nfserr_jukebox;
2403  		goto out;
2404  	}
2405  
2406  	len = vfs_listxattr(dentry, buf, len);
2407  	if (len <= 0) {
2408  		kvfree(buf);
2409  		err = nfsd_xattr_errno(len);
2410  		goto out;
2411  	}
2412  
2413  	*lenp = len;
2414  	*bufp = buf;
2415  
2416  	err = nfs_ok;
2417  out:
2418  	inode_unlock_shared(inode);
2419  
2420  	return err;
2421  }
2422  
2423  /**
2424   * nfsd_removexattr - Remove an extended attribute
2425   * @rqstp: RPC transaction being executed
2426   * @fhp: NFS filehandle of object with xattr to remove
2427   * @name: name of xattr to remove (NUL-terminate)
2428   *
2429   * Pass in a NULL pointer for delegated_inode, and let the client deal
2430   * with NFS4ERR_DELAY (same as with e.g. setattr and remove).
2431   *
2432   * Returns nfs_ok on success, or an nfsstat in network byte order.
2433   */
2434  __be32
nfsd_removexattr(struct svc_rqst * rqstp,struct svc_fh * fhp,char * name)2435  nfsd_removexattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name)
2436  {
2437  	__be32 err;
2438  	int ret;
2439  
2440  	err = fh_verify(rqstp, fhp, 0, NFSD_MAY_WRITE);
2441  	if (err)
2442  		return err;
2443  
2444  	ret = fh_want_write(fhp);
2445  	if (ret)
2446  		return nfserrno(ret);
2447  
2448  	inode_lock(fhp->fh_dentry->d_inode);
2449  	err = fh_fill_pre_attrs(fhp);
2450  	if (err != nfs_ok)
2451  		goto out_unlock;
2452  	ret = __vfs_removexattr_locked(&nop_mnt_idmap, fhp->fh_dentry,
2453  				       name, NULL);
2454  	err = nfsd_xattr_errno(ret);
2455  	fh_fill_post_attrs(fhp);
2456  out_unlock:
2457  	inode_unlock(fhp->fh_dentry->d_inode);
2458  	fh_drop_write(fhp);
2459  
2460  	return err;
2461  }
2462  
2463  __be32
nfsd_setxattr(struct svc_rqst * rqstp,struct svc_fh * fhp,char * name,void * buf,u32 len,u32 flags)2464  nfsd_setxattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name,
2465  	      void *buf, u32 len, u32 flags)
2466  {
2467  	__be32 err;
2468  	int ret;
2469  
2470  	err = fh_verify(rqstp, fhp, 0, NFSD_MAY_WRITE);
2471  	if (err)
2472  		return err;
2473  
2474  	ret = fh_want_write(fhp);
2475  	if (ret)
2476  		return nfserrno(ret);
2477  	inode_lock(fhp->fh_dentry->d_inode);
2478  	err = fh_fill_pre_attrs(fhp);
2479  	if (err != nfs_ok)
2480  		goto out_unlock;
2481  	ret = __vfs_setxattr_locked(&nop_mnt_idmap, fhp->fh_dentry,
2482  				    name, buf, len, flags, NULL);
2483  	fh_fill_post_attrs(fhp);
2484  	err = nfsd_xattr_errno(ret);
2485  out_unlock:
2486  	inode_unlock(fhp->fh_dentry->d_inode);
2487  	fh_drop_write(fhp);
2488  	return err;
2489  }
2490  #endif
2491  
2492  /*
2493   * Check for a user's access permissions to this inode.
2494   */
2495  __be32
nfsd_permission(struct svc_cred * cred,struct svc_export * exp,struct dentry * dentry,int acc)2496  nfsd_permission(struct svc_cred *cred, struct svc_export *exp,
2497  		struct dentry *dentry, int acc)
2498  {
2499  	struct inode	*inode = d_inode(dentry);
2500  	int		err;
2501  
2502  	if ((acc & NFSD_MAY_MASK) == NFSD_MAY_NOP)
2503  		return 0;
2504  #if 0
2505  	dprintk("nfsd: permission 0x%x%s%s%s%s%s%s%s mode 0%o%s%s%s\n",
2506  		acc,
2507  		(acc & NFSD_MAY_READ)?	" read"  : "",
2508  		(acc & NFSD_MAY_WRITE)?	" write" : "",
2509  		(acc & NFSD_MAY_EXEC)?	" exec"  : "",
2510  		(acc & NFSD_MAY_SATTR)?	" sattr" : "",
2511  		(acc & NFSD_MAY_TRUNC)?	" trunc" : "",
2512  		(acc & NFSD_MAY_LOCK)?	" lock"  : "",
2513  		(acc & NFSD_MAY_OWNER_OVERRIDE)? " owneroverride" : "",
2514  		inode->i_mode,
2515  		IS_IMMUTABLE(inode)?	" immut" : "",
2516  		IS_APPEND(inode)?	" append" : "",
2517  		__mnt_is_readonly(exp->ex_path.mnt)?	" ro" : "");
2518  	dprintk("      owner %d/%d user %d/%d\n",
2519  		inode->i_uid, inode->i_gid, current_fsuid(), current_fsgid());
2520  #endif
2521  
2522  	/* Normally we reject any write/sattr etc access on a read-only file
2523  	 * system.  But if it is IRIX doing check on write-access for a
2524  	 * device special file, we ignore rofs.
2525  	 */
2526  	if (!(acc & NFSD_MAY_LOCAL_ACCESS))
2527  		if (acc & (NFSD_MAY_WRITE | NFSD_MAY_SATTR | NFSD_MAY_TRUNC)) {
2528  			if (exp_rdonly(cred, exp) ||
2529  			    __mnt_is_readonly(exp->ex_path.mnt))
2530  				return nfserr_rofs;
2531  			if (/* (acc & NFSD_MAY_WRITE) && */ IS_IMMUTABLE(inode))
2532  				return nfserr_perm;
2533  		}
2534  	if ((acc & NFSD_MAY_TRUNC) && IS_APPEND(inode))
2535  		return nfserr_perm;
2536  
2537  	if (acc & NFSD_MAY_LOCK) {
2538  		/* If we cannot rely on authentication in NLM requests,
2539  		 * just allow locks, otherwise require read permission, or
2540  		 * ownership
2541  		 */
2542  		if (exp->ex_flags & NFSEXP_NOAUTHNLM)
2543  			return 0;
2544  		else
2545  			acc = NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE;
2546  	}
2547  	/*
2548  	 * The file owner always gets access permission for accesses that
2549  	 * would normally be checked at open time. This is to make
2550  	 * file access work even when the client has done a fchmod(fd, 0).
2551  	 *
2552  	 * However, `cp foo bar' should fail nevertheless when bar is
2553  	 * readonly. A sensible way to do this might be to reject all
2554  	 * attempts to truncate a read-only file, because a creat() call
2555  	 * always implies file truncation.
2556  	 * ... but this isn't really fair.  A process may reasonably call
2557  	 * ftruncate on an open file descriptor on a file with perm 000.
2558  	 * We must trust the client to do permission checking - using "ACCESS"
2559  	 * with NFSv3.
2560  	 */
2561  	if ((acc & NFSD_MAY_OWNER_OVERRIDE) &&
2562  	    uid_eq(inode->i_uid, current_fsuid()))
2563  		return 0;
2564  
2565  	/* This assumes  NFSD_MAY_{READ,WRITE,EXEC} == MAY_{READ,WRITE,EXEC} */
2566  	err = inode_permission(&nop_mnt_idmap, inode,
2567  			       acc & (MAY_READ | MAY_WRITE | MAY_EXEC));
2568  
2569  	/* Allow read access to binaries even when mode 111 */
2570  	if (err == -EACCES && S_ISREG(inode->i_mode) &&
2571  	     (acc == (NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE) ||
2572  	      acc == (NFSD_MAY_READ | NFSD_MAY_READ_IF_EXEC)))
2573  		err = inode_permission(&nop_mnt_idmap, inode, MAY_EXEC);
2574  
2575  	return err? nfserrno(err) : 0;
2576  }
2577