1  /* SPDX-License-Identifier: GPL-2.0 */
2  #ifndef _LINUX_FS_H
3  #define _LINUX_FS_H
4  
5  #include <linux/linkage.h>
6  #include <linux/wait_bit.h>
7  #include <linux/kdev_t.h>
8  #include <linux/dcache.h>
9  #include <linux/path.h>
10  #include <linux/stat.h>
11  #include <linux/cache.h>
12  #include <linux/list.h>
13  #include <linux/list_lru.h>
14  #include <linux/llist.h>
15  #include <linux/radix-tree.h>
16  #include <linux/xarray.h>
17  #include <linux/rbtree.h>
18  #include <linux/init.h>
19  #include <linux/pid.h>
20  #include <linux/bug.h>
21  #include <linux/mutex.h>
22  #include <linux/rwsem.h>
23  #include <linux/mm_types.h>
24  #include <linux/capability.h>
25  #include <linux/semaphore.h>
26  #include <linux/fcntl.h>
27  #include <linux/rculist_bl.h>
28  #include <linux/atomic.h>
29  #include <linux/shrinker.h>
30  #include <linux/migrate_mode.h>
31  #include <linux/uidgid.h>
32  #include <linux/lockdep.h>
33  #include <linux/percpu-rwsem.h>
34  #include <linux/workqueue.h>
35  #include <linux/delayed_call.h>
36  #include <linux/uuid.h>
37  #include <linux/errseq.h>
38  #include <linux/ioprio.h>
39  #include <linux/fs_types.h>
40  #include <linux/build_bug.h>
41  #include <linux/stddef.h>
42  #include <linux/mount.h>
43  #include <linux/cred.h>
44  #include <linux/mnt_idmapping.h>
45  #include <linux/slab.h>
46  #include <linux/maple_tree.h>
47  #include <linux/rw_hint.h>
48  
49  #include <asm/byteorder.h>
50  #include <uapi/linux/fs.h>
51  
/*
 * Forward declarations of structure types whose full definitions live
 * elsewhere.  Declaring the tags here lets this header name them in pointer
 * types without pulling in their defining headers.
 */
struct backing_dev_info;
struct bdi_writeback;
struct bio;
struct io_comp_batch;
struct export_operations;
struct fiemap_extent_info;
struct hd_geometry;
struct iovec;
struct kiocb;
struct kobject;
struct pipe_inode_info;
struct poll_table_struct;
struct kstatfs;
struct vm_area_struct;
struct vfsmount;
struct cred;
struct swap_info_struct;
struct seq_file;
struct workqueue_struct;
struct iov_iter;
struct fscrypt_inode_info;
struct fscrypt_operations;
struct fsverity_info;
struct fsverity_operations;
struct fsnotify_mark_connector;
struct fsnotify_sb_info;
struct fs_context;
struct fs_parameter_spec;
struct fileattr;
struct iomap_ops;
82  
/* Initialisation routines, all annotated __init; defined elsewhere. */
extern void __init inode_init(void);
extern void __init inode_init_early(void);
extern void __init files_init(void);
extern void __init files_maxfiles_init(void);

/* Accessor and variable defined elsewhere; only declared here. */
extern unsigned long get_max_files(void);
extern unsigned int sysctl_nr_open;

/* Kernel-internal name for the __kernel_rwf_t type. */
typedef __kernel_rwf_t rwf_t;
92  
struct buffer_head;
/*
 * Both typedefs name function types (not pointer-to-function types), so
 * users declare callbacks as "get_block_t *" / "dio_iodone_t *".
 */
typedef int (get_block_t)(struct inode *inode, sector_t iblock,
			struct buffer_head *bh_result, int create);
typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
			ssize_t bytes, void *private);
98  
/* MAY_* mask bits: each value is a distinct single bit, so they can be OR-ed. */
#define MAY_EXEC		0x00000001
#define MAY_WRITE		0x00000002
#define MAY_READ		0x00000004
#define MAY_APPEND		0x00000008
#define MAY_ACCESS		0x00000010
#define MAY_OPEN		0x00000020
#define MAY_CHDIR		0x00000040
/* called from RCU mode, don't block */
#define MAY_NOT_BLOCK		0x00000080
108  
/*
 * flags in file.f_mode.  Note that FMODE_READ and FMODE_WRITE must correspond
 * to O_WRONLY and O_RDWR via the strange trick in do_dentry_open()
 *
 * Every flag below is a distinct single bit cast to fmode_t.  Bit positions
 * 8, 13 and 24 carry only a placeholder comment and define no flag here.
 */

/* file is open for reading */
#define FMODE_READ		((__force fmode_t)(1 << 0))
/* file is open for writing */
#define FMODE_WRITE		((__force fmode_t)(1 << 1))
/* file is seekable */
#define FMODE_LSEEK		((__force fmode_t)(1 << 2))
/* file can be accessed using pread */
#define FMODE_PREAD		((__force fmode_t)(1 << 3))
/* file can be accessed using pwrite */
#define FMODE_PWRITE		((__force fmode_t)(1 << 4))
/* File is opened for execution with sys_execve / sys_uselib */
#define FMODE_EXEC		((__force fmode_t)(1 << 5))
/* File writes are restricted (block device specific) */
#define FMODE_WRITE_RESTRICTED	((__force fmode_t)(1 << 6))
/* File supports atomic writes */
#define FMODE_CAN_ATOMIC_WRITE	((__force fmode_t)(1 << 7))

/* FMODE_* bit 8 */

/* 32bit hashes as llseek() offset (for directories) */
#define FMODE_32BITHASH         ((__force fmode_t)(1 << 9))
/* 64bit hashes as llseek() offset (for directories) */
#define FMODE_64BITHASH         ((__force fmode_t)(1 << 10))

/*
 * Don't update ctime and mtime.
 *
 * Currently a special hack for the XFS open_by_handle ioctl, but we'll
 * hopefully graduate it to a proper O_CMTIME flag supported by open(2) soon.
 */
#define FMODE_NOCMTIME		((__force fmode_t)(1 << 11))

/* Expect random access pattern */
#define FMODE_RANDOM		((__force fmode_t)(1 << 12))

/* FMODE_* bit 13 */

/* File is opened with O_PATH; almost nothing can be done with it */
#define FMODE_PATH		((__force fmode_t)(1 << 14))

/* File needs atomic accesses to f_pos */
#define FMODE_ATOMIC_POS	((__force fmode_t)(1 << 15))
/* Write access to underlying fs */
#define FMODE_WRITER		((__force fmode_t)(1 << 16))
/* Has read method(s) */
#define FMODE_CAN_READ          ((__force fmode_t)(1 << 17))
/* Has write method(s) */
#define FMODE_CAN_WRITE         ((__force fmode_t)(1 << 18))

/*
 * NOTE(review): the next two flags are undocumented here; presumably they
 * record that the file has been opened / was newly created -- confirm
 * against their users.
 */
#define FMODE_OPENED		((__force fmode_t)(1 << 19))
#define FMODE_CREATED		((__force fmode_t)(1 << 20))

/* File is stream-like */
#define FMODE_STREAM		((__force fmode_t)(1 << 21))

/* File supports DIRECT IO */
#define	FMODE_CAN_ODIRECT	((__force fmode_t)(1 << 22))

/* NOTE(review): undocumented here; meaning to be confirmed against users. */
#define	FMODE_NOREUSE		((__force fmode_t)(1 << 23))

/* FMODE_* bit 24 */

/* File is embedded in backing_file object */
#define FMODE_BACKING		((__force fmode_t)(1 << 25))

/* File was opened by fanotify and shouldn't generate fanotify events */
#define FMODE_NONOTIFY		((__force fmode_t)(1 << 26))

/* File is capable of returning -EAGAIN if I/O will block */
#define FMODE_NOWAIT		((__force fmode_t)(1 << 27))

/* File represents mount that needs unmounting */
#define FMODE_NEED_UNMOUNT	((__force fmode_t)(1 << 28))

/* File does not contribute to nr_files count */
#define FMODE_NOACCOUNT		((__force fmode_t)(1 << 29))
190  
/*
 * Attribute flags.  These should be or-ed together to figure out what
 * has been changed!
 *
 * Each flag is a distinct single bit.  Bit 10 is not assigned in this list.
 */
#define ATTR_MODE	(1 << 0)
#define ATTR_UID	(1 << 1)
#define ATTR_GID	(1 << 2)
#define ATTR_SIZE	(1 << 3)
#define ATTR_ATIME	(1 << 4)
#define ATTR_MTIME	(1 << 5)
#define ATTR_CTIME	(1 << 6)
#define ATTR_ATIME_SET	(1 << 7)
#define ATTR_MTIME_SET	(1 << 8)
#define ATTR_FORCE	(1 << 9) /* Not a change, but a change it */
#define ATTR_KILL_SUID	(1 << 11)
#define ATTR_KILL_SGID	(1 << 12)
#define ATTR_FILE	(1 << 13)
#define ATTR_KILL_PRIV	(1 << 14)
#define ATTR_OPEN	(1 << 15) /* Truncating from open(O_TRUNC) */
#define ATTR_TIMES_SET	(1 << 16)
#define ATTR_TOUCH	(1 << 17)
#define ATTR_DELEG	(1 << 18) /* Delegated attrs. Don't break write delegations */
213  
/*
 * Whiteout is represented by a char device.  The following constants define the
 * mode and device number to use.  Both are zero.
 */
#define WHITEOUT_MODE 0
#define WHITEOUT_DEV 0
220  
/*
 * This is the Inode Attributes structure, used for notify_change().  It
 * uses the above definitions as flags, to know which values have changed.
 * Also, in this manner, a Filesystem can look at only the values it cares
 * about.  Basically, these are the attributes that the VFS layer can
 * request to change from the FS layer.
 *
 * ia_valid holds the ATTR_* bits; the remaining members carry the values.
 *
 * Derek Atkins <warlord@MIT.EDU> 94-10-20
 */
struct iattr {
	unsigned int	ia_valid;
	umode_t		ia_mode;
	/*
	 * The two anonymous unions wrap structures with the same member.
	 *
	 * Filesystems raising FS_ALLOW_IDMAP need to use ia_vfs{g,u}id which
	 * are a dedicated type requiring the filesystem to use the dedicated
	 * helpers. Other filesystem can continue to use ia_{g,u}id until they
	 * have been ported.
	 *
	 * They always contain the same value. In other words FS_ALLOW_IDMAP
	 * pass down the same value on idmapped mounts as they would on regular
	 * mounts.
	 */
	union {
		kuid_t		ia_uid;
		vfsuid_t	ia_vfsuid;
	};
	union {
		kgid_t		ia_gid;
		vfsgid_t	ia_vfsgid;
	};
	loff_t		ia_size;
	struct timespec64 ia_atime;
	struct timespec64 ia_mtime;
	struct timespec64 ia_ctime;

	/*
	 * Not an attribute, but an auxiliary info for filesystems wanting to
	 * implement an ftruncate() like method.  NOTE: filesystem should
	 * check for (ia_valid & ATTR_FILE), and not for (ia_file != NULL).
	 */
	struct file	*ia_file;
};
265  
266  /*
267   * Includes for diskquotas.
268   */
269  #include <linux/quota.h>
270  
/*
 * Maximum number of layers of fs stack.  Needs to be limited to
 * prevent kernel stack overflow.
 */
#define FILESYSTEM_MAX_STACK_DEPTH 2
276  
277  /**
278   * enum positive_aop_returns - aop return codes with specific semantics
279   *
280   * @AOP_WRITEPAGE_ACTIVATE: Informs the caller that page writeback has
281   * 			    completed, that the page is still locked, and
282   * 			    should be considered active.  The VM uses this hint
283   * 			    to return the page to the active list -- it won't
284   * 			    be a candidate for writeback again in the near
285   * 			    future.  Other callers must be careful to unlock
286   * 			    the page if they get this return.  Returned by
287   * 			    writepage();
288   *
289   * @AOP_TRUNCATED_PAGE: The AOP method that was handed a locked page has
290   *  			unlocked it and the page might have been truncated.
291   *  			The caller should back up to acquiring a new page and
292   *  			trying again.  The aop will be taking reasonable
293   *  			precautions not to livelock.  If the caller held a page
294   *  			reference, it should drop it before retrying.  Returned
295   *  			by read_folio().
296   *
297   * address_space_operation functions return these large constants to indicate
298   * special semantics to the caller.  These are much larger than the bytes in a
299   * page to allow for functions that return the number of bytes operated on in a
300   * given page.
301   */
302  
enum positive_aop_returns {
	/* First of the large positive return codes. */
	AOP_WRITEPAGE_ACTIVATE	= 0x80000,
	/* Immediately follows AOP_WRITEPAGE_ACTIVATE (0x80001). */
	AOP_TRUNCATED_PAGE	= AOP_WRITEPAGE_ACTIVATE + 1,
};
307  
/*
 * oh the beauties of C type declarations.
 *
 * Forward declarations so the definitions below can use pointers to these
 * types.
 */
struct page;
struct address_space;
struct writeback_control;
struct readahead_control;
315  
/*
 * Match RWF_* bits to IOCB bits.
 *
 * Each of these expands to the corresponding RWF_* value cast to int.
 */
#define IOCB_HIPRI		(__force int) RWF_HIPRI
#define IOCB_DSYNC		(__force int) RWF_DSYNC
#define IOCB_SYNC		(__force int) RWF_SYNC
#define IOCB_NOWAIT		(__force int) RWF_NOWAIT
#define IOCB_APPEND		(__force int) RWF_APPEND
#define IOCB_ATOMIC		(__force int) RWF_ATOMIC

/* non-RWF related bits - start at 16 */
#define IOCB_EVENTFD		(1 << 16)
#define IOCB_DIRECT		(1 << 17)
#define IOCB_WRITE		(1 << 18)
/* iocb->ki_waitq is valid */
#define IOCB_WAITQ		(1 << 19)
#define IOCB_NOIO		(1 << 20)
/* can use bio alloc cache */
#define IOCB_ALLOC_CACHE	(1 << 21)
/*
 * IOCB_DIO_CALLER_COMP can be set by the iocb owner, to indicate that the
 * iocb completion can be passed back to the owner for execution from a safe
 * context rather than needing to be punted through a workqueue. If this
 * flag is set, the bio completion handling may set iocb->dio_complete to a
 * handler function and iocb->private to context information for that handler.
 * The issuer should call the handler with that context information from task
 * context to complete the processing of the iocb. Note that while this
 * provides a task context for the dio_complete() callback, it should only be
 * used on the completion side for non-IO generating completions. It's fine to
 * call blocking functions from this callback, but they should not wait for
 * unrelated IO (like cache flushing, new IO generation, etc).
 */
#define IOCB_DIO_CALLER_COMP	(1 << 22)
/* kiocb is a read or write operation submitted by fs/aio.c. */
#define IOCB_AIO_RW		(1 << 23)
349  
/*
 * For use in trace events: a comma-separated list of { flag, "name" }
 * initialiser pairs, one per IOCB_* flag, listed in the same order as the
 * flag definitions.  Every IOCB_* flag must have an entry here, otherwise
 * it cannot be printed symbolically.
 */
#define TRACE_IOCB_STRINGS \
	{ IOCB_HIPRI,		"HIPRI" }, \
	{ IOCB_DSYNC,		"DSYNC" }, \
	{ IOCB_SYNC,		"SYNC" }, \
	{ IOCB_NOWAIT,		"NOWAIT" }, \
	{ IOCB_APPEND,		"APPEND" }, \
	{ IOCB_ATOMIC,		"ATOMIC" }, \
	{ IOCB_EVENTFD,		"EVENTFD" }, \
	{ IOCB_DIRECT,		"DIRECT" }, \
	{ IOCB_WRITE,		"WRITE" }, \
	{ IOCB_WAITQ,		"WAITQ" }, \
	{ IOCB_NOIO,		"NOIO" }, \
	{ IOCB_ALLOC_CACHE,	"ALLOC_CACHE" }, \
	{ IOCB_DIO_CALLER_COMP,	"CALLER_COMP" }, \
	{ IOCB_AIO_RW,		"AIO_RW" }
365  
/*
 * Per-request I/O control block.  ki_flags holds IOCB_* bits.  A NULL
 * ki_complete marks the request as synchronous (see is_sync_kiocb()).
 * ki_waitq and dio_complete share storage in the anonymous union.
 */
struct kiocb {
	struct file		*ki_filp;
	loff_t			ki_pos;
	void (*ki_complete)(struct kiocb *iocb, long ret);
	void			*private;
	int			ki_flags;
	u16			ki_ioprio; /* See linux/ioprio.h */
	union {
		/*
		 * Only used for async buffered reads, where it denotes the
		 * page waitqueue associated with completing the read. Valid
		 * IFF IOCB_WAITQ is set.
		 */
		struct wait_page_queue	*ki_waitq;
		/*
		 * Can be used for O_DIRECT IO, where the completion handling
		 * is punted back to the issuer of the IO. May only be set
		 * if IOCB_DIO_CALLER_COMP is set by the issuer, and the issuer
		 * must then check for presence of this handler when ki_complete
		 * is invoked. The data passed in to this handler must be
		 * assigned to ->private when dio_complete is assigned.
		 */
		ssize_t (*dio_complete)(void *data);
	};
};
391  
is_sync_kiocb(struct kiocb * kiocb)392  static inline bool is_sync_kiocb(struct kiocb *kiocb)
393  {
394  	return kiocb->ki_complete == NULL;
395  }
396  
/*
 * Method table for an address_space; struct address_space refers to one
 * through its a_ops pointer.
 */
struct address_space_operations {
	int (*writepage)(struct page *page, struct writeback_control *wbc);
	int (*read_folio)(struct file *, struct folio *);

	/* Write back some dirty pages from this mapping. */
	int (*writepages)(struct address_space *, struct writeback_control *);

	/* Mark a folio dirty.  Return true if this dirtied it */
	bool (*dirty_folio)(struct address_space *, struct folio *);

	void (*readahead)(struct readahead_control *);

	int (*write_begin)(struct file *, struct address_space *mapping,
				loff_t pos, unsigned len,
				struct folio **foliop, void **fsdata);
	int (*write_end)(struct file *, struct address_space *mapping,
				loff_t pos, unsigned len, unsigned copied,
				struct folio *folio, void *fsdata);

	/* Unfortunately this kludge is needed for FIBMAP. Don't use it */
	sector_t (*bmap)(struct address_space *, sector_t);
	void (*invalidate_folio) (struct folio *, size_t offset, size_t len);
	bool (*release_folio)(struct folio *, gfp_t);
	void (*free_folio)(struct folio *folio);
	ssize_t (*direct_IO)(struct kiocb *, struct iov_iter *iter);
	/*
	 * migrate the contents of a folio to the specified target. If
	 * migrate_mode is MIGRATE_ASYNC, it must not block.
	 */
	int (*migrate_folio)(struct address_space *, struct folio *dst,
			struct folio *src, enum migrate_mode);
	int (*launder_folio)(struct folio *);
	bool (*is_partially_uptodate) (struct folio *, size_t from,
			size_t count);
	void (*is_dirty_writeback) (struct folio *, bool *dirty, bool *wb);
	int (*error_remove_folio)(struct address_space *, struct folio *);

	/* swapfile support */
	int (*swap_activate)(struct swap_info_struct *sis, struct file *file,
				sector_t *span);
	void (*swap_deactivate)(struct file *file);
	int (*swap_rw)(struct kiocb *iocb, struct iov_iter *iter);
};

/* An operations table defined elsewhere; only declared here. */
extern const struct address_space_operations empty_aops;
442  
/**
 * struct address_space - Contents of a cacheable, mappable object.
 * @host: Owner, either the inode or the block_device.
 * @i_pages: Cached pages.
 * @invalidate_lock: Guards coherency between page cache contents and
 *   file offset->disk block mappings in the filesystem during invalidates.
 *   It is also used to block modification of page cache contents through
 *   memory mappings.
 * @gfp_mask: Memory allocation flags to use for allocating pages.
 * @i_mmap_writable: Number of VM_SHARED, VM_MAYWRITE mappings.
 * @nr_thps: Number of THPs in the pagecache (non-shmem only).  Present only
 *   when CONFIG_READ_ONLY_THP_FOR_FS is enabled.
 * @i_mmap: Tree of private and shared mappings.
 * @i_mmap_rwsem: Protects @i_mmap and @i_mmap_writable.
 * @nrpages: Number of page entries, protected by the i_pages lock.
 * @writeback_index: Writeback starts here.
 * @a_ops: Methods.
 * @flags: Error bits and flags (AS_*).
 * @wb_err: The most recent error which has occurred.
 * @i_private_lock: For use by the owner of the address_space.
 * @i_private_list: For use by the owner of the address_space.
 * @i_private_data: For use by the owner of the address_space.
 */
struct address_space {
	struct inode		*host;
	struct xarray		i_pages;
	struct rw_semaphore	invalidate_lock;
	gfp_t			gfp_mask;
	atomic_t		i_mmap_writable;
#ifdef CONFIG_READ_ONLY_THP_FOR_FS
	/* number of thp, only for non-shmem files */
	atomic_t		nr_thps;
#endif
	struct rb_root_cached	i_mmap;
	unsigned long		nrpages;
	pgoff_t			writeback_index;
	const struct address_space_operations *a_ops;
	unsigned long		flags;
	errseq_t		wb_err;
	spinlock_t		i_private_lock;
	struct list_head	i_private_list;
	struct rw_semaphore	i_mmap_rwsem;
	void *			i_private_data;
} __attribute__((aligned(sizeof(long)))) __randomize_layout;
	/*
	 * About the aligned(sizeof(long)) attribute above: on most
	 * architectures that alignment is already the case; but it must be
	 * enforced here for CRIS, to let the least significant bit of
	 * struct page's "mapping" pointer be used for PAGE_MAPPING_ANON.
	 */
491  
/*
 * XArray tags, for tagging dirty and writeback pages in the pagecache.
 * Each tag is an alias for one of the XA_MARK_0..XA_MARK_2 marks.
 */
#define PAGECACHE_TAG_DIRTY	XA_MARK_0
#define PAGECACHE_TAG_WRITEBACK	XA_MARK_1
#define PAGECACHE_TAG_TOWRITE	XA_MARK_2
496  
497  /*
498   * Returns true if any of the pages in the mapping are marked with the tag.
499   */
mapping_tagged(struct address_space * mapping,xa_mark_t tag)500  static inline bool mapping_tagged(struct address_space *mapping, xa_mark_t tag)
501  {
502  	return xa_marked(&mapping->i_pages, tag);
503  }
504  
i_mmap_lock_write(struct address_space * mapping)505  static inline void i_mmap_lock_write(struct address_space *mapping)
506  {
507  	down_write(&mapping->i_mmap_rwsem);
508  }
509  
i_mmap_trylock_write(struct address_space * mapping)510  static inline int i_mmap_trylock_write(struct address_space *mapping)
511  {
512  	return down_write_trylock(&mapping->i_mmap_rwsem);
513  }
514  
i_mmap_unlock_write(struct address_space * mapping)515  static inline void i_mmap_unlock_write(struct address_space *mapping)
516  {
517  	up_write(&mapping->i_mmap_rwsem);
518  }
519  
i_mmap_trylock_read(struct address_space * mapping)520  static inline int i_mmap_trylock_read(struct address_space *mapping)
521  {
522  	return down_read_trylock(&mapping->i_mmap_rwsem);
523  }
524  
i_mmap_lock_read(struct address_space * mapping)525  static inline void i_mmap_lock_read(struct address_space *mapping)
526  {
527  	down_read(&mapping->i_mmap_rwsem);
528  }
529  
i_mmap_unlock_read(struct address_space * mapping)530  static inline void i_mmap_unlock_read(struct address_space *mapping)
531  {
532  	up_read(&mapping->i_mmap_rwsem);
533  }
534  
i_mmap_assert_locked(struct address_space * mapping)535  static inline void i_mmap_assert_locked(struct address_space *mapping)
536  {
537  	lockdep_assert_held(&mapping->i_mmap_rwsem);
538  }
539  
i_mmap_assert_write_locked(struct address_space * mapping)540  static inline void i_mmap_assert_write_locked(struct address_space *mapping)
541  {
542  	lockdep_assert_held_write(&mapping->i_mmap_rwsem);
543  }
544  
545  /*
546   * Might pages of this file be mapped into userspace?
547   */
mapping_mapped(struct address_space * mapping)548  static inline int mapping_mapped(struct address_space *mapping)
549  {
550  	return	!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root);
551  }
552  
553  /*
554   * Might pages of this file have been modified in userspace?
555   * Note that i_mmap_writable counts all VM_SHARED, VM_MAYWRITE vmas: do_mmap
556   * marks vma as VM_SHARED if it is shared, and the file was opened for
557   * writing i.e. vma may be mprotected writable even if now readonly.
558   *
559   * If i_mmap_writable is negative, no new writable mappings are allowed. You
560   * can only deny writable mappings, if none exists right now.
561   */
mapping_writably_mapped(struct address_space * mapping)562  static inline int mapping_writably_mapped(struct address_space *mapping)
563  {
564  	return atomic_read(&mapping->i_mmap_writable) > 0;
565  }
566  
mapping_map_writable(struct address_space * mapping)567  static inline int mapping_map_writable(struct address_space *mapping)
568  {
569  	return atomic_inc_unless_negative(&mapping->i_mmap_writable) ?
570  		0 : -EPERM;
571  }
572  
mapping_unmap_writable(struct address_space * mapping)573  static inline void mapping_unmap_writable(struct address_space *mapping)
574  {
575  	atomic_dec(&mapping->i_mmap_writable);
576  }
577  
mapping_deny_writable(struct address_space * mapping)578  static inline int mapping_deny_writable(struct address_space *mapping)
579  {
580  	return atomic_dec_unless_positive(&mapping->i_mmap_writable) ?
581  		0 : -EBUSY;
582  }
583  
mapping_allow_writable(struct address_space * mapping)584  static inline void mapping_allow_writable(struct address_space *mapping)
585  {
586  	atomic_inc(&mapping->i_mmap_writable);
587  }
588  
/*
 * Use sequence counter to get consistent i_size on 32-bit processors.
 *
 * On 32-bit SMP builds __NEED_I_SIZE_ORDERED is defined and
 * i_size_ordered_init(inode) initialises inode->i_size_seqcount.  On every
 * other configuration it expands to an empty statement.
 */
#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
#include <linux/seqlock.h>
#define __NEED_I_SIZE_ORDERED
/* The argument is parenthesised so any pointer expression can be passed. */
#define i_size_ordered_init(inode) seqcount_init(&(inode)->i_size_seqcount)
#else
#define i_size_ordered_init(inode) do { } while (0)
#endif
599  
struct posix_acl;
/* Sentinel pointer value (-1); not a real struct posix_acl address. */
#define ACL_NOT_CACHED ((void *)(-1))
/*
 * ACL_DONT_CACHE is for stacked filesystems, that rely on underlying fs to
 * cache the ACL.  This also means that ->get_inode_acl() can be called in RCU
 * mode with the LOOKUP_RCU flag.
 *
 * Like ACL_NOT_CACHED it is a sentinel pointer value (-3).
 */
#define ACL_DONT_CACHE ((void *)(-3))
608  
/*
 * Build a sentinel ACL pointer from @task: the task's address advanced by
 * one byte.  The result is not a dereferenceable struct posix_acl.
 */
static inline struct posix_acl *
uncached_acl_sentinel(struct task_struct *task)
{
	char *base = (char *)task;

	return (void *)(base + 1);
}
614  
/* True when the lowest bit of the pointer value @acl is set. */
static inline bool
is_uncached_acl(struct posix_acl *acl)
{
	long bits = (long)acl;

	return (bits & 1) != 0;
}
620  
/*
 * IOP_* flag bits, each a distinct single bit.
 * NOTE(review): presumably stored in inode->i_opflags -- confirm with users.
 */
#define IOP_FASTPERM	0x0001
#define IOP_LOOKUP	0x0002
#define IOP_NOFOLLOW	0x0004
#define IOP_XATTR	0x0008
#define IOP_DEFAULT_READLINK	0x0010
626  
/*
 * Keep mostly read-only and often accessed (especially for
 * the RCU path lookup and 'stat' data) fields at the beginning
 * of the 'struct inode'
 *
 * Several members exist only under a configuration option; see the
 * #ifdef blocks below.
 */
struct inode {
	umode_t			i_mode;
	unsigned short		i_opflags;
	kuid_t			i_uid;
	kgid_t			i_gid;
	unsigned int		i_flags;

#ifdef CONFIG_FS_POSIX_ACL
	struct posix_acl	*i_acl;
	struct posix_acl	*i_default_acl;
#endif

	const struct inode_operations	*i_op;
	struct super_block	*i_sb;
	struct address_space	*i_mapping;

#ifdef CONFIG_SECURITY
	void			*i_security;
#endif

	/* Stat data, not accessed from path walking */
	unsigned long		i_ino;
	/*
	 * Filesystems may only read i_nlink directly.  They shall use the
	 * following functions for modification:
	 *
	 *    (set|clear|inc|drop)_nlink
	 *    inode_(inc|dec)_link_count
	 *
	 * i_nlink is the const view and __i_nlink the writable view of the
	 * same storage.
	 */
	union {
		const unsigned int i_nlink;
		unsigned int __i_nlink;
	};
	dev_t			i_rdev;
	loff_t			i_size;
	time64_t		i_atime_sec;
	time64_t		i_mtime_sec;
	time64_t		i_ctime_sec;
	u32			i_atime_nsec;
	u32			i_mtime_nsec;
	u32			i_ctime_nsec;
	u32			i_generation;
	spinlock_t		i_lock;	/* i_blocks, i_bytes, maybe i_size */
	unsigned short          i_bytes;
	u8			i_blkbits;
	enum rw_hint		i_write_hint;
	blkcnt_t		i_blocks;

	/* Present only when __NEED_I_SIZE_ORDERED is defined. */
#ifdef __NEED_I_SIZE_ORDERED
	seqcount_t		i_size_seqcount;
#endif

	/* Misc */
	u32			i_state;
	/* 32-bit hole */
	struct rw_semaphore	i_rwsem;

	unsigned long		dirtied_when;	/* jiffies of first dirtying */
	unsigned long		dirtied_time_when;

	struct hlist_node	i_hash;
	struct list_head	i_io_list;	/* backing dev IO list */
#ifdef CONFIG_CGROUP_WRITEBACK
	struct bdi_writeback	*i_wb;		/* the associated cgroup wb */

	/* foreign inode detection, see wbc_detach_inode() */
	int			i_wb_frn_winner;
	u16			i_wb_frn_avg_time;
	u16			i_wb_frn_history;
#endif
	struct list_head	i_lru;		/* inode LRU list */
	struct list_head	i_sb_list;
	struct list_head	i_wb_list;	/* backing dev writeback list */
	/* i_dentry and i_rcu share storage. */
	union {
		struct hlist_head	i_dentry;
		struct rcu_head		i_rcu;
	};
	atomic64_t		i_version;
	atomic64_t		i_sequence; /* see futex */
	atomic_t		i_count;
	atomic_t		i_dio_count;
	atomic_t		i_writecount;
#if defined(CONFIG_IMA) || defined(CONFIG_FILE_LOCKING)
	atomic_t		i_readcount; /* struct files open RO */
#endif
	/* i_fop and free_inode share storage. */
	union {
		const struct file_operations	*i_fop;	/* former ->i_op->default_file_ops */
		void (*free_inode)(struct inode *);
	};
	struct file_lock_context	*i_flctx;
	struct address_space	i_data;
	struct list_head	i_devices;
	/* Only one of these members is meaningful at a time. */
	union {
		struct pipe_inode_info	*i_pipe;
		struct cdev		*i_cdev;
		char			*i_link;
		unsigned		i_dir_seq;
	};


#ifdef CONFIG_FSNOTIFY
	__u32			i_fsnotify_mask; /* all events this inode cares about */
	/* 32-bit hole reserved for expanding i_fsnotify_mask */
	struct fsnotify_mark_connector __rcu	*i_fsnotify_marks;
#endif

#ifdef CONFIG_FS_ENCRYPTION
	struct fscrypt_inode_info	*i_crypt_info;
#endif

#ifdef CONFIG_FS_VERITY
	struct fsverity_info	*i_verity_info;
#endif

	void			*i_private; /* fs or device private pointer */
} __randomize_layout;
748  
/*
 * Get bit address from inode->i_state to use with wait_var_event()
 * infrastructure.
 *
 * The result is the address of i_state advanced by @bit bytes; it is used
 * only as a key and is not meant to be dereferenced.
 */
#define inode_state_wait_address(inode, bit) ((char *)&(inode)->i_state + (bit))
754  
/*
 * Defined elsewhere.
 * NOTE(review): presumably prepares @wqe for waiting on @bit of
 * @inode->i_state and returns the matching wait queue head -- confirm
 * against the definition.
 */
struct wait_queue_head *inode_bit_waitqueue(struct wait_bit_queue_entry *wqe,
					    struct inode *inode, u32 bit);
757  
/*
 * Wake up waiters keyed on @bit of @inode->i_state, using the address
 * produced by inode_state_wait_address().
 *
 * Caller is responsible for correct memory barriers.
 */
static inline void inode_wake_up_bit(struct inode *inode, u32 bit)
{
	void *key = inode_state_wait_address(inode, bit);

	wake_up_var(key);
}
763  
/*
 * Defined elsewhere.
 * NOTE(review): presumably returns @t truncated to the timestamp
 * granularity that applies to @inode -- confirm against the definition.
 */
struct timespec64 timestamp_truncate(struct timespec64 t, struct inode *inode);
765  
i_blocksize(const struct inode * node)766  static inline unsigned int i_blocksize(const struct inode *node)
767  {
768  	return (1 << node->i_blkbits);
769  }
770  
inode_unhashed(struct inode * inode)771  static inline int inode_unhashed(struct inode *inode)
772  {
773  	return hlist_unhashed(&inode->i_hash);
774  }
775  
776  /*
777   * __mark_inode_dirty expects inodes to be hashed.  Since we don't
778   * want special inodes in the fileset inode space, we make them
779   * appear hashed, but do not put on any lists.  hlist_del()
780   * will work fine and require no locking.
781   */
inode_fake_hash(struct inode * inode)782  static inline void inode_fake_hash(struct inode *inode)
783  {
784  	hlist_add_fake(&inode->i_hash);
785  }
786  
/*
 * inode->i_mutex nesting subclasses for the lock validator.  The numeric
 * value of each class is spelled out on the enumerator itself:
 *
 * 0: the object of the current VFS operation
 * 1: parent
 * 2: child/target
 * 3: xattr
 * 4: second non-directory
 * 5: second parent (when locking independent directories in rename)
 *
 * I_MUTEX_NONDIR2 is for certain operations (such as rename) which lock two
 * non-directories at once.
 *
 * The locking order between these classes is
 * parent[2] -> child -> grandchild -> normal -> xattr -> second non-directory
 */
enum inode_i_mutex_lock_class {
	I_MUTEX_NORMAL	= 0,
	I_MUTEX_PARENT	= 1,
	I_MUTEX_CHILD	= 2,
	I_MUTEX_XATTR	= 3,
	I_MUTEX_NONDIR2	= 4,
	I_MUTEX_PARENT2	= 5,
};
812  
inode_lock(struct inode * inode)813  static inline void inode_lock(struct inode *inode)
814  {
815  	down_write(&inode->i_rwsem);
816  }
817  
inode_unlock(struct inode * inode)818  static inline void inode_unlock(struct inode *inode)
819  {
820  	up_write(&inode->i_rwsem);
821  }
822  
inode_lock_shared(struct inode * inode)823  static inline void inode_lock_shared(struct inode *inode)
824  {
825  	down_read(&inode->i_rwsem);
826  }
827  
inode_unlock_shared(struct inode * inode)828  static inline void inode_unlock_shared(struct inode *inode)
829  {
830  	up_read(&inode->i_rwsem);
831  }
832  
inode_trylock(struct inode * inode)833  static inline int inode_trylock(struct inode *inode)
834  {
835  	return down_write_trylock(&inode->i_rwsem);
836  }
837  
inode_trylock_shared(struct inode * inode)838  static inline int inode_trylock_shared(struct inode *inode)
839  {
840  	return down_read_trylock(&inode->i_rwsem);
841  }
842  
inode_is_locked(struct inode * inode)843  static inline int inode_is_locked(struct inode *inode)
844  {
845  	return rwsem_is_locked(&inode->i_rwsem);
846  }
847  
inode_lock_nested(struct inode * inode,unsigned subclass)848  static inline void inode_lock_nested(struct inode *inode, unsigned subclass)
849  {
850  	down_write_nested(&inode->i_rwsem, subclass);
851  }
852  
inode_lock_shared_nested(struct inode * inode,unsigned subclass)853  static inline void inode_lock_shared_nested(struct inode *inode, unsigned subclass)
854  {
855  	down_read_nested(&inode->i_rwsem, subclass);
856  }
857  
filemap_invalidate_lock(struct address_space * mapping)858  static inline void filemap_invalidate_lock(struct address_space *mapping)
859  {
860  	down_write(&mapping->invalidate_lock);
861  }
862  
filemap_invalidate_unlock(struct address_space * mapping)863  static inline void filemap_invalidate_unlock(struct address_space *mapping)
864  {
865  	up_write(&mapping->invalidate_lock);
866  }
867  
filemap_invalidate_lock_shared(struct address_space * mapping)868  static inline void filemap_invalidate_lock_shared(struct address_space *mapping)
869  {
870  	down_read(&mapping->invalidate_lock);
871  }
872  
filemap_invalidate_trylock_shared(struct address_space * mapping)873  static inline int filemap_invalidate_trylock_shared(
874  					struct address_space *mapping)
875  {
876  	return down_read_trylock(&mapping->invalidate_lock);
877  }
878  
filemap_invalidate_unlock_shared(struct address_space * mapping)879  static inline void filemap_invalidate_unlock_shared(
880  					struct address_space *mapping)
881  {
882  	up_read(&mapping->invalidate_lock);
883  }
884  
/* Pairwise inode lock helpers; implemented outside this header. */
void lock_two_nondirectories(struct inode *, struct inode *);
void unlock_two_nondirectories(struct inode *, struct inode *);

/* Pairwise invalidate_lock helpers; implemented outside this header. */
void filemap_invalidate_lock_two(struct address_space *mapping1,
				 struct address_space *mapping2);
void filemap_invalidate_unlock_two(struct address_space *mapping1,
				   struct address_space *mapping2);
892  
893  
894  /*
895   * NOTE: in a 32bit arch with a preemptable kernel and
896   * an UP compile the i_size_read/write must be atomic
897   * with respect to the local cpu (unlike with preempt disabled),
898   * but they don't need to be atomic with respect to other cpus like in
899   * true SMP (so they need either to either locally disable irq around
900   * the read or for example on x86 they can be still implemented as a
901   * cmpxchg8b without the need of the lock prefix). For SMP compiles
902   * and 64bit archs it makes no difference if preempt is enabled or not.
903   */
i_size_read(const struct inode * inode)904  static inline loff_t i_size_read(const struct inode *inode)
905  {
906  #if BITS_PER_LONG==32 && defined(CONFIG_SMP)
907  	loff_t i_size;
908  	unsigned int seq;
909  
910  	do {
911  		seq = read_seqcount_begin(&inode->i_size_seqcount);
912  		i_size = inode->i_size;
913  	} while (read_seqcount_retry(&inode->i_size_seqcount, seq));
914  	return i_size;
915  #elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION)
916  	loff_t i_size;
917  
918  	preempt_disable();
919  	i_size = inode->i_size;
920  	preempt_enable();
921  	return i_size;
922  #else
923  	/* Pairs with smp_store_release() in i_size_write() */
924  	return smp_load_acquire(&inode->i_size);
925  #endif
926  }
927  
928  /*
929   * NOTE: unlike i_size_read(), i_size_write() does need locking around it
930   * (normally i_mutex), otherwise on 32bit/SMP an update of i_size_seqcount
931   * can be lost, resulting in subsequent i_size_read() calls spinning forever.
932   */
i_size_write(struct inode * inode,loff_t i_size)933  static inline void i_size_write(struct inode *inode, loff_t i_size)
934  {
935  #if BITS_PER_LONG==32 && defined(CONFIG_SMP)
936  	preempt_disable();
937  	write_seqcount_begin(&inode->i_size_seqcount);
938  	inode->i_size = i_size;
939  	write_seqcount_end(&inode->i_size_seqcount);
940  	preempt_enable();
941  #elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION)
942  	preempt_disable();
943  	inode->i_size = i_size;
944  	preempt_enable();
945  #else
946  	/*
947  	 * Pairs with smp_load_acquire() in i_size_read() to ensure
948  	 * changes related to inode size (such as page contents) are
949  	 * visible before we see the changed inode size.
950  	 */
951  	smp_store_release(&inode->i_size, i_size);
952  #endif
953  }
954  
iminor(const struct inode * inode)955  static inline unsigned iminor(const struct inode *inode)
956  {
957  	return MINOR(inode->i_rdev);
958  }
959  
imajor(const struct inode * inode)960  static inline unsigned imajor(const struct inode *inode)
961  {
962  	return MAJOR(inode->i_rdev);
963  }
964  
965  struct fown_struct {
966  	struct file *file;	/* backpointer for security modules */
967  	rwlock_t lock;          /* protects pid, uid, euid fields */
968  	struct pid *pid;	/* pid or -pgrp where SIGIO should be sent */
969  	enum pid_type pid_type;	/* Kind of process group SIGIO should be sent to */
970  	kuid_t uid, euid;	/* uid/euid of process setting the owner */
971  	int signum;		/* posix.1b rt signal to be delivered on IO */
972  };
973  
974  /**
975   * struct file_ra_state - Track a file's readahead state.
976   * @start: Where the most recent readahead started.
977   * @size: Number of pages read in the most recent readahead.
978   * @async_size: Numer of pages that were/are not needed immediately
979   *      and so were/are genuinely "ahead".  Start next readahead when
980   *      the first of these pages is accessed.
981   * @ra_pages: Maximum size of a readahead request, copied from the bdi.
982   * @mmap_miss: How many mmap accesses missed in the page cache.
983   * @prev_pos: The last byte in the most recent read request.
984   *
985   * When this structure is passed to ->readahead(), the "most recent"
986   * readahead means the current readahead.
987   */
988  struct file_ra_state {
989  	pgoff_t start;
990  	unsigned int size;
991  	unsigned int async_size;
992  	unsigned int ra_pages;
993  	unsigned int mmap_miss;
994  	loff_t prev_pos;
995  };
996  
997  /*
998   * Check if @index falls in the readahead windows.
999   */
ra_has_index(struct file_ra_state * ra,pgoff_t index)1000  static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index)
1001  {
1002  	return (index >= ra->start &&
1003  		index <  ra->start + ra->size);
1004  }
1005  
1006  /**
1007   * struct file - Represents a file
1008   * @f_count: reference count
1009   * @f_lock: Protects f_ep, f_flags. Must not be taken from IRQ context.
1010   * @f_mode: FMODE_* flags often used in hotpaths
1011   * @f_op: file operations
1012   * @f_mapping: Contents of a cacheable, mappable object.
1013   * @private_data: filesystem or driver specific data
1014   * @f_inode: cached inode
1015   * @f_flags: file flags
1016   * @f_iocb_flags: iocb flags
1017   * @f_cred: stashed credentials of creator/opener
1018   * @f_path: path of the file
1019   * @f_pos_lock: lock protecting file position
1020   * @f_pipe: specific to pipes
1021   * @f_pos: file position
1022   * @f_security: LSM security context of this file
1023   * @f_owner: file owner
1024   * @f_wb_err: writeback error
1025   * @f_sb_err: per sb writeback errors
1026   * @f_ep: link of all epoll hooks for this file
1027   * @f_task_work: task work entry point
1028   * @f_llist: work queue entrypoint
1029   * @f_ra: file's readahead state
1030   * @f_freeptr: Pointer used by SLAB_TYPESAFE_BY_RCU file cache (don't touch.)
1031   */
1032  struct file {
1033  	atomic_long_t			f_count;
1034  	spinlock_t			f_lock;
1035  	fmode_t				f_mode;
1036  	const struct file_operations	*f_op;
1037  	struct address_space		*f_mapping;
1038  	void				*private_data;
1039  	struct inode			*f_inode;
1040  	unsigned int			f_flags;
1041  	unsigned int			f_iocb_flags;
1042  	const struct cred		*f_cred;
1043  	/* --- cacheline 1 boundary (64 bytes) --- */
1044  	struct path			f_path;
1045  	union {
1046  		/* regular files (with FMODE_ATOMIC_POS) and directories */
1047  		struct mutex		f_pos_lock;
1048  		/* pipes */
1049  		u64			f_pipe;
1050  	};
1051  	loff_t				f_pos;
1052  #ifdef CONFIG_SECURITY
1053  	void				*f_security;
1054  #endif
1055  	/* --- cacheline 2 boundary (128 bytes) --- */
1056  	struct fown_struct		*f_owner;
1057  	errseq_t			f_wb_err;
1058  	errseq_t			f_sb_err;
1059  #ifdef CONFIG_EPOLL
1060  	struct hlist_head		*f_ep;
1061  #endif
1062  	union {
1063  		struct callback_head	f_task_work;
1064  		struct llist_node	f_llist;
1065  		struct file_ra_state	f_ra;
1066  		freeptr_t		f_freeptr;
1067  	};
1068  	/* --- cacheline 3 boundary (192 bytes) --- */
1069  } __randomize_layout
1070    __attribute__((aligned(4)));	/* lest something weird decides that 2 is OK */
1071  
1072  struct file_handle {
1073  	__u32 handle_bytes;
1074  	int handle_type;
1075  	/* file identifier */
1076  	unsigned char f_handle[] __counted_by(handle_bytes);
1077  };
1078  
get_file(struct file * f)1079  static inline struct file *get_file(struct file *f)
1080  {
1081  	long prior = atomic_long_fetch_inc_relaxed(&f->f_count);
1082  	WARN_ONCE(!prior, "struct file::f_count incremented from zero; use-after-free condition present!\n");
1083  	return f;
1084  }
1085  
1086  struct file *get_file_rcu(struct file __rcu **f);
1087  struct file *get_file_active(struct file **f);
1088  
1089  #define file_count(x)	atomic_long_read(&(x)->f_count)
1090  
1091  #define	MAX_NON_LFS	((1UL<<31) - 1)
1092  
1093  /* Page cache limit. The filesystems should put that into their s_maxbytes
1094     limits, otherwise bad things can happen in VM. */
1095  #if BITS_PER_LONG==32
1096  #define MAX_LFS_FILESIZE	((loff_t)ULONG_MAX << PAGE_SHIFT)
1097  #elif BITS_PER_LONG==64
1098  #define MAX_LFS_FILESIZE 	((loff_t)LLONG_MAX)
1099  #endif
1100  
1101  /* legacy typedef, should eventually be removed */
1102  typedef void *fl_owner_t;
1103  
1104  struct file_lock;
1105  struct file_lease;
1106  
1107  /* The following constant reflects the upper bound of the file/locking space */
1108  #ifndef OFFSET_MAX
1109  #define OFFSET_MAX	type_max(loff_t)
1110  #define OFFT_OFFSET_MAX	type_max(off_t)
1111  #endif
1112  
1113  int file_f_owner_allocate(struct file *file);
file_f_owner(const struct file * file)1114  static inline struct fown_struct *file_f_owner(const struct file *file)
1115  {
1116  	return READ_ONCE(file->f_owner);
1117  }
1118  
1119  extern void send_sigio(struct fown_struct *fown, int fd, int band);
1120  
file_inode(const struct file * f)1121  static inline struct inode *file_inode(const struct file *f)
1122  {
1123  	return f->f_inode;
1124  }
1125  
1126  /*
1127   * file_dentry() is a relic from the days that overlayfs was using files with a
1128   * "fake" path, meaning, f_path on overlayfs and f_inode on underlying fs.
1129   * In those days, file_dentry() was needed to get the underlying fs dentry that
1130   * matches f_inode.
1131   * Files with "fake" path should not exist nowadays, so use an assertion to make
1132   * sure that file_dentry() was not papering over filesystem bugs.
1133   */
file_dentry(const struct file * file)1134  static inline struct dentry *file_dentry(const struct file *file)
1135  {
1136  	struct dentry *dentry = file->f_path.dentry;
1137  
1138  	WARN_ON_ONCE(d_inode(dentry) != file_inode(file));
1139  	return dentry;
1140  }
1141  
1142  struct fasync_struct {
1143  	rwlock_t		fa_lock;
1144  	int			magic;
1145  	int			fa_fd;
1146  	struct fasync_struct	*fa_next; /* singly linked list */
1147  	struct file		*fa_file;
1148  	struct rcu_head		fa_rcu;
1149  };
1150  
1151  #define FASYNC_MAGIC 0x4601
1152  
1153  /* SMP safe fasync helpers: */
1154  extern int fasync_helper(int, struct file *, int, struct fasync_struct **);
1155  extern struct fasync_struct *fasync_insert_entry(int, struct file *, struct fasync_struct **, struct fasync_struct *);
1156  extern int fasync_remove_entry(struct file *, struct fasync_struct **);
1157  extern struct fasync_struct *fasync_alloc(void);
1158  extern void fasync_free(struct fasync_struct *);
1159  
1160  /* can be called from interrupts */
1161  extern void kill_fasync(struct fasync_struct **, int, int);
1162  
1163  extern void __f_setown(struct file *filp, struct pid *, enum pid_type, int force);
1164  extern int f_setown(struct file *filp, int who, int force);
1165  extern void f_delown(struct file *filp);
1166  extern pid_t f_getown(struct file *filp);
1167  extern int send_sigurg(struct file *file);
1168  
/*
 * Bits for sb->s_flags.  Where a flag also exists as an MS_* flag, the
 * two mirror each other.
 */
#define SB_RDONLY	BIT(0)	/* Mount read-only */
#define SB_NOSUID	BIT(1)	/* Ignore suid and sgid bits */
#define SB_NODEV	BIT(2)	/* Disallow access to device special files */
#define SB_NOEXEC	BIT(3)	/* Disallow program execution */
#define SB_SYNCHRONOUS	BIT(4)	/* Writes are synced at once */
#define SB_MANDLOCK	BIT(6)	/* Allow mandatory locks on an FS */
#define SB_DIRSYNC	BIT(7)	/* Directory modifications are synchronous */
#define SB_NOATIME	BIT(10)	/* Do not update access times. */
#define SB_NODIRATIME	BIT(11)	/* Do not update directory access times */
#define SB_SILENT	BIT(15)
#define SB_POSIXACL	BIT(16)	/* Supports POSIX ACLs */
#define SB_INLINECRYPT	BIT(17)	/* Use blk-crypto for encrypted files */
#define SB_KERNMOUNT	BIT(22)	/* this is a kern_mount call */
#define SB_I_VERSION	BIT(23)	/* Update inode I_version field */
#define SB_LAZYTIME	BIT(25)	/* Update the on-disk [acm]times lazily */

/* The sb flags below are internal to the kernel */
#define SB_DEAD		BIT(21)
#define SB_DYING	BIT(24)
#define SB_SUBMOUNT	BIT(26)
#define SB_FORCE	BIT(27)
#define SB_NOSEC	BIT(28)
#define SB_BORN		BIT(29)
#define SB_ACTIVE	BIT(30)
#define SB_NOUSER	BIT(31)
1198  
/* These flags relate to encoding and casefolding */
#define SB_ENC_STRICT_MODE_FL	(1 << 0)

/*
 * Nonzero when SB_ENC_STRICT_MODE_FL is set in @sb's s_encoding_flags.
 * The argument is parenthesized so that any pointer expression (for
 * example "&sb_var" or "base + i") can be passed safely.
 */
#define sb_has_strict_encoding(sb) \
	((sb)->s_encoding_flags & SB_ENC_STRICT_MODE_FL)
1204  
/*
 *	Umount options
 */

#define MNT_FORCE	0x00000001	/* Attempt to forcibly umount */
#define MNT_DETACH	0x00000002	/* Just detach from the tree */
#define MNT_EXPIRE	0x00000004	/* Mark for expiry */
#define UMOUNT_NOFOLLOW	0x00000008	/* Don't follow symlink on umount */
#define UMOUNT_UNUSED	0x80000000	/* Flag guaranteed to be unused */

/* Bits for sb->s_iflags */
#define SB_I_CGROUPWB	0x00000001	/* cgroup-aware writeback enabled */
#define SB_I_NOEXEC	0x00000002	/* Ignore executables on this fs */
#define SB_I_NODEV	0x00000004	/* Ignore devices on this fs */
#define SB_I_STABLE_WRITES 0x00000008	/* don't modify blks until WB is done */

/* sb->s_iflags bits that limit user namespace mounts */
#define SB_I_USERNS_VISIBLE		0x00000010 /* fstype already mounted */
#define SB_I_IMA_UNVERIFIABLE_SIGNATURE	0x00000020
#define SB_I_UNTRUSTED_MOUNTER		0x00000040
#define SB_I_EVM_HMAC_UNSUPPORTED	0x00000080

#define SB_I_SKIP_SYNC	0x00000100	/* Skip superblock at global sync */
#define SB_I_PERSB_BDI	0x00000200	/* has a per-sb bdi */
#define SB_I_TS_EXPIRY_WARNED 0x00000400 /* warned about timestamp range expiry */
#define SB_I_RETIRED	0x00000800	/* superblock shouldn't be reused */
#define SB_I_NOUMASK	0x00001000	/* VFS does not apply umask */
#define SB_I_NOIDMAP	0x00002000	/* No idmapped mounts on this superblock */
1233  
/* Possible states of the 'frozen' field */
enum {
	SB_UNFROZEN		= 0,	/* FS is unfrozen */
	SB_FREEZE_WRITE		= 1,	/* Writes, dir ops, ioctls frozen */
	SB_FREEZE_PAGEFAULT	= 2,	/* Page faults stopped as well */
	SB_FREEZE_FS		= 3,	/*
					 * For internal FS use (e.g. to stop
					 * internal threads if needed)
					 */
	SB_FREEZE_COMPLETE	= 4,	/* ->freeze_fs finished successfully */
};

/* One less than SB_FREEZE_COMPLETE; sizes sb_writers.rw_sem[]. */
#define SB_FREEZE_LEVELS (SB_FREEZE_COMPLETE - 1)
1245  
1246  struct sb_writers {
1247  	unsigned short			frozen;		/* Is sb frozen? */
1248  	int				freeze_kcount;	/* How many kernel freeze requests? */
1249  	int				freeze_ucount;	/* How many userspace freeze requests? */
1250  	struct percpu_rw_semaphore	rw_sem[SB_FREEZE_LEVELS];
1251  };
1252  
1253  struct super_block {
1254  	struct list_head	s_list;		/* Keep this first */
1255  	dev_t			s_dev;		/* search index; _not_ kdev_t */
1256  	unsigned char		s_blocksize_bits;
1257  	unsigned long		s_blocksize;
1258  	loff_t			s_maxbytes;	/* Max file size */
1259  	struct file_system_type	*s_type;
1260  	const struct super_operations	*s_op;
1261  	const struct dquot_operations	*dq_op;
1262  	const struct quotactl_ops	*s_qcop;
1263  	const struct export_operations *s_export_op;
1264  	unsigned long		s_flags;
1265  	unsigned long		s_iflags;	/* internal SB_I_* flags */
1266  	unsigned long		s_magic;
1267  	struct dentry		*s_root;
1268  	struct rw_semaphore	s_umount;
1269  	int			s_count;
1270  	atomic_t		s_active;
1271  #ifdef CONFIG_SECURITY
1272  	void                    *s_security;
1273  #endif
1274  	const struct xattr_handler * const *s_xattr;
1275  #ifdef CONFIG_FS_ENCRYPTION
1276  	const struct fscrypt_operations	*s_cop;
1277  	struct fscrypt_keyring	*s_master_keys; /* master crypto keys in use */
1278  #endif
1279  #ifdef CONFIG_FS_VERITY
1280  	const struct fsverity_operations *s_vop;
1281  #endif
1282  #if IS_ENABLED(CONFIG_UNICODE)
1283  	struct unicode_map *s_encoding;
1284  	__u16 s_encoding_flags;
1285  #endif
1286  	struct hlist_bl_head	s_roots;	/* alternate root dentries for NFS */
1287  	struct list_head	s_mounts;	/* list of mounts; _not_ for fs use */
1288  	struct block_device	*s_bdev;	/* can go away once we use an accessor for @s_bdev_file */
1289  	struct file		*s_bdev_file;
1290  	struct backing_dev_info *s_bdi;
1291  	struct mtd_info		*s_mtd;
1292  	struct hlist_node	s_instances;
1293  	unsigned int		s_quota_types;	/* Bitmask of supported quota types */
1294  	struct quota_info	s_dquot;	/* Diskquota specific options */
1295  
1296  	struct sb_writers	s_writers;
1297  
1298  	/*
1299  	 * Keep s_fs_info, s_time_gran, s_fsnotify_mask, and
1300  	 * s_fsnotify_info together for cache efficiency. They are frequently
1301  	 * accessed and rarely modified.
1302  	 */
1303  	void			*s_fs_info;	/* Filesystem private info */
1304  
1305  	/* Granularity of c/m/atime in ns (cannot be worse than a second) */
1306  	u32			s_time_gran;
1307  	/* Time limits for c/m/atime in seconds */
1308  	time64_t		   s_time_min;
1309  	time64_t		   s_time_max;
1310  #ifdef CONFIG_FSNOTIFY
1311  	u32			s_fsnotify_mask;
1312  	struct fsnotify_sb_info	*s_fsnotify_info;
1313  #endif
1314  
1315  	/*
1316  	 * q: why are s_id and s_sysfs_name not the same? both are human
1317  	 * readable strings that identify the filesystem
1318  	 * a: s_id is allowed to change at runtime; it's used in log messages,
1319  	 * and we want to when a device starts out as single device (s_id is dev
1320  	 * name) but then a device is hot added and we have to switch to
1321  	 * identifying it by UUID
1322  	 * but s_sysfs_name is a handle for programmatic access, and can't
1323  	 * change at runtime
1324  	 */
1325  	char			s_id[32];	/* Informational name */
1326  	uuid_t			s_uuid;		/* UUID */
1327  	u8			s_uuid_len;	/* Default 16, possibly smaller for weird filesystems */
1328  
1329  	/* if set, fs shows up under sysfs at /sys/fs/$FSTYP/s_sysfs_name */
1330  	char			s_sysfs_name[UUID_STRING_LEN + 1];
1331  
1332  	unsigned int		s_max_links;
1333  
1334  	/*
1335  	 * The next field is for VFS *only*. No filesystems have any business
1336  	 * even looking at it. You had been warned.
1337  	 */
1338  	struct mutex s_vfs_rename_mutex;	/* Kludge */
1339  
1340  	/*
1341  	 * Filesystem subtype.  If non-empty the filesystem type field
1342  	 * in /proc/mounts will be "type.subtype"
1343  	 */
1344  	const char *s_subtype;
1345  
1346  	const struct dentry_operations *s_d_op; /* default d_op for dentries */
1347  
1348  	struct shrinker *s_shrink;	/* per-sb shrinker handle */
1349  
1350  	/* Number of inodes with nlink == 0 but still referenced */
1351  	atomic_long_t s_remove_count;
1352  
1353  	/* Read-only state of the superblock is being changed */
1354  	int s_readonly_remount;
1355  
1356  	/* per-sb errseq_t for reporting writeback errors via syncfs */
1357  	errseq_t s_wb_err;
1358  
1359  	/* AIO completions deferred from interrupt context */
1360  	struct workqueue_struct *s_dio_done_wq;
1361  	struct hlist_head s_pins;
1362  
1363  	/*
1364  	 * Owning user namespace and default context in which to
1365  	 * interpret filesystem uids, gids, quotas, device nodes,
1366  	 * xattrs and security labels.
1367  	 */
1368  	struct user_namespace *s_user_ns;
1369  
1370  	/*
1371  	 * The list_lru structure is essentially just a pointer to a table
1372  	 * of per-node lru lists, each of which has its own spinlock.
1373  	 * There is no need to put them into separate cachelines.
1374  	 */
1375  	struct list_lru		s_dentry_lru;
1376  	struct list_lru		s_inode_lru;
1377  	struct rcu_head		rcu;
1378  	struct work_struct	destroy_work;
1379  
1380  	struct mutex		s_sync_lock;	/* sync serialisation lock */
1381  
1382  	/*
1383  	 * Indicates how deep in a filesystem stack this SB is
1384  	 */
1385  	int s_stack_depth;
1386  
1387  	/* s_inode_list_lock protects s_inodes */
1388  	spinlock_t		s_inode_list_lock ____cacheline_aligned_in_smp;
1389  	struct list_head	s_inodes;	/* all inodes */
1390  
1391  	spinlock_t		s_inode_wblist_lock;
1392  	struct list_head	s_inodes_wb;	/* writeback inodes */
1393  } __randomize_layout;
1394  
i_user_ns(const struct inode * inode)1395  static inline struct user_namespace *i_user_ns(const struct inode *inode)
1396  {
1397  	return inode->i_sb->s_user_ns;
1398  }
1399  
1400  /* Helper functions so that in most cases filesystems will
1401   * not need to deal directly with kuid_t and kgid_t and can
1402   * instead deal with the raw numeric values that are stored
1403   * in the filesystem.
1404   */
i_uid_read(const struct inode * inode)1405  static inline uid_t i_uid_read(const struct inode *inode)
1406  {
1407  	return from_kuid(i_user_ns(inode), inode->i_uid);
1408  }
1409  
i_gid_read(const struct inode * inode)1410  static inline gid_t i_gid_read(const struct inode *inode)
1411  {
1412  	return from_kgid(i_user_ns(inode), inode->i_gid);
1413  }
1414  
/* Set @inode's i_uid to make_kuid() of raw @uid in i_user_ns(@inode). */
static inline void i_uid_write(struct inode *inode, uid_t uid)
{
	struct user_namespace *ns = i_user_ns(inode);

	inode->i_uid = make_kuid(ns, uid);
}
1419  
/* Set @inode's i_gid to make_kgid() of raw @gid in i_user_ns(@inode). */
static inline void i_gid_write(struct inode *inode, gid_t gid)
{
	struct user_namespace *ns = i_user_ns(inode);

	inode->i_gid = make_kgid(ns, gid);
}
1424  
1425  /**
1426   * i_uid_into_vfsuid - map an inode's i_uid down according to an idmapping
1427   * @idmap: idmap of the mount the inode was found from
1428   * @inode: inode to map
1429   *
1430   * Return: whe inode's i_uid mapped down according to @idmap.
1431   * If the inode's i_uid has no mapping INVALID_VFSUID is returned.
1432   */
i_uid_into_vfsuid(struct mnt_idmap * idmap,const struct inode * inode)1433  static inline vfsuid_t i_uid_into_vfsuid(struct mnt_idmap *idmap,
1434  					 const struct inode *inode)
1435  {
1436  	return make_vfsuid(idmap, i_user_ns(inode), inode->i_uid);
1437  }
1438  
1439  /**
1440   * i_uid_needs_update - check whether inode's i_uid needs to be updated
1441   * @idmap: idmap of the mount the inode was found from
1442   * @attr: the new attributes of @inode
1443   * @inode: the inode to update
1444   *
1445   * Check whether the $inode's i_uid field needs to be updated taking idmapped
1446   * mounts into account if the filesystem supports it.
1447   *
1448   * Return: true if @inode's i_uid field needs to be updated, false if not.
1449   */
i_uid_needs_update(struct mnt_idmap * idmap,const struct iattr * attr,const struct inode * inode)1450  static inline bool i_uid_needs_update(struct mnt_idmap *idmap,
1451  				      const struct iattr *attr,
1452  				      const struct inode *inode)
1453  {
1454  	return ((attr->ia_valid & ATTR_UID) &&
1455  		!vfsuid_eq(attr->ia_vfsuid,
1456  			   i_uid_into_vfsuid(idmap, inode)));
1457  }
1458  
1459  /**
1460   * i_uid_update - update @inode's i_uid field
1461   * @idmap: idmap of the mount the inode was found from
1462   * @attr: the new attributes of @inode
1463   * @inode: the inode to update
1464   *
1465   * Safely update @inode's i_uid field translating the vfsuid of any idmapped
1466   * mount into the filesystem kuid.
1467   */
i_uid_update(struct mnt_idmap * idmap,const struct iattr * attr,struct inode * inode)1468  static inline void i_uid_update(struct mnt_idmap *idmap,
1469  				const struct iattr *attr,
1470  				struct inode *inode)
1471  {
1472  	if (attr->ia_valid & ATTR_UID)
1473  		inode->i_uid = from_vfsuid(idmap, i_user_ns(inode),
1474  					   attr->ia_vfsuid);
1475  }
1476  
1477  /**
1478   * i_gid_into_vfsgid - map an inode's i_gid down according to an idmapping
1479   * @idmap: idmap of the mount the inode was found from
1480   * @inode: inode to map
1481   *
1482   * Return: the inode's i_gid mapped down according to @idmap.
1483   * If the inode's i_gid has no mapping INVALID_VFSGID is returned.
1484   */
i_gid_into_vfsgid(struct mnt_idmap * idmap,const struct inode * inode)1485  static inline vfsgid_t i_gid_into_vfsgid(struct mnt_idmap *idmap,
1486  					 const struct inode *inode)
1487  {
1488  	return make_vfsgid(idmap, i_user_ns(inode), inode->i_gid);
1489  }
1490  
1491  /**
1492   * i_gid_needs_update - check whether inode's i_gid needs to be updated
1493   * @idmap: idmap of the mount the inode was found from
1494   * @attr: the new attributes of @inode
1495   * @inode: the inode to update
1496   *
1497   * Check whether the $inode's i_gid field needs to be updated taking idmapped
1498   * mounts into account if the filesystem supports it.
1499   *
1500   * Return: true if @inode's i_gid field needs to be updated, false if not.
1501   */
i_gid_needs_update(struct mnt_idmap * idmap,const struct iattr * attr,const struct inode * inode)1502  static inline bool i_gid_needs_update(struct mnt_idmap *idmap,
1503  				      const struct iattr *attr,
1504  				      const struct inode *inode)
1505  {
1506  	return ((attr->ia_valid & ATTR_GID) &&
1507  		!vfsgid_eq(attr->ia_vfsgid,
1508  			   i_gid_into_vfsgid(idmap, inode)));
1509  }
1510  
1511  /**
1512   * i_gid_update - update @inode's i_gid field
1513   * @idmap: idmap of the mount the inode was found from
1514   * @attr: the new attributes of @inode
1515   * @inode: the inode to update
1516   *
1517   * Safely update @inode's i_gid field translating the vfsgid of any idmapped
1518   * mount into the filesystem kgid.
1519   */
i_gid_update(struct mnt_idmap * idmap,const struct iattr * attr,struct inode * inode)1520  static inline void i_gid_update(struct mnt_idmap *idmap,
1521  				const struct iattr *attr,
1522  				struct inode *inode)
1523  {
1524  	if (attr->ia_valid & ATTR_GID)
1525  		inode->i_gid = from_vfsgid(idmap, i_user_ns(inode),
1526  					   attr->ia_vfsgid);
1527  }
1528  
1529  /**
1530   * inode_fsuid_set - initialize inode's i_uid field with callers fsuid
1531   * @inode: inode to initialize
1532   * @idmap: idmap of the mount the inode was found from
1533   *
1534   * Initialize the i_uid field of @inode. If the inode was found/created via
1535   * an idmapped mount map the caller's fsuid according to @idmap.
1536   */
inode_fsuid_set(struct inode * inode,struct mnt_idmap * idmap)1537  static inline void inode_fsuid_set(struct inode *inode,
1538  				   struct mnt_idmap *idmap)
1539  {
1540  	inode->i_uid = mapped_fsuid(idmap, i_user_ns(inode));
1541  }
1542  
1543  /**
1544   * inode_fsgid_set - initialize inode's i_gid field with callers fsgid
1545   * @inode: inode to initialize
1546   * @idmap: idmap of the mount the inode was found from
1547   *
1548   * Initialize the i_gid field of @inode. If the inode was found/created via
1549   * an idmapped mount map the caller's fsgid according to @idmap.
1550   */
inode_fsgid_set(struct inode * inode,struct mnt_idmap * idmap)1551  static inline void inode_fsgid_set(struct inode *inode,
1552  				   struct mnt_idmap *idmap)
1553  {
1554  	inode->i_gid = mapped_fsgid(idmap, i_user_ns(inode));
1555  }
1556  
1557  /**
1558   * fsuidgid_has_mapping() - check whether caller's fsuid/fsgid is mapped
1559   * @sb: the superblock we want a mapping in
1560   * @idmap: idmap of the relevant mount
1561   *
1562   * Check whether the caller's fsuid and fsgid have a valid mapping in the
1563   * s_user_ns of the superblock @sb. If the caller is on an idmapped mount map
1564   * the caller's fsuid and fsgid according to the @idmap first.
1565   *
1566   * Return: true if fsuid and fsgid is mapped, false if not.
1567   */
fsuidgid_has_mapping(struct super_block * sb,struct mnt_idmap * idmap)1568  static inline bool fsuidgid_has_mapping(struct super_block *sb,
1569  					struct mnt_idmap *idmap)
1570  {
1571  	struct user_namespace *fs_userns = sb->s_user_ns;
1572  	kuid_t kuid;
1573  	kgid_t kgid;
1574  
1575  	kuid = mapped_fsuid(idmap, fs_userns);
1576  	if (!uid_valid(kuid))
1577  		return false;
1578  	kgid = mapped_fsgid(idmap, fs_userns);
1579  	if (!gid_valid(kgid))
1580  		return false;
1581  	return kuid_has_mapping(fs_userns, kuid) &&
1582  	       kgid_has_mapping(fs_userns, kgid);
1583  }
1584  
/* Timestamp helpers implemented outside this header. */
struct timespec64 current_time(struct inode *inode);
struct timespec64 inode_set_ctime_current(struct inode *inode);
1587  
inode_get_atime_sec(const struct inode * inode)1588  static inline time64_t inode_get_atime_sec(const struct inode *inode)
1589  {
1590  	return inode->i_atime_sec;
1591  }
1592  
inode_get_atime_nsec(const struct inode * inode)1593  static inline long inode_get_atime_nsec(const struct inode *inode)
1594  {
1595  	return inode->i_atime_nsec;
1596  }
1597  
inode_get_atime(const struct inode * inode)1598  static inline struct timespec64 inode_get_atime(const struct inode *inode)
1599  {
1600  	struct timespec64 ts = { .tv_sec  = inode_get_atime_sec(inode),
1601  				 .tv_nsec = inode_get_atime_nsec(inode) };
1602  
1603  	return ts;
1604  }
1605  
inode_set_atime_to_ts(struct inode * inode,struct timespec64 ts)1606  static inline struct timespec64 inode_set_atime_to_ts(struct inode *inode,
1607  						      struct timespec64 ts)
1608  {
1609  	inode->i_atime_sec = ts.tv_sec;
1610  	inode->i_atime_nsec = ts.tv_nsec;
1611  	return ts;
1612  }
1613  
/*
 * Set @inode's atime to { @sec, @nsec } and return that value, as
 * reported by inode_set_atime_to_ts().
 */
static inline struct timespec64 inode_set_atime(struct inode *inode,
						time64_t sec, long nsec)
{
	return inode_set_atime_to_ts(inode,
			(struct timespec64){ .tv_sec = sec, .tv_nsec = nsec });
}
1622  
inode_get_mtime_sec(const struct inode * inode)1623  static inline time64_t inode_get_mtime_sec(const struct inode *inode)
1624  {
1625  	return inode->i_mtime_sec;
1626  }
1627  
inode_get_mtime_nsec(const struct inode * inode)1628  static inline long inode_get_mtime_nsec(const struct inode *inode)
1629  {
1630  	return inode->i_mtime_nsec;
1631  }
1632  
inode_get_mtime(const struct inode * inode)1633  static inline struct timespec64 inode_get_mtime(const struct inode *inode)
1634  {
1635  	struct timespec64 ts = { .tv_sec  = inode_get_mtime_sec(inode),
1636  				 .tv_nsec = inode_get_mtime_nsec(inode) };
1637  	return ts;
1638  }
1639  
inode_set_mtime_to_ts(struct inode * inode,struct timespec64 ts)1640  static inline struct timespec64 inode_set_mtime_to_ts(struct inode *inode,
1641  						      struct timespec64 ts)
1642  {
1643  	inode->i_mtime_sec = ts.tv_sec;
1644  	inode->i_mtime_nsec = ts.tv_nsec;
1645  	return ts;
1646  }
1647  
/*
 * Set @inode's mtime to { @sec, @nsec } and return that value, as
 * reported by inode_set_mtime_to_ts().
 */
static inline struct timespec64 inode_set_mtime(struct inode *inode,
						time64_t sec, long nsec)
{
	return inode_set_mtime_to_ts(inode,
			(struct timespec64){ .tv_sec = sec, .tv_nsec = nsec });
}
1655  
inode_get_ctime_sec(const struct inode * inode)1656  static inline time64_t inode_get_ctime_sec(const struct inode *inode)
1657  {
1658  	return inode->i_ctime_sec;
1659  }
1660  
inode_get_ctime_nsec(const struct inode * inode)1661  static inline long inode_get_ctime_nsec(const struct inode *inode)
1662  {
1663  	return inode->i_ctime_nsec;
1664  }
1665  
inode_get_ctime(const struct inode * inode)1666  static inline struct timespec64 inode_get_ctime(const struct inode *inode)
1667  {
1668  	struct timespec64 ts = { .tv_sec  = inode_get_ctime_sec(inode),
1669  				 .tv_nsec = inode_get_ctime_nsec(inode) };
1670  
1671  	return ts;
1672  }
1673  
inode_set_ctime_to_ts(struct inode * inode,struct timespec64 ts)1674  static inline struct timespec64 inode_set_ctime_to_ts(struct inode *inode,
1675  						      struct timespec64 ts)
1676  {
1677  	inode->i_ctime_sec = ts.tv_sec;
1678  	inode->i_ctime_nsec = ts.tv_nsec;
1679  	return ts;
1680  }
1681  
1682  /**
1683   * inode_set_ctime - set the ctime in the inode
1684   * @inode: inode in which to set the ctime
1685   * @sec: tv_sec value to set
1686   * @nsec: tv_nsec value to set
1687   *
1688   * Set the ctime in @inode to { @sec, @nsec }
1689   */
inode_set_ctime(struct inode * inode,time64_t sec,long nsec)1690  static inline struct timespec64 inode_set_ctime(struct inode *inode,
1691  						time64_t sec, long nsec)
1692  {
1693  	struct timespec64 ts = { .tv_sec  = sec,
1694  				 .tv_nsec = nsec };
1695  
1696  	return inode_set_ctime_to_ts(inode, ts);
1697  }
1698  
/* Out-of-line helper: takes @inode and returns a struct timespec64. */
struct timespec64 simple_inode_init_ts(struct inode *inode);
1700  
1701  /*
1702   * Snapshotting support.
1703   */
1704  
1705  /*
1706   * These are internal functions, please use sb_start_{write,pagefault,intwrite}
1707   * instead.
1708   */
__sb_end_write(struct super_block * sb,int level)1709  static inline void __sb_end_write(struct super_block *sb, int level)
1710  {
1711  	percpu_up_read(sb->s_writers.rw_sem + level-1);
1712  }
1713  
__sb_start_write(struct super_block * sb,int level)1714  static inline void __sb_start_write(struct super_block *sb, int level)
1715  {
1716  	percpu_down_read(sb->s_writers.rw_sem + level - 1);
1717  }
1718  
__sb_start_write_trylock(struct super_block * sb,int level)1719  static inline bool __sb_start_write_trylock(struct super_block *sb, int level)
1720  {
1721  	return percpu_down_read_trylock(sb->s_writers.rw_sem + level - 1);
1722  }
1723  
/*
 * Forward to percpu_rwsem_acquire()/percpu_rwsem_release() for the
 * s_writers.rw_sem entry at index (lev) - 1, passing _THIS_IP_ along.
 */
#define __sb_writers_acquired(sb, lev)	\
	percpu_rwsem_acquire(&(sb)->s_writers.rw_sem[(lev)-1], 1, _THIS_IP_)
#define __sb_writers_release(sb, lev)	\
	percpu_rwsem_release(&(sb)->s_writers.rw_sem[(lev)-1], _THIS_IP_)
1728  
1729  /**
1730   * __sb_write_started - check if sb freeze level is held
1731   * @sb: the super we write to
1732   * @level: the freeze level
1733   *
1734   * * > 0 - sb freeze level is held
1735   * *   0 - sb freeze level is not held
1736   * * < 0 - !CONFIG_LOCKDEP/LOCK_STATE_UNKNOWN
1737   */
__sb_write_started(const struct super_block * sb,int level)1738  static inline int __sb_write_started(const struct super_block *sb, int level)
1739  {
1740  	return lockdep_is_held_type(sb->s_writers.rw_sem + level - 1, 1);
1741  }
1742  
1743  /**
1744   * sb_write_started - check if SB_FREEZE_WRITE is held
1745   * @sb: the super we write to
1746   *
1747   * May be false positive with !CONFIG_LOCKDEP/LOCK_STATE_UNKNOWN.
1748   */
sb_write_started(const struct super_block * sb)1749  static inline bool sb_write_started(const struct super_block *sb)
1750  {
1751  	return __sb_write_started(sb, SB_FREEZE_WRITE);
1752  }
1753  
1754  /**
1755   * sb_write_not_started - check if SB_FREEZE_WRITE is not held
1756   * @sb: the super we write to
1757   *
1758   * May be false positive with !CONFIG_LOCKDEP/LOCK_STATE_UNKNOWN.
1759   */
sb_write_not_started(const struct super_block * sb)1760  static inline bool sb_write_not_started(const struct super_block *sb)
1761  {
1762  	return __sb_write_started(sb, SB_FREEZE_WRITE) <= 0;
1763  }
1764  
1765  /**
1766   * file_write_started - check if SB_FREEZE_WRITE is held
1767   * @file: the file we write to
1768   *
1769   * May be false positive with !CONFIG_LOCKDEP/LOCK_STATE_UNKNOWN.
1770   * May be false positive with !S_ISREG, because file_start_write() has
1771   * no effect on !S_ISREG.
1772   */
file_write_started(const struct file * file)1773  static inline bool file_write_started(const struct file *file)
1774  {
1775  	if (!S_ISREG(file_inode(file)->i_mode))
1776  		return true;
1777  	return sb_write_started(file_inode(file)->i_sb);
1778  }
1779  
1780  /**
1781   * file_write_not_started - check if SB_FREEZE_WRITE is not held
1782   * @file: the file we write to
1783   *
1784   * May be false positive with !CONFIG_LOCKDEP/LOCK_STATE_UNKNOWN.
1785   * May be false positive with !S_ISREG, because file_start_write() has
1786   * no effect on !S_ISREG.
1787   */
file_write_not_started(const struct file * file)1788  static inline bool file_write_not_started(const struct file *file)
1789  {
1790  	if (!S_ISREG(file_inode(file)->i_mode))
1791  		return true;
1792  	return sb_write_not_started(file_inode(file)->i_sb);
1793  }
1794  
1795  /**
1796   * sb_end_write - drop write access to a superblock
1797   * @sb: the super we wrote to
1798   *
1799   * Decrement number of writers to the filesystem. Wake up possible waiters
1800   * wanting to freeze the filesystem.
1801   */
sb_end_write(struct super_block * sb)1802  static inline void sb_end_write(struct super_block *sb)
1803  {
1804  	__sb_end_write(sb, SB_FREEZE_WRITE);
1805  }
1806  
1807  /**
1808   * sb_end_pagefault - drop write access to a superblock from a page fault
1809   * @sb: the super we wrote to
1810   *
1811   * Decrement number of processes handling write page fault to the filesystem.
1812   * Wake up possible waiters wanting to freeze the filesystem.
1813   */
sb_end_pagefault(struct super_block * sb)1814  static inline void sb_end_pagefault(struct super_block *sb)
1815  {
1816  	__sb_end_write(sb, SB_FREEZE_PAGEFAULT);
1817  }
1818  
1819  /**
1820   * sb_end_intwrite - drop write access to a superblock for internal fs purposes
1821   * @sb: the super we wrote to
1822   *
1823   * Decrement fs-internal number of writers to the filesystem.  Wake up possible
1824   * waiters wanting to freeze the filesystem.
1825   */
sb_end_intwrite(struct super_block * sb)1826  static inline void sb_end_intwrite(struct super_block *sb)
1827  {
1828  	__sb_end_write(sb, SB_FREEZE_FS);
1829  }
1830  
1831  /**
1832   * sb_start_write - get write access to a superblock
1833   * @sb: the super we write to
1834   *
1835   * When a process wants to write data or metadata to a file system (i.e. dirty
1836   * a page or an inode), it should embed the operation in a sb_start_write() -
1837   * sb_end_write() pair to get exclusion against file system freezing. This
1838   * function increments number of writers preventing freezing. If the file
1839   * system is already frozen, the function waits until the file system is
1840   * thawed.
1841   *
1842   * Since freeze protection behaves as a lock, users have to preserve
1843   * ordering of freeze protection and other filesystem locks. Generally,
1844   * freeze protection should be the outermost lock. In particular, we have:
1845   *
1846   * sb_start_write
1847   *   -> i_mutex			(write path, truncate, directory ops, ...)
1848   *   -> s_umount		(freeze_super, thaw_super)
1849   */
sb_start_write(struct super_block * sb)1850  static inline void sb_start_write(struct super_block *sb)
1851  {
1852  	__sb_start_write(sb, SB_FREEZE_WRITE);
1853  }
1854  
sb_start_write_trylock(struct super_block * sb)1855  static inline bool sb_start_write_trylock(struct super_block *sb)
1856  {
1857  	return __sb_start_write_trylock(sb, SB_FREEZE_WRITE);
1858  }
1859  
1860  /**
1861   * sb_start_pagefault - get write access to a superblock from a page fault
1862   * @sb: the super we write to
1863   *
1864   * When a process starts handling write page fault, it should embed the
1865   * operation into sb_start_pagefault() - sb_end_pagefault() pair to get
1866   * exclusion against file system freezing. This is needed since the page fault
1867   * is going to dirty a page. This function increments number of running page
1868   * faults preventing freezing. If the file system is already frozen, the
1869   * function waits until the file system is thawed.
1870   *
1871   * Since page fault freeze protection behaves as a lock, users have to preserve
1872   * ordering of freeze protection and other filesystem locks. It is advised to
1873   * put sb_start_pagefault() close to mmap_lock in lock ordering. Page fault
1874   * handling code implies lock dependency:
1875   *
1876   * mmap_lock
1877   *   -> sb_start_pagefault
1878   */
sb_start_pagefault(struct super_block * sb)1879  static inline void sb_start_pagefault(struct super_block *sb)
1880  {
1881  	__sb_start_write(sb, SB_FREEZE_PAGEFAULT);
1882  }
1883  
1884  /**
1885   * sb_start_intwrite - get write access to a superblock for internal fs purposes
1886   * @sb: the super we write to
1887   *
1888   * This is the third level of protection against filesystem freezing. It is
1889   * free for use by a filesystem. The only requirement is that it must rank
1890   * below sb_start_pagefault.
1891   *
1892   * For example filesystem can call sb_start_intwrite() when starting a
1893   * transaction which somewhat eases handling of freezing for internal sources
1894   * of filesystem changes (internal fs threads, discarding preallocation on file
1895   * close, etc.).
1896   */
sb_start_intwrite(struct super_block * sb)1897  static inline void sb_start_intwrite(struct super_block *sb)
1898  {
1899  	__sb_start_write(sb, SB_FREEZE_FS);
1900  }
1901  
sb_start_intwrite_trylock(struct super_block * sb)1902  static inline bool sb_start_intwrite_trylock(struct super_block *sb)
1903  {
1904  	return __sb_start_write_trylock(sb, SB_FREEZE_FS);
1905  }
1906  
/* Prototype only; takes an idmap and an inode and yields a bool. */
bool inode_owner_or_capable(struct mnt_idmap *idmap,
			    const struct inode *inode);

/*
 * VFS helper functions (prototypes only).
 *
 * All of them return int.  Most take the struct mnt_idmap first; vfs_link()
 * instead takes a dentry first and the idmap second.
 */
int vfs_create(struct mnt_idmap *, struct inode *,
	       struct dentry *, umode_t, bool);
int vfs_mkdir(struct mnt_idmap *, struct inode *,
	      struct dentry *, umode_t);
int vfs_mknod(struct mnt_idmap *, struct inode *, struct dentry *,
	      umode_t, dev_t);
int vfs_symlink(struct mnt_idmap *, struct inode *,
		struct dentry *, const char *);
int vfs_link(struct dentry *, struct mnt_idmap *, struct inode *,
	     struct dentry *, struct inode **);
int vfs_rmdir(struct mnt_idmap *, struct inode *, struct dentry *);
int vfs_unlink(struct mnt_idmap *, struct inode *, struct dentry *,
	       struct inode **);
1926  
/**
 * struct renamedata - contains all information required for renaming
 * @old_mnt_idmap:	idmap of the old mount the inode was found from
 * @old_dir:		parent of source
 * @old_dentry:		source
 * @new_mnt_idmap:	idmap of the new mount the inode was found from
 * @new_dir:		parent of destination
 * @new_dentry:		destination
 * @delegated_inode:	returns an inode needing a delegation break
 * @flags:		rename flags
 *
 * Passed by pointer to vfs_rename().  The layout is marked
 * __randomize_layout, so member order must not be relied upon.
 */
struct renamedata {
	struct mnt_idmap *old_mnt_idmap;
	struct inode *old_dir;
	struct dentry *old_dentry;
	struct mnt_idmap *new_mnt_idmap;
	struct inode *new_dir;
	struct dentry *new_dentry;
	struct inode **delegated_inode;
	unsigned int flags;
} __randomize_layout;
1948  
/* Prototype only; all arguments travel in a struct renamedata. */
int vfs_rename(struct renamedata *);
1950  
vfs_whiteout(struct mnt_idmap * idmap,struct inode * dir,struct dentry * dentry)1951  static inline int vfs_whiteout(struct mnt_idmap *idmap,
1952  			       struct inode *dir, struct dentry *dentry)
1953  {
1954  	return vfs_mknod(idmap, dir, dentry, S_IFCHR | WHITEOUT_MODE,
1955  			 WHITEOUT_DEV);
1956  }
1957  
/*
 * Prototypes only.  Both kernel_*_open() helpers take the credentials to
 * use as an explicit @cred argument and return a struct file pointer.
 */
struct file *kernel_tmpfile_open(struct mnt_idmap *idmap,
				 const struct path *parentpath,
				 umode_t mode, int open_flag,
				 const struct cred *cred);
struct file *kernel_file_open(const struct path *path, int flags,
			      const struct cred *cred);

/*
 * The callback @f has the shape (dentry, mode, opaque pointer); the trailing
 * void * argument of vfs_mkobj() is presumably what gets handed to it.
 */
int vfs_mkobj(struct dentry *, umode_t,
		int (*f)(struct dentry *, umode_t, void *),
		void *);

int vfs_fchown(struct file *file, uid_t user, gid_t group);
int vfs_fchmod(struct file *file, umode_t mode);
int vfs_utimes(const struct path *path, struct timespec64 *times);
1972  
extern long vfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);

/*
 * compat_ptr_ioctl has the same signature as the ->compat_ioctl member of
 * struct file_operations.  Without CONFIG_COMPAT there is no function and
 * the name expands to NULL instead.
 */
#ifdef CONFIG_COMPAT
extern long compat_ptr_ioctl(struct file *file, unsigned int cmd,
					unsigned long arg);
#else
#define compat_ptr_ioctl NULL
#endif
1981  
/*
 * VFS file helper functions (prototypes only).
 */
void inode_init_owner(struct mnt_idmap *idmap, struct inode *inode,
		      const struct inode *dir, umode_t mode);
extern bool may_open_dev(const struct path *path);
/* Takes a mode and returns a (possibly adjusted) mode. */
umode_t mode_strip_sgid(struct mnt_idmap *idmap,
			const struct inode *dir, umode_t mode);
bool in_group_or_capable(struct mnt_idmap *idmap,
			 const struct inode *inode, vfsgid_t vfsgid);
1992  
/*
 * This is the "filldir" function type, used by readdir() to let
 * the kernel specify what kind of dirent layout it wants to have.
 * This allows the kernel to read directories into kernel space or
 * to have different dirent layouts depending on the binary type.
 * Return 'true' to keep going and 'false' if there are no more entries.
 *
 * The callback receives the struct dir_context it was installed in as its
 * first argument.
 */
struct dir_context;
typedef bool (*filldir_t)(struct dir_context *, const char *, int, loff_t, u64,
			 unsigned);

/* Pairs a filldir_t callback with a position (loff_t). */
struct dir_context {
	filldir_t actor;
	loff_t pos;
};
2008  
/*
 * These flags let !MMU mmap() govern direct device mapping vs immediate
 * copying more easily for MAP_PRIVATE, especially for ROM filesystems.
 *
 * NOMMU_MAP_COPY:	Copy can be mapped (MAP_PRIVATE)
 * NOMMU_MAP_DIRECT:	Can be mapped directly (MAP_SHARED)
 * NOMMU_MAP_READ:	Can be mapped for reading
 * NOMMU_MAP_WRITE:	Can be mapped for writing
 * NOMMU_MAP_EXEC:	Can be mapped for execution
 *
 * The READ/WRITE/EXEC flags are aliases of the VM_MAY* bits;
 * NOMMU_VMFLAGS is the union of those three.
 */
#define NOMMU_MAP_COPY		0x00000001
#define NOMMU_MAP_DIRECT	0x00000008
#define NOMMU_MAP_READ		VM_MAYREAD
#define NOMMU_MAP_WRITE		VM_MAYWRITE
#define NOMMU_MAP_EXEC		VM_MAYEXEC

#define NOMMU_VMFLAGS \
	(NOMMU_MAP_READ | NOMMU_MAP_WRITE | NOMMU_MAP_EXEC)
2027  
/*
 * These flags control the behavior of the remap_file_range function pointer.
 * If it is called with len == 0 that means "remap to end of source file".
 * See Documentation/filesystems/vfs.rst for more details about this call.
 *
 * REMAP_FILE_DEDUP: only remap if contents identical (i.e. deduplicate)
 * REMAP_FILE_CAN_SHORTEN: caller can handle a shortened request
 */
#define REMAP_FILE_DEDUP		(1 << 0)
#define REMAP_FILE_CAN_SHORTEN		(1 << 1)

/*
 * These flags signal that the caller is ok with altering various aspects of
 * the behavior of the remap operation.  The changes must be made by the
 * implementation; the vfs remap helper functions can take advantage of them.
 * Flags in this category exist to preserve the quirky behavior of the hoisted
 * btrfs clone/dedupe ioctls.
 *
 * Currently the only advisory flag is REMAP_FILE_CAN_SHORTEN.
 */
#define REMAP_FILE_ADVISORY		(REMAP_FILE_CAN_SHORTEN)
2047  
/*
 * These flags control the behavior of vfs_copy_file_range().
 * They are not available to the user via syscall.
 *
 * COPY_FILE_SPLICE: call splice direct instead of fs clone/copy ops
 *		     (bit 0)
 */
#define COPY_FILE_SPLICE		(1 << 0)
2055  
2056  struct iov_iter;
2057  struct io_uring_cmd;
2058  struct offset_ctx;
2059  
/* Type of file_operations.fop_flags; the FOP_* constants are cast to it. */
typedef unsigned int __bitwise fop_flags_t;
2061  
/*
 * Per-file operations table.
 *
 * @owner and @fop_flags are plain data; every other member is a function
 * pointer.  ->mmap_capabilities only exists when CONFIG_MMU is not set.
 * The layout is marked __randomize_layout, so member order must not be
 * relied upon.
 */
struct file_operations {
	struct module *owner;
	fop_flags_t fop_flags;
	loff_t (*llseek) (struct file *, loff_t, int);
	ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
	ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
	ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
	ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
	int (*iopoll)(struct kiocb *kiocb, struct io_comp_batch *,
			unsigned int flags);
	int (*iterate_shared) (struct file *, struct dir_context *);
	__poll_t (*poll) (struct file *, struct poll_table_struct *);
	long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
	long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
	int (*mmap) (struct file *, struct vm_area_struct *);
	int (*open) (struct inode *, struct file *);
	int (*flush) (struct file *, fl_owner_t id);
	int (*release) (struct inode *, struct file *);
	int (*fsync) (struct file *, loff_t, loff_t, int datasync);
	int (*fasync) (int, struct file *, int);
	int (*lock) (struct file *, int, struct file_lock *);
	unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
	int (*check_flags)(int);
	int (*flock) (struct file *, int, struct file_lock *);
	ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int);
	ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int);
	void (*splice_eof)(struct file *file);
	int (*setlease)(struct file *, int, struct file_lease **, void **);
	long (*fallocate)(struct file *file, int mode, loff_t offset,
			  loff_t len);
	void (*show_fdinfo)(struct seq_file *m, struct file *f);
#ifndef CONFIG_MMU
	/* Only present on !MMU builds. */
	unsigned (*mmap_capabilities)(struct file *);
#endif
	ssize_t (*copy_file_range)(struct file *, loff_t, struct file *,
			loff_t, size_t, unsigned int);
	/* See the REMAP_FILE_* flags for the meaning of remap_flags. */
	loff_t (*remap_file_range)(struct file *file_in, loff_t pos_in,
				   struct file *file_out, loff_t pos_out,
				   loff_t len, unsigned int remap_flags);
	int (*fadvise)(struct file *, loff_t, loff_t, int);
	int (*uring_cmd)(struct io_uring_cmd *ioucmd, unsigned int issue_flags);
	int (*uring_cmd_iopoll)(struct io_uring_cmd *, struct io_comp_batch *,
				unsigned int poll_flags);
} __randomize_layout;
2106  
/*
 * fop_flags_t values.  Each constant is a distinct bit, force-cast to
 * fop_flags_t.
 */
/* Supports async buffered reads */
#define FOP_BUFFER_RASYNC	((__force fop_flags_t)(1 << 0))
/* Supports async buffered writes */
#define FOP_BUFFER_WASYNC	((__force fop_flags_t)(1 << 1))
/* Supports synchronous page faults for mappings */
#define FOP_MMAP_SYNC		((__force fop_flags_t)(1 << 2))
/* Supports non-exclusive O_DIRECT writes from multiple threads */
#define FOP_DIO_PARALLEL_WRITE	((__force fop_flags_t)(1 << 3))
/* Contains huge pages */
#define FOP_HUGE_PAGES		((__force fop_flags_t)(1 << 4))
/* Treat loff_t as unsigned (e.g., /dev/mem) */
#define FOP_UNSIGNED_OFFSET	((__force fop_flags_t)(1 << 5))
2119  
/* Wrap a directory iterator that needs exclusive inode access */
int wrap_directory_iterator(struct file *, struct dir_context *,
			    int (*) (struct file *, struct dir_context *));
/*
 * WRAP_DIR_ITER(x) defines a static function named shared_<x> that calls
 * wrap_directory_iterator(file, ctx, x).  Its signature matches the
 * ->iterate_shared member of struct file_operations.
 */
#define WRAP_DIR_ITER(x) \
	static int shared_##x(struct file *file , struct dir_context *ctx) \
	{ return wrap_directory_iterator(file, ctx, x); }
2126  
/*
 * Per-inode operations table.  Every member is a function pointer; the
 * structure is declared ____cacheline_aligned.
 */
struct inode_operations {
	struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int);
	const char * (*get_link) (struct dentry *, struct inode *, struct delayed_call *);
	int (*permission) (struct mnt_idmap *, struct inode *, int);
	struct posix_acl * (*get_inode_acl)(struct inode *, int, bool);

	int (*readlink) (struct dentry *, char __user *,int);

	int (*create) (struct mnt_idmap *, struct inode *,struct dentry *,
		       umode_t, bool);
	int (*link) (struct dentry *,struct inode *,struct dentry *);
	int (*unlink) (struct inode *,struct dentry *);
	int (*symlink) (struct mnt_idmap *, struct inode *,struct dentry *,
			const char *);
	int (*mkdir) (struct mnt_idmap *, struct inode *,struct dentry *,
		      umode_t);
	int (*rmdir) (struct inode *,struct dentry *);
	int (*mknod) (struct mnt_idmap *, struct inode *,struct dentry *,
		      umode_t,dev_t);
	int (*rename) (struct mnt_idmap *, struct inode *, struct dentry *,
			struct inode *, struct dentry *, unsigned int);
	int (*setattr) (struct mnt_idmap *, struct dentry *, struct iattr *);
	int (*getattr) (struct mnt_idmap *, const struct path *,
			struct kstat *, u32, unsigned int);
	ssize_t (*listxattr) (struct dentry *, char *, size_t);
	int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start,
		      u64 len);
	int (*update_time)(struct inode *, int);
	int (*atomic_open)(struct inode *, struct dentry *,
			   struct file *, unsigned open_flag,
			   umode_t create_mode);
	int (*tmpfile) (struct mnt_idmap *, struct inode *,
			struct file *, umode_t);
	/* ACL accessors working on a dentry (compare ->get_inode_acl above). */
	struct posix_acl *(*get_acl)(struct mnt_idmap *, struct dentry *,
				     int);
	int (*set_acl)(struct mnt_idmap *, struct dentry *,
		       struct posix_acl *, int);
	int (*fileattr_set)(struct mnt_idmap *idmap,
			    struct dentry *dentry, struct fileattr *fa);
	int (*fileattr_get)(struct dentry *dentry, struct fileattr *fa);
	struct offset_ctx *(*get_offset_ctx)(struct inode *inode);
} ____cacheline_aligned;
2169  
call_mmap(struct file * file,struct vm_area_struct * vma)2170  static inline int call_mmap(struct file *file, struct vm_area_struct *vma)
2171  {
2172  	return file->f_op->mmap(file, vma);
2173  }
2174  
/*
 * Prototypes for the read/write/copy/remap/dedupe entry points.  The
 * copy_file_range and remap helpers share their parameter shape with the
 * same-named members of struct file_operations.
 */
extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *);
extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *);
extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *,
				   loff_t, size_t, unsigned int);
int remap_verify_area(struct file *file, loff_t pos, loff_t len, bool write);
/* Same as generic_remap_file_range_prep() plus an explicit dax_read_ops. */
int __generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
				    struct file *file_out, loff_t pos_out,
				    loff_t *len, unsigned int remap_flags,
				    const struct iomap_ops *dax_read_ops);
int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
				  struct file *file_out, loff_t pos_out,
				  loff_t *count, unsigned int remap_flags);
extern loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in,
				   struct file *file_out, loff_t pos_out,
				   loff_t len, unsigned int remap_flags);
extern int vfs_dedupe_file_range(struct file *file,
				 struct file_dedupe_range *same);
extern loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
					struct file *dst_file, loff_t dst_pos,
					loff_t len, unsigned int remap_flags);
2195  
/**
 * enum freeze_holder - holder of the freeze
 * @FREEZE_HOLDER_KERNEL: kernel wants to freeze or thaw filesystem
 * @FREEZE_HOLDER_USERSPACE: userspace wants to freeze or thaw filesystem
 * @FREEZE_MAY_NEST: whether nesting freeze and thaw requests is allowed
 *
 * Indicate who the owner of the freeze or thaw request is and whether
 * the freeze needs to be exclusive or can nest.
 * Without @FREEZE_MAY_NEST, multiple freeze and thaw requests from the
 * same holder aren't allowed. It is however allowed to hold a single
 * @FREEZE_HOLDER_USERSPACE and a single @FREEZE_HOLDER_KERNEL freeze at
 * the same time. This is relied upon by some filesystems during online
 * repair or similar.
 *
 * Each enumerator is a distinct bit.
 */
enum freeze_holder {
	FREEZE_HOLDER_KERNEL	= (1U << 0),
	FREEZE_HOLDER_USERSPACE	= (1U << 1),
	FREEZE_MAY_NEST		= (1U << 2),
};
2215  
/*
 * Per-superblock operations table.  Every member is a function pointer.
 * The quota callbacks only exist when CONFIG_QUOTA is set.
 */
struct super_operations {
	struct inode *(*alloc_inode)(struct super_block *sb);
	void (*destroy_inode)(struct inode *);
	void (*free_inode)(struct inode *);

	void (*dirty_inode) (struct inode *, int flags);
	int (*write_inode) (struct inode *, struct writeback_control *wbc);
	int (*drop_inode) (struct inode *);
	void (*evict_inode) (struct inode *);
	void (*put_super) (struct super_block *);
	int (*sync_fs)(struct super_block *sb, int wait);
	/* The *_super variants additionally receive an enum freeze_holder. */
	int (*freeze_super) (struct super_block *, enum freeze_holder who);
	int (*freeze_fs) (struct super_block *);
	int (*thaw_super) (struct super_block *, enum freeze_holder who);
	int (*unfreeze_fs) (struct super_block *);
	int (*statfs) (struct dentry *, struct kstatfs *);
	int (*remount_fs) (struct super_block *, int *, char *);
	void (*umount_begin) (struct super_block *);

	int (*show_options)(struct seq_file *, struct dentry *);
	int (*show_devname)(struct seq_file *, struct dentry *);
	int (*show_path)(struct seq_file *, struct dentry *);
	int (*show_stats)(struct seq_file *, struct dentry *);
#ifdef CONFIG_QUOTA
	ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
	ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
	struct dquot __rcu **(*get_dquots)(struct inode *);
#endif
	long (*nr_cached_objects)(struct super_block *,
				  struct shrink_control *);
	long (*free_cached_objects)(struct super_block *,
				    struct shrink_control *);
	void (*shutdown)(struct super_block *sb);
};
2250  
/*
 * Inode flags - they have no relation to superblock flags now
 *
 * Each flag is a single bit, tested against inode->i_flags by the IS_*()
 * macros.  S_DAX is defined as 0 when CONFIG_FS_DAX is off, so a test for
 * it is then always false.
 */
#define S_SYNC		(1 << 0)  /* Writes are synced at once */
#define S_NOATIME	(1 << 1)  /* Do not update access times */
#define S_APPEND	(1 << 2)  /* Append-only file */
#define S_IMMUTABLE	(1 << 3)  /* Immutable file */
#define S_DEAD		(1 << 4)  /* removed, but still open directory */
#define S_NOQUOTA	(1 << 5)  /* Inode is not counted to quota */
#define S_DIRSYNC	(1 << 6)  /* Directory modifications are synchronous */
#define S_NOCMTIME	(1 << 7)  /* Do not update file c/mtime */
#define S_SWAPFILE	(1 << 8)  /* Do not truncate: swapon got its bmaps */
#define S_PRIVATE	(1 << 9)  /* Inode is fs-internal */
#define S_IMA		(1 << 10) /* Inode has an associated IMA struct */
#define S_AUTOMOUNT	(1 << 11) /* Automount/referral quasi-directory */
#define S_NOSEC		(1 << 12) /* no suid or xattr security attributes */
#ifdef CONFIG_FS_DAX
#define S_DAX		(1 << 13) /* Direct Access, avoiding the page cache */
#else
#define S_DAX		0	  /* Make all the DAX code disappear */
#endif
#define S_ENCRYPTED	(1 << 14) /* Encrypted file (using fs/crypto/) */
#define S_CASEFOLD	(1 << 15) /* Casefolded file */
#define S_VERITY	(1 << 16) /* Verity file (using fs/verity/) */
#define S_KERNEL_FILE	(1 << 17) /* File is in use by the kernel (eg. fs/cachefiles) */
2276  
/*
 * Note that nosuid etc flags are inode-specific: setting some file-system
 * flags just means all the inodes inherit those flags by default. It might be
 * possible to override it selectively if you really wanted to with some
 * ioctl() that is not currently implemented.
 *
 * Exception: SB_RDONLY is always applied to the entire file system.
 *
 * Unfortunately, it is possible to change a filesystem's flags with it mounted
 * with files in use.  This means that all of the inodes will not have their
 * i_flags updated.  Hence, i_flags no longer inherit the superblock mount
 * flags, so these have to be checked separately. -- rmk@arm.uk.linux.org
 *
 * __IS_FLG() masks the s_flags of the inode's superblock with @flg; the
 * result is the raw masked value, not a normalised 0/1.
 */
#define __IS_FLG(inode, flg)	((inode)->i_sb->s_flags & (flg))
2291  
sb_rdonly(const struct super_block * sb)2292  static inline bool sb_rdonly(const struct super_block *sb) { return sb->s_flags & SB_RDONLY; }
/*
 * Predicates that consult the superblock flags (and, for IS_SYNC and
 * IS_DIRSYNC, the inode's own i_flags as well).
 */
#define IS_RDONLY(inode)	sb_rdonly((inode)->i_sb)
#define IS_SYNC(inode)		(__IS_FLG(inode, SB_SYNCHRONOUS) || \
					((inode)->i_flags & S_SYNC))
#define IS_DIRSYNC(inode)	(__IS_FLG(inode, SB_SYNCHRONOUS|SB_DIRSYNC) || \
					((inode)->i_flags & (S_SYNC|S_DIRSYNC)))
#define IS_MANDLOCK(inode)	__IS_FLG(inode, SB_MANDLOCK)
#define IS_NOATIME(inode)	__IS_FLG(inode, SB_RDONLY|SB_NOATIME)
#define IS_I_VERSION(inode)	__IS_FLG(inode, SB_I_VERSION)

/* Predicates that only look at the inode's i_flags. */
#define IS_NOQUOTA(inode)	((inode)->i_flags & S_NOQUOTA)
#define IS_APPEND(inode)	((inode)->i_flags & S_APPEND)
#define IS_IMMUTABLE(inode)	((inode)->i_flags & S_IMMUTABLE)

/* Constant 0 when POSIX ACL support is not configured. */
#ifdef CONFIG_FS_POSIX_ACL
#define IS_POSIXACL(inode)	__IS_FLG(inode, SB_POSIXACL)
#else
#define IS_POSIXACL(inode)	0
#endif

#define IS_DEADDIR(inode)	((inode)->i_flags & S_DEAD)
#define IS_NOCMTIME(inode)	((inode)->i_flags & S_NOCMTIME)

/* Without CONFIG_SWAP this still evaluates @inode but always yields 0U. */
#ifdef CONFIG_SWAP
#define IS_SWAPFILE(inode)	((inode)->i_flags & S_SWAPFILE)
#else
#define IS_SWAPFILE(inode)	((void)(inode), 0U)
#endif

#define IS_PRIVATE(inode)	((inode)->i_flags & S_PRIVATE)
#define IS_IMA(inode)		((inode)->i_flags & S_IMA)
#define IS_AUTOMOUNT(inode)	((inode)->i_flags & S_AUTOMOUNT)
#define IS_NOSEC(inode)		((inode)->i_flags & S_NOSEC)
#define IS_DAX(inode)		((inode)->i_flags & S_DAX)
#define IS_ENCRYPTED(inode)	((inode)->i_flags & S_ENCRYPTED)
#define IS_CASEFOLDED(inode)	((inode)->i_flags & S_CASEFOLD)
#define IS_VERITY(inode)	((inode)->i_flags & S_VERITY)
2329  
/*
 * True when @inode is a whiteout: a character device (S_ISCHR on i_mode)
 * whose i_rdev equals WHITEOUT_DEV.
 *
 * @inode is parenthesised at every use so that any pointer expression
 * (e.g. "p + 1") expands correctly.  It may be evaluated up to twice, so
 * it must not have side effects.
 */
#define IS_WHITEOUT(inode)	(S_ISCHR((inode)->i_mode) && \
				 (inode)->i_rdev == WHITEOUT_DEV)
2332  
HAS_UNMAPPED_ID(struct mnt_idmap * idmap,struct inode * inode)2333  static inline bool HAS_UNMAPPED_ID(struct mnt_idmap *idmap,
2334  				   struct inode *inode)
2335  {
2336  	return !vfsuid_valid(i_uid_into_vfsuid(idmap, inode)) ||
2337  	       !vfsgid_valid(i_gid_into_vfsgid(idmap, inode));
2338  }
2339  
init_sync_kiocb(struct kiocb * kiocb,struct file * filp)2340  static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
2341  {
2342  	*kiocb = (struct kiocb) {
2343  		.ki_filp = filp,
2344  		.ki_flags = filp->f_iocb_flags,
2345  		.ki_ioprio = get_current_ioprio(),
2346  	};
2347  }
2348  
kiocb_clone(struct kiocb * kiocb,struct kiocb * kiocb_src,struct file * filp)2349  static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src,
2350  			       struct file *filp)
2351  {
2352  	*kiocb = (struct kiocb) {
2353  		.ki_filp = filp,
2354  		.ki_flags = kiocb_src->ki_flags,
2355  		.ki_ioprio = kiocb_src->ki_ioprio,
2356  		.ki_pos = kiocb_src->ki_pos,
2357  	};
2358  }
2359  
2360  /*
2361   * Inode state bits.  Protected by inode->i_lock
2362   *
2363   * Four bits determine the dirty state of the inode: I_DIRTY_SYNC,
2364   * I_DIRTY_DATASYNC, I_DIRTY_PAGES, and I_DIRTY_TIME.
2365   *
2366   * Four bits define the lifetime of an inode.  Initially, inodes are I_NEW,
2367   * until that flag is cleared.  I_WILL_FREE, I_FREEING and I_CLEAR are set at
2368   * various stages of removing an inode.
2369   *
2370   * Two bits are used for locking and completion notification, I_NEW and I_SYNC.
2371   *
2372   * I_DIRTY_SYNC		Inode is dirty, but doesn't have to be written on
2373   *			fdatasync() (unless I_DIRTY_DATASYNC is also set).
2374   *			Timestamp updates are the usual cause.
2375   * I_DIRTY_DATASYNC	Data-related inode changes pending.  We keep track of
2376   *			these changes separately from I_DIRTY_SYNC so that we
2377   *			don't have to write inode on fdatasync() when only
2378   *			e.g. the timestamps have changed.
2379   * I_DIRTY_PAGES	Inode has dirty pages.  Inode itself may be clean.
2380   * I_DIRTY_TIME		The inode itself has dirty timestamps, and the
2381   *			lazytime mount option is enabled.  We keep track of this
2382   *			separately from I_DIRTY_SYNC in order to implement
2383   *			lazytime.  This gets cleared if I_DIRTY_INODE
2384   *			(I_DIRTY_SYNC and/or I_DIRTY_DATASYNC) gets set. But
2385   *			I_DIRTY_TIME can still be set if I_DIRTY_SYNC is already
2386   *			in place because writeback might already be in progress
2387   *			and we don't want to lose the time update
2388   * I_NEW		Serves as both a mutex and completion notification.
2389   *			New inodes set I_NEW.  If two processes both create
2390   *			the same inode, one of them will release its inode and
2391   *			wait for I_NEW to be released before returning.
2392   *			Inodes in I_WILL_FREE, I_FREEING or I_CLEAR state can
2393   *			also cause waiting on I_NEW, without I_NEW actually
2394   *			being set.  find_inode() uses this to prevent returning
2395   *			nearly-dead inodes.
2396   * I_WILL_FREE		Must be set when calling write_inode_now() if i_count
2397   *			is zero.  I_FREEING must be set when I_WILL_FREE is
2398   *			cleared.
2399   * I_FREEING		Set when inode is about to be freed but still has dirty
2400   *			pages or buffers attached or the inode itself is still
2401   *			dirty.
2402   * I_CLEAR		Added by clear_inode().  In this state the inode is
2403   *			clean and can be destroyed.  Inode keeps I_FREEING.
2404   *
2405   *			Inodes that are I_WILL_FREE, I_FREEING or I_CLEAR are
2406   *			prohibited for many purposes.  iget() must wait for
2407   *			the inode to be completely released, then create it
2408   *			anew.  Other functions will just ignore such inodes,
2409   *			if appropriate.  I_NEW is used for waiting.
2410   *
2411   * I_SYNC		Writeback of inode is running. The bit is set during
2412   *			data writeback, and cleared with a wakeup on the bit
2413   *			address once it is done. The bit is also used to pin
2414   *			the inode in memory for flusher thread.
2415   *
 * I_REFERENCED		Marks the inode as recently referenced on the LRU list.
2417   *
2418   * I_WB_SWITCH		Cgroup bdi_writeback switching in progress.  Used to
2419   *			synchronize competing switching instances and to tell
2420   *			wb stat updates to grab the i_pages lock.  See
2421   *			inode_switch_wbs_work_fn() for details.
2422   *
2423   * I_OVL_INUSE		Used by overlayfs to get exclusive ownership on upper
2424   *			and work dirs among overlayfs mounts.
2425   *
2426   * I_CREATING		New object's inode in the middle of setting up.
2427   *
2428   * I_DONTCACHE		Evict inode as soon as it is not used anymore.
2429   *
2430   * I_SYNC_QUEUED	Inode is queued in b_io or b_more_io writeback lists.
2431   *			Used to detect that mark_inode_dirty() should not move
2432   * 			inode between dirty lists.
2433   *
 * I_PINNING_NETFS_WB	Inode is pinning an fscache object for writeback.
2435   *
2436   * I_LRU_ISOLATING	Inode is pinned being isolated from LRU without holding
2437   *			i_count.
2438   *
2439   * Q: What is the difference between I_WILL_FREE and I_FREEING?
2440   *
2441   * __I_{SYNC,NEW,LRU_ISOLATING} are used to derive unique addresses to wait
2442   * upon. There's one free address left.
2443   */
/*
 * The first three flags also expose their bit numbers (__I_*); the comment
 * above states that these are used to derive unique addresses to wait upon.
 */
#define __I_NEW			0
#define I_NEW			(1 << __I_NEW)
#define __I_SYNC		1
#define I_SYNC			(1 << __I_SYNC)
#define __I_LRU_ISOLATING	2
#define I_LRU_ISOLATING		(1 << __I_LRU_ISOLATING)

/* The remaining state flags are defined directly as single-bit masks. */
#define I_DIRTY_SYNC		(1 << 3)
#define I_DIRTY_DATASYNC	(1 << 4)
#define I_DIRTY_PAGES		(1 << 5)
#define I_WILL_FREE		(1 << 6)
#define I_FREEING		(1 << 7)
#define I_CLEAR			(1 << 8)
#define I_REFERENCED		(1 << 9)
#define I_LINKABLE		(1 << 10)
#define I_DIRTY_TIME		(1 << 11)
#define I_WB_SWITCH		(1 << 12)
#define I_OVL_INUSE		(1 << 13)
#define I_CREATING		(1 << 14)
#define I_DONTCACHE		(1 << 15)
#define I_SYNC_QUEUED		(1 << 16)
#define I_PINNING_NETFS_WB	(1 << 17)

/* Composite masks: inode-only dirt, inode + pages, and everything incl. lazytime. */
#define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC)
#define I_DIRTY (I_DIRTY_INODE | I_DIRTY_PAGES)
#define I_DIRTY_ALL (I_DIRTY | I_DIRTY_TIME)
2470  
/* Prototype only; the int argument receives I_DIRTY_* flags in the wrappers below. */
extern void __mark_inode_dirty(struct inode *, int);
/*
 * Mark @inode dirty with the full I_DIRTY mask, i.e.
 * I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES.
 */
static inline void mark_inode_dirty(struct inode *inode)
{
	__mark_inode_dirty(inode, I_DIRTY);
}
2476  
/* Mark @inode dirty with I_DIRTY_SYNC only (no I_DIRTY_DATASYNC, no I_DIRTY_PAGES). */
static inline void mark_inode_dirty_sync(struct inode *inode)
{
	__mark_inode_dirty(inode, I_DIRTY_SYNC);
}
2481  
2482  /*
2483   * Returns true if the given inode itself only has dirty timestamps (its pages
2484   * may still be dirty) and isn't currently being allocated or freed.
2485   * Filesystems should call this if when writing an inode when lazytime is
2486   * enabled, they want to opportunistically write the timestamps of other inodes
2487   * located very nearby on-disk, e.g. in the same inode block.  This returns true
2488   * if the given inode is in need of such an opportunistic update.  Requires
2489   * i_lock, or at least later re-checking under i_lock.
2490   */
inode_is_dirtytime_only(struct inode * inode)2491  static inline bool inode_is_dirtytime_only(struct inode *inode)
2492  {
2493  	return (inode->i_state & (I_DIRTY_TIME | I_NEW |
2494  				  I_FREEING | I_WILL_FREE)) == I_DIRTY_TIME;
2495  }
2496  
/*
 * Link-count primitives (prototypes only).  inc_nlink() and drop_nlink() are
 * what inode_inc_link_count()/inode_dec_link_count() below are built on.
 */
extern void inc_nlink(struct inode *inode);
extern void drop_nlink(struct inode *inode);
extern void clear_nlink(struct inode *inode);
extern void set_nlink(struct inode *inode, unsigned int nlink);
2501  
/* Call inc_nlink() on @inode, then mark it dirty with mark_inode_dirty(). */
static inline void inode_inc_link_count(struct inode *inode)
{
	inc_nlink(inode);
	mark_inode_dirty(inode);
}
2507  
/* Call drop_nlink() on @inode, then mark it dirty with mark_inode_dirty(). */
static inline void inode_dec_link_count(struct inode *inode)
{
	drop_nlink(inode);
	mark_inode_dirty(inode);
}
2513  
/*
 * Flags naming atime, mtime, ctime and version.  Each value is a distinct
 * single bit, so they can be OR-ed together.
 * NOTE(review): presumably these are what the int @flags arguments of the
 * time-update helpers declared below expect -- confirm against the callers.
 */
enum file_time_flags {
	S_ATIME		= 1 << 0,
	S_MTIME		= 1 << 1,
	S_CTIME		= 1 << 2,
	S_VERSION	= 1 << 3,
};
2520  
/*
 * Timestamp-related prototypes.  touch_atime() is what file_accessed()
 * below calls with the file's path.
 */
extern bool atime_needs_update(const struct path *, struct inode *);
extern void touch_atime(const struct path *);
int inode_update_time(struct inode *inode, int flags);
2524  
file_accessed(struct file * file)2525  static inline void file_accessed(struct file *file)
2526  {
2527  	if (!(file->f_flags & O_NOATIME))
2528  		touch_atime(&file->f_path);
2529  }
2530  
/* Prototypes only: one variant takes a struct file, the other a struct kiocb. */
extern int file_modified(struct file *file);
int kiocb_modified(struct kiocb *iocb);

/* Prototype only; @wait is an int flag whose meaning is defined by the implementation. */
int sync_inode_metadata(struct inode *inode, int wait);
2535  
/*
 * Describes one filesystem type: its name, FS_* flags, the callbacks the
 * filesystem supplies, its owning module, and per-type lock class keys.
 */
struct file_system_type {
	const char *name;
	int fs_flags;		/* bitmask of the FS_* values defined right below */
#define FS_REQUIRES_DEV		1
#define FS_BINARY_MOUNTDATA	2
#define FS_HAS_SUBTYPE		4
#define FS_USERNS_MOUNT		8	/* Can be mounted by userns root */
#define FS_DISALLOW_NOTIFY_PERM	16	/* Disable fanotify permission events */
#define FS_ALLOW_IDMAP         32      /* FS has been updated to handle vfs idmappings. */
#define FS_RENAME_DOES_D_MOVE	32768	/* FS will handle d_move() during rename() internally. */
	/* Callbacks and parameter table supplied by the filesystem. */
	int (*init_fs_context)(struct fs_context *);
	const struct fs_parameter_spec *parameters;
	struct dentry *(*mount) (struct file_system_type *, int,
		       const char *, void *);
	void (*kill_sb) (struct super_block *);
	struct module *owner;
	/* NOTE(review): presumably linkage in the list of registered types -- confirm. */
	struct file_system_type * next;
	/* NOTE(review): presumably the superblocks of this type -- confirm. */
	struct hlist_head fs_supers;

	/* Lock class keys for superblock locks; one s_writers key per freeze level. */
	struct lock_class_key s_lock_key;
	struct lock_class_key s_umount_key;
	struct lock_class_key s_vfs_rename_key;
	struct lock_class_key s_writers_key[SB_FREEZE_LEVELS];

	/* Lock class keys for inode locks. */
	struct lock_class_key i_lock_key;
	struct lock_class_key i_mutex_key;
	struct lock_class_key invalidate_lock_key;
	struct lock_class_key i_mutex_dir_key;
};
2565  
/*
 * Expands to MODULE_ALIAS() with "fs-" prepended to @NAME.  The prefix is
 * joined by string-literal concatenation, so @NAME must be a string literal.
 */
#define MODULE_ALIAS_FS(NAME) MODULE_ALIAS("fs-" NAME)
2567  
/*
 * Mount helpers (prototypes only).  mount_bdev(), mount_single() and
 * mount_nodev() each take a caller-supplied fill_super callback with the
 * signature int (*)(struct super_block *, void *, int).
 */
extern struct dentry *mount_bdev(struct file_system_type *fs_type,
	int flags, const char *dev_name, void *data,
	int (*fill_super)(struct super_block *, void *, int));
extern struct dentry *mount_single(struct file_system_type *fs_type,
	int flags, void *data,
	int (*fill_super)(struct super_block *, void *, int));
extern struct dentry *mount_nodev(struct file_system_type *fs_type,
	int flags, void *data,
	int (*fill_super)(struct super_block *, void *, int));
extern struct dentry *mount_subtree(struct vfsmount *mnt, const char *path);
/* Superblock shutdown / release prototypes. */
void retire_super(struct super_block *sb);
void generic_shutdown_super(struct super_block *sb);
void kill_block_super(struct super_block *sb);
void kill_anon_super(struct super_block *sb);
void kill_litter_super(struct super_block *sb);
void deactivate_super(struct super_block *sb);
void deactivate_locked_super(struct super_block *sb);
/* Anonymous-device prototypes. */
int set_anon_super(struct super_block *s, void *data);
int set_anon_super_fc(struct super_block *s, struct fs_context *fc);
int get_anon_bdev(dev_t *);
void free_anon_bdev(dev_t);
/*
 * Superblock lookup prototypes.  sget_fc() and sget() take caller-supplied
 * test() and set() callbacks; sget_dev() takes a dev_t instead.
 */
struct super_block *sget_fc(struct fs_context *fc,
			    int (*test)(struct super_block *, struct fs_context *),
			    int (*set)(struct super_block *, struct fs_context *));
struct super_block *sget(struct file_system_type *type,
			int (*test)(struct super_block *,void *),
			int (*set)(struct super_block *,void *),
			int flags, void *data);
struct super_block *sget_dev(struct fs_context *fc, dev_t dev);
2597  
/* Alas, no aliases. Too much hassle with bringing module.h everywhere */
/*
 * Evaluates to @fops when it is non-NULL and try_module_get() on its ->owner
 * succeeds; evaluates to NULL otherwise.  @fops is evaluated exactly once.
 */
#define fops_get(fops) ({						\
	const struct file_operations *_fops = (fops);			\
	(((_fops) && try_module_get((_fops)->owner) ? (_fops) : NULL));	\
})
2603  
/*
 * Counterpart of fops_get(): calls module_put() on @fops->owner when @fops
 * is non-NULL, does nothing for NULL.  @fops is evaluated exactly once.
 */
#define fops_put(fops) ({						\
	const struct file_operations *_fops = (fops);			\
	if (_fops)							\
		module_put((_fops)->owner);				\
})
2609  
/*
 * This one is to be used *ONLY* from ->open() instances.
 * fops must be non-NULL, pinned down *and* module dependencies
 * should be sufficient to pin the caller down as well.
 *
 * The old f_op is released with fops_put(), the new one is installed, and
 * a NULL replacement trips BUG_ON().  @f and @fops are each evaluated once.
 */
#define replace_fops(f, fops) \
	do {	\
		struct file *__file = (f); \
		fops_put(__file->f_op); \
		__file->f_op = (fops); \
		BUG_ON(!__file->f_op); \
	} while(0)
2621  
/* Filesystem type registration prototypes. */
extern int register_filesystem(struct file_system_type *);
extern int unregister_filesystem(struct file_system_type *);
/* statfs prototypes: by path, by userspace pathname, and by file descriptor. */
extern int vfs_statfs(const struct path *, struct kstatfs *);
extern int user_statfs(const char __user *, struct kstatfs *);
extern int fd_statfs(int, struct kstatfs *);
/* Freeze/thaw prototypes; @who is an enum freeze_holder. */
int freeze_super(struct super_block *super, enum freeze_holder who);
int thaw_super(struct super_block *super, enum freeze_holder who);
/* printf-style: @fmt is argument 2, variadic arguments start at 3. */
extern __printf(2, 3)
int super_setup_bdi_name(struct super_block *sb, char *fmt, ...);
extern int super_setup_bdi(struct super_block *sb);
2632  
super_set_uuid(struct super_block * sb,const u8 * uuid,unsigned len)2633  static inline void super_set_uuid(struct super_block *sb, const u8 *uuid, unsigned len)
2634  {
2635  	if (WARN_ON(len > sizeof(sb->s_uuid)))
2636  		len = sizeof(sb->s_uuid);
2637  	sb->s_uuid_len = len;
2638  	memcpy(&sb->s_uuid, uuid, len);
2639  }
2640  
/*
 * set sb sysfs name based on sb->s_bdev
 *
 * Formats sb->s_bdev with "%pg" into sb->s_sysfs_name; snprintf() bounds the
 * write to sizeof(sb->s_sysfs_name).
 */
static inline void super_set_sysfs_name_bdev(struct super_block *sb)
{
	snprintf(sb->s_sysfs_name, sizeof(sb->s_sysfs_name), "%pg", sb->s_bdev);
}
2646  
/*
 * set sb sysfs name based on sb->s_uuid
 *
 * Warns (WARN_ON) when sb->s_uuid_len is not the full sizeof(sb->s_uuid),
 * but formats the UUID with "%pU" into sb->s_sysfs_name either way.
 */
static inline void super_set_sysfs_name_uuid(struct super_block *sb)
{
	WARN_ON(sb->s_uuid_len != sizeof(sb->s_uuid));
	snprintf(sb->s_sysfs_name, sizeof(sb->s_sysfs_name), "%pU", sb->s_uuid.b);
}
2653  
/*
 * set sb sysfs name based on sb->s_id
 *
 * Copies sb->s_id into sb->s_sysfs_name with strscpy(), bounded by
 * sizeof(sb->s_sysfs_name).
 */
static inline void super_set_sysfs_name_id(struct super_block *sb)
{
	strscpy(sb->s_sysfs_name, sb->s_id, sizeof(sb->s_sysfs_name));
}
2659  
/*
 * try to use something standard before you use this
 *
 * printf-style setter: formats @fmt and its variadic arguments into
 * sb->s_sysfs_name, bounded by sizeof(sb->s_sysfs_name).
 */
__printf(2, 3)
static inline void super_set_sysfs_name_generic(struct super_block *sb, const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	vsnprintf(sb->s_sysfs_name, sizeof(sb->s_sysfs_name), fmt, args);
	va_end(args);
}
2670  
/* Prototype only. */
extern int current_umask(void);

/* Inode reference prototypes. */
extern void ihold(struct inode * inode);
extern void iput(struct inode *);
/* Timestamp update prototypes taking an int @flags argument. */
int inode_update_timestamps(struct inode *inode, int flags);
int generic_update_time(struct inode *, int);
2677  
/* /sys/fs */
extern struct kobject *fs_kobj;

/*
 * INT_MAX masked with PAGE_MASK.
 * NOTE(review): presumably INT_MAX rounded down to a page multiple -- confirm
 * against the PAGE_MASK definition.
 */
#define MAX_RW_COUNT (INT_MAX & PAGE_MASK)
2682  
/* fs/open.c */
struct audit_names;
/*
 * A pathname together with its original userland pointer, a reference
 * count, an audit_names pointer and a trailing flexible array (iname).
 */
struct filename {
	const char		*name;	/* pointer to actual string */
	const __user char	*uptr;	/* original userland pointer */
	atomic_t		refcnt;
	struct audit_names	*aname;
	const char		iname[];
};
/* iname must start at an offset that is a multiple of sizeof(long). */
static_assert(offsetof(struct filename, iname) % sizeof(long) == 0);
2693  
/* Return the idmap of the mount @file was opened on (mnt_idmap() of file->f_path.mnt). */
static inline struct mnt_idmap *file_mnt_idmap(const struct file *file)
{
	return mnt_idmap(file->f_path.mnt);
}
2698  
/**
 * is_idmapped_mnt - check whether a mount is mapped
 * @mnt: the mount to check
 *
 * If @mnt has an idmap other than @nop_mnt_idmap attached to it then @mnt is
 * mapped.
 *
 * Return: true if mount is mapped, false if not.
 */
static inline bool is_idmapped_mnt(const struct vfsmount *mnt)
{
	return mnt_idmap(mnt) != &nop_mnt_idmap;
}
2711  
/* Truncate / fallocate prototypes. */
extern long vfs_truncate(const struct path *, loff_t);
int do_truncate(struct mnt_idmap *, struct dentry *, loff_t start,
		unsigned int time_attrs, struct file *filp);
extern int vfs_fallocate(struct file *file, int mode, loff_t offset,
			loff_t len);
/*
 * Open prototypes: from a userspace pathname, a struct filename, a kernel
 * string, or a name combined with a root path.
 */
extern long do_sys_open(int dfd, const char __user *filename, int flags,
			umode_t mode);
extern struct file *file_open_name(struct filename *, int, umode_t);
extern struct file *filp_open(const char *, int, umode_t);
extern struct file *file_open_root(const struct path *,
				   const char *, int, umode_t);
/*
 * Convenience wrapper around file_open_root(): the root path is built from
 * @mnt and its mnt_root dentry; @name, @flags and @mode are passed through.
 */
static inline struct file *file_open_root_mnt(struct vfsmount *mnt,
				   const char *name, int flags, umode_t mode)
{
	const struct path root = {
		.mnt	= mnt,
		.dentry	= mnt->mnt_root,
	};

	return file_open_root(&root, name, flags, mode);
}
/* Open/create prototypes taking a path plus credentials. */
struct file *dentry_open(const struct path *path, int flags,
			 const struct cred *creds);
struct file *dentry_create(const struct path *path, int flags, umode_t mode,
			   const struct cred *cred);
/* Used by file_user_path()/file_user_inode() below for FMODE_BACKING files. */
struct path *backing_file_user_path(struct file *f);
2734  
2735  /*
2736   * When mmapping a file on a stackable filesystem (e.g., overlayfs), the file
2737   * stored in ->vm_file is a backing file whose f_inode is on the underlying
2738   * filesystem.  When the mapped file path and inode number are displayed to
2739   * user (e.g. via /proc/<pid>/maps), these helpers should be used to get the
2740   * path and inode number to display to the user, which is the path of the fd
2741   * that user has requested to map and the inode number that would be returned
2742   * by fstat() on that same fd.
2743   */
2744  /* Get the path to display in /proc/<pid>/maps */
file_user_path(struct file * f)2745  static inline const struct path *file_user_path(struct file *f)
2746  {
2747  	if (unlikely(f->f_mode & FMODE_BACKING))
2748  		return backing_file_user_path(f);
2749  	return &f->f_path;
2750  }
2751  /* Get the inode whose inode number to display in /proc/<pid>/maps */
file_user_inode(struct file * f)2752  static inline const struct inode *file_user_inode(struct file *f)
2753  {
2754  	if (unlikely(f->f_mode & FMODE_BACKING))
2755  		return d_inode(backing_file_user_path(f)->dentry);
2756  	return file_inode(f);
2757  }
2758  
/* Open a new struct file via dentry_open() using @file's path, f_flags and f_cred. */
static inline struct file *file_clone_open(struct file *file)
{
	return dentry_open(&file->f_path, file->f_flags, file->f_cred);
}
/* Prototype only. */
extern int filp_close(struct file *, fl_owner_t id);

/*
 * struct filename constructors (from a userspace or a kernel string) and
 * the matching release function.
 */
extern struct filename *getname_flags(const char __user *, int);
extern struct filename *getname_uflags(const char __user *, int);
extern struct filename *getname(const char __user *);
extern struct filename *getname_kernel(const char *);
extern void putname(struct filename *name);

/* finish_open() takes an optional ->open style callback; see finish_open_simple(). */
extern int finish_open(struct file *file, struct dentry *dentry,
			int (*open)(struct inode *, struct file *));
extern int finish_no_open(struct file *file, struct dentry *dentry);
2774  
2775  /* Helper for the simple case when original dentry is used */
finish_open_simple(struct file * file,int error)2776  static inline int finish_open_simple(struct file *file, int error)
2777  {
2778  	if (error)
2779  		return error;
2780  
2781  	return finish_open(file, file->f_path.dentry, NULL);
2782  }
2783  
/* fs/dcache.c */
extern void __init vfs_caches_init_early(void);
extern void __init vfs_caches_init(void);

/* Slab cache that __getname()/__putname() allocate from and free to. */
extern struct kmem_cache *names_cachep;

/* Allocate from names_cachep with GFP_KERNEL; release with __putname(). */
#define __getname()		kmem_cache_alloc(names_cachep, GFP_KERNEL)
/* The (void *) cast lets callers pass a const-qualified pointer. */
#define __putname(name)		kmem_cache_free(names_cachep, (void *)(name))
2792  
extern struct super_block *blockdev_superblock;
/*
 * True only when CONFIG_BLOCK is enabled and @sb is blockdev_superblock.
 * The IS_ENABLED() test comes first, so the pointer comparison is
 * short-circuited away when CONFIG_BLOCK is off.
 */
static inline bool sb_is_blkdev_sb(struct super_block *sb)
{
	return IS_ENABLED(CONFIG_BLOCK) && sb == blockdev_superblock;
}
2798  
/* Prototypes only. */
void emergency_thaw_all(void);
extern int sync_filesystem(struct super_block *);
/* file_operations tables declared here and defined elsewhere. */
extern const struct file_operations def_blk_fops;
extern const struct file_operations def_chr_fops;
2803  
/* fs/char_dev.c */
#define CHRDEV_MAJOR_MAX 512
/* Marks the bottom of the first segment of free char majors */
#define CHRDEV_MAJOR_DYN_END 234
/* Marks the top and bottom of the second segment of free char majors */
#define CHRDEV_MAJOR_DYN_EXT_START 511
#define CHRDEV_MAJOR_DYN_EXT_END 384

/*
 * Character device region prototypes.  __register_chrdev() and
 * __unregister_chrdev() are wrapped by register_chrdev()/unregister_chrdev()
 * below.
 */
extern int alloc_chrdev_region(dev_t *, unsigned, unsigned, const char *);
extern int register_chrdev_region(dev_t, unsigned, const char *);
extern int __register_chrdev(unsigned int major, unsigned int baseminor,
			     unsigned int count, const char *name,
			     const struct file_operations *fops);
extern void __unregister_chrdev(unsigned int major, unsigned int baseminor,
				unsigned int count, const char *name);
extern void unregister_chrdev_region(dev_t, unsigned);
extern void chrdev_show(struct seq_file *,off_t);
2821  
/*
 * Register @fops for @major under @name, covering a base minor of 0 and a
 * count of 256.  Returns whatever __register_chrdev() returns.
 */
static inline int register_chrdev(unsigned int major, const char *name,
				  const struct file_operations *fops)
{
	return __register_chrdev(major, 0, 256, name, fops);
}
2827  
/* Undo register_chrdev(): same base minor (0) and count (256). */
static inline void unregister_chrdev(unsigned int major, const char *name)
{
	__unregister_chrdev(major, 0, 256, name);
}
2832  
/* Prototype only. */
extern void init_special_inode(struct inode *, umode_t, dev_t);

/* Invalid inode operations -- fs/bad_inode.c */
extern void make_bad_inode(struct inode *);
extern bool is_bad_inode(struct inode *);

/* Writeback wait/error prototypes; __must_check means callers must use the result. */
extern int __must_check file_fdatawait_range(struct file *file, loff_t lstart,
						loff_t lend);
extern int __must_check file_check_and_advance_wb_err(struct file *file);
extern int __must_check file_write_and_wait_range(struct file *file,
						loff_t start, loff_t end);
2844  
/* file_write_and_wait_range() over the whole file: offsets 0 through LLONG_MAX. */
static inline int file_write_and_wait(struct file *file)
{
	return file_write_and_wait_range(file, 0, LLONG_MAX);
}
2849  
/* fsync prototypes; vfs_fsync_range() is what generic_write_sync() below calls. */
extern int vfs_fsync_range(struct file *file, loff_t start, loff_t end,
			   int datasync);
extern int vfs_fsync(struct file *file, int datasync);

/* Prototype only. */
extern int sync_file_range(struct file *file, loff_t offset, loff_t nbytes,
				unsigned int flags);
2856  
iocb_is_dsync(const struct kiocb * iocb)2857  static inline bool iocb_is_dsync(const struct kiocb *iocb)
2858  {
2859  	return (iocb->ki_flags & IOCB_DSYNC) ||
2860  		IS_SYNC(iocb->ki_filp->f_mapping->host);
2861  }
2862  
2863  /*
2864   * Sync the bytes written if this was a synchronous write.  Expect ki_pos
2865   * to already be updated for the write, and will return either the amount
2866   * of bytes passed in, or an error if syncing the file failed.
2867   */
generic_write_sync(struct kiocb * iocb,ssize_t count)2868  static inline ssize_t generic_write_sync(struct kiocb *iocb, ssize_t count)
2869  {
2870  	if (iocb_is_dsync(iocb)) {
2871  		int ret = vfs_fsync_range(iocb->ki_filp,
2872  				iocb->ki_pos - count, iocb->ki_pos - 1,
2873  				(iocb->ki_flags & IOCB_SYNC) ? 0 : 1);
2874  		if (ret)
2875  			return ret;
2876  	}
2877  
2878  	return count;
2879  }
2880  
/* Prototypes only. */
extern void emergency_sync(void);
extern void emergency_remount(void);
2883  
/*
 * bmap() is only implemented when CONFIG_BLOCK is set; without it the stub
 * below fails every call with -EINVAL and leaves @block untouched.
 */
#ifdef CONFIG_BLOCK
extern int bmap(struct inode *inode, sector_t *block);
#else
static inline int bmap(struct inode *inode,  sector_t *block)
{
	return -EINVAL;
}
#endif
2892  
/* Prototype only. */
int notify_change(struct mnt_idmap *, struct dentry *,
		  struct iattr *, struct inode **);
/* Permission check prototypes; file_permission()/path_permission() below wrap inode_permission(). */
int inode_permission(struct mnt_idmap *, struct inode *, int);
int generic_permission(struct mnt_idmap *, struct inode *, int);
/*
 * inode_permission() for an open file: uses the idmap of the mount @file
 * was opened on and the file's inode, with @mask passed through.
 */
static inline int file_permission(struct file *file, int mask)
{
	struct mnt_idmap *idmap = file_mnt_idmap(file);
	struct inode *inode = file_inode(file);

	return inode_permission(idmap, inode, mask);
}
path_permission(const struct path * path,int mask)2902  static inline int path_permission(const struct path *path, int mask)
2903  {
2904  	return inode_permission(mnt_idmap(path->mnt),
2905  				d_inode(path->dentry), mask);
2906  }
/* Prototype only; takes the idmap, the directory inode and the inode being checked. */
int __check_sticky(struct mnt_idmap *idmap, struct inode *dir,
		   struct inode *inode);
2909  
execute_ok(struct inode * inode)2910  static inline bool execute_ok(struct inode *inode)
2911  {
2912  	return (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode);
2913  }
2914  
inode_wrong_type(const struct inode * inode,umode_t mode)2915  static inline bool inode_wrong_type(const struct inode *inode, umode_t mode)
2916  {
2917  	return (inode->i_mode ^ mode) & S_IFMT;
2918  }
2919  
2920  /**
2921   * file_start_write - get write access to a superblock for regular file io
2922   * @file: the file we want to write to
2923   *
2924   * This is a variant of sb_start_write() which is a noop on non-regualr file.
2925   * Should be matched with a call to file_end_write().
2926   */
file_start_write(struct file * file)2927  static inline void file_start_write(struct file *file)
2928  {
2929  	if (!S_ISREG(file_inode(file)->i_mode))
2930  		return;
2931  	sb_start_write(file_inode(file)->i_sb);
2932  }
2933  
file_start_write_trylock(struct file * file)2934  static inline bool file_start_write_trylock(struct file *file)
2935  {
2936  	if (!S_ISREG(file_inode(file)->i_mode))
2937  		return true;
2938  	return sb_start_write_trylock(file_inode(file)->i_sb);
2939  }
2940  
2941  /**
2942   * file_end_write - drop write access to a superblock of a regular file
2943   * @file: the file we wrote to
2944   *
2945   * Should be matched with a call to file_start_write().
2946   */
file_end_write(struct file * file)2947  static inline void file_end_write(struct file *file)
2948  {
2949  	if (!S_ISREG(file_inode(file)->i_mode))
2950  		return;
2951  	sb_end_write(file_inode(file)->i_sb);
2952  }
2953  
2954  /**
2955   * kiocb_start_write - get write access to a superblock for async file io
2956   * @iocb: the io context we want to submit the write with
2957   *
2958   * This is a variant of sb_start_write() for async io submission.
2959   * Should be matched with a call to kiocb_end_write().
2960   */
kiocb_start_write(struct kiocb * iocb)2961  static inline void kiocb_start_write(struct kiocb *iocb)
2962  {
2963  	struct inode *inode = file_inode(iocb->ki_filp);
2964  
2965  	sb_start_write(inode->i_sb);
2966  	/*
2967  	 * Fool lockdep by telling it the lock got released so that it
2968  	 * doesn't complain about the held lock when we return to userspace.
2969  	 */
2970  	__sb_writers_release(inode->i_sb, SB_FREEZE_WRITE);
2971  }
2972  
2973  /**
2974   * kiocb_end_write - drop write access to a superblock after async file io
2975   * @iocb: the io context we sumbitted the write with
2976   *
2977   * Should be matched with a call to kiocb_start_write().
2978   */
kiocb_end_write(struct kiocb * iocb)2979  static inline void kiocb_end_write(struct kiocb *iocb)
2980  {
2981  	struct inode *inode = file_inode(iocb->ki_filp);
2982  
2983  	/*
2984  	 * Tell lockdep we inherited freeze protection from submission thread.
2985  	 */
2986  	__sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE);
2987  	sb_end_write(inode->i_sb);
2988  }
2989  
2990  /*
2991   * This is used for regular files where some users -- especially the
2992   * currently executed binary in a process, previously handled via
2993   * VM_DENYWRITE -- cannot handle concurrent write (and maybe mmap
2994   * read-write shared) accesses.
2995   *
2996   * get_write_access() gets write permission for a file.
2997   * put_write_access() releases this write permission.
2998   * deny_write_access() denies write access to a file.
2999   * allow_write_access() re-enables write access to a file.
3000   *
3001   * The i_writecount field of an inode can have the following values:
3002   * 0: no write access, no denied write access
3003   * < 0: (-i_writecount) users that denied write access to the file.
3004   * > 0: (i_writecount) users that have write access to the file.
3005   *
3006   * Normally we operate on that counter with atomic_{inc,dec} and it's safe
3007   * except for the cases where we don't hold i_writecount yet. Then we need to
3008   * use {get,deny}_write_access() - these functions check the sign and refuse
3009   * to do the change if sign is wrong.
3010   */
get_write_access(struct inode * inode)3011  static inline int get_write_access(struct inode *inode)
3012  {
3013  	return atomic_inc_unless_negative(&inode->i_writecount) ? 0 : -ETXTBSY;
3014  }
deny_write_access(struct file * file)3015  static inline int deny_write_access(struct file *file)
3016  {
3017  	struct inode *inode = file_inode(file);
3018  	return atomic_dec_unless_positive(&inode->i_writecount) ? 0 : -ETXTBSY;
3019  }
/* Release write access on @inode: unconditionally decrements i_writecount. */
static inline void put_write_access(struct inode * inode)
{
	atomic_dec(&inode->i_writecount);
}
allow_write_access(struct file * file)3024  static inline void allow_write_access(struct file *file)
3025  {
3026  	if (file)
3027  		atomic_inc(&file_inode(file)->i_writecount);
3028  }
inode_is_open_for_write(const struct inode * inode)3029  static inline bool inode_is_open_for_write(const struct inode *inode)
3030  {
3031  	return atomic_read(&inode->i_writecount) > 0;
3032  }
3033  
/*
 * i_readcount helpers.  The counter is only maintained when CONFIG_IMA or
 * CONFIG_FILE_LOCKING is enabled; otherwise both helpers are empty stubs.
 */
#if defined(CONFIG_IMA) || defined(CONFIG_FILE_LOCKING)
/* Decrement i_readcount; a result below zero is a bug. */
static inline void i_readcount_dec(struct inode *inode)
{
	int remaining = atomic_dec_return(&inode->i_readcount);

	BUG_ON(remaining < 0);
}
/* Increment i_readcount. */
static inline void i_readcount_inc(struct inode *inode)
{
	atomic_inc(&inode->i_readcount);
}
#else
static inline void i_readcount_dec(struct inode *inode)
{
}
static inline void i_readcount_inc(struct inode *inode)
{
}
#endif
/* Prototype only. */
extern int do_pipe_flags(int *, int);

/*
 * Read/write prototypes operating on kernel buffers (void * rather than
 * __user pointers), each with a double-underscore variant.
 */
extern ssize_t kernel_read(struct file *, void *, size_t, loff_t *);
ssize_t __kernel_read(struct file *file, void *buf, size_t count, loff_t *pos);
extern ssize_t kernel_write(struct file *, const void *, size_t, loff_t *);
extern ssize_t __kernel_write(struct file *, const void *, size_t, loff_t *);
extern struct file * open_exec(const char *);
3060  
/* fs/dcache.c -- generic fs support functions */
extern bool is_subdir(struct dentry *, struct dentry *);
extern bool path_is_under(const struct path *, const struct path *);

/* Prototype only; takes the file, a caller-supplied buffer and its int length. */
extern char *file_path(struct file *, char *, int);
3066  
/**
 * is_dot_dotdot - returns true only if @name is "." or ".."
 * @name: file name to check
 * @len: length of file name, in bytes
 *
 * Only the first @len bytes of @name are inspected, so @name does not need
 * to be NUL-terminated.  A zero @len never matches.
 */
static inline bool is_dot_dotdot(const char *name, size_t len)
{
	/* Empty names and names not starting with a dot can never match. */
	if (!len || !unlikely(name[0] == '.'))
		return false;

	if (len == 1)
		return true;

	return len == 2 && name[1] == '.';
}
3077  
3078  #include <linux/err.h>
3079  
/* needed for stackable file system support */
extern loff_t default_llseek(struct file *file, loff_t offset, int whence);

/* Prototype only; same signature as default_llseek(). */
extern loff_t vfs_llseek(struct file *file, loff_t offset, int whence);
3084  
extern int inode_init_always_gfp(struct super_block *, struct inode *, gfp_t);
/* inode_init_always_gfp() with the allocation flags fixed to GFP_NOFS. */
static inline int inode_init_always(struct super_block *sb, struct inode *inode)
{
	return inode_init_always_gfp(sb, inode, GFP_NOFS);
}
3090  
/* Inode / address_space setup and lookup prototypes. */
extern void inode_init_once(struct inode *);
extern void address_space_init_once(struct address_space *mapping);
extern struct inode * igrab(struct inode *);
extern ino_t iunique(struct super_block *, ino_t);
extern int inode_needs_sync(struct inode *inode);
extern int generic_delete_inode(struct inode *inode);
generic_drop_inode(struct inode * inode)3097  static inline int generic_drop_inode(struct inode *inode)
3098  {
3099  	return !inode->i_nlink || inode_unhashed(inode);
3100  }
/* Prototype only. */
extern void d_mark_dontcache(struct inode *inode);

/*
 * Inode lookup prototypes.  The *5 variants take a hash value plus a
 * caller-supplied test() callback and opaque @data; ilookup() takes an
 * inode number instead.
 */
extern struct inode *ilookup5_nowait(struct super_block *sb,
		unsigned long hashval, int (*test)(struct inode *, void *),
		void *data);
extern struct inode *ilookup5(struct super_block *sb, unsigned long hashval,
		int (*test)(struct inode *, void *), void *data);
extern struct inode *ilookup(struct super_block *sb, unsigned long ino);

/* Insert / get-or-create prototypes; these additionally take a set() callback. */
extern struct inode *inode_insert5(struct inode *inode, unsigned long hashval,
		int (*test)(struct inode *, void *),
		int (*set)(struct inode *, void *),
		void *data);
struct inode *iget5_locked(struct super_block *, unsigned long,
			   int (*test)(struct inode *, void *),
			   int (*set)(struct inode *, void *), void *);
struct inode *iget5_locked_rcu(struct super_block *, unsigned long,
			       int (*test)(struct inode *, void *),
			       int (*set)(struct inode *, void *), void *);
extern struct inode * iget_locked(struct super_block *, unsigned long);
/* find_inode_nowait()'s match() callback also receives the unsigned long argument. */
extern struct inode *find_inode_nowait(struct super_block *,
				       unsigned long,
				       int (*match)(struct inode *,
						    unsigned long, void *),
				       void *data);
extern struct inode *find_inode_rcu(struct super_block *, unsigned long,
				    int (*)(struct inode *, void *), void *);
extern struct inode *find_inode_by_ino_rcu(struct super_block *, unsigned long);
extern int insert_inode_locked4(struct inode *, unsigned long, int (*test)(struct inode *, void *), void *);
extern int insert_inode_locked(struct inode *);
3131  #ifdef CONFIG_DEBUG_LOCK_ALLOC
3132  extern void lockdep_annotate_inode_mutex_key(struct inode *inode);
3133  #else
/* Stub used when CONFIG_DEBUG_LOCK_ALLOC is not set: intentionally empty. */
static inline void lockdep_annotate_inode_mutex_key(struct inode *inode) { }
3135  #endif
3136  extern void unlock_new_inode(struct inode *);
3137  extern void discard_new_inode(struct inode *);
3138  extern unsigned int get_next_ino(void);
3139  extern void evict_inodes(struct super_block *sb);
3140  void dump_mapping(const struct address_space *);
3141  
3142  /*
3143   * Userspace may rely on the inode number being non-zero. For example, glibc
3144   * simply ignores files with zero i_ino in unlink() and other places.
3145   *
3146   * As an additional complication, if userspace was compiled with
3147   * _FILE_OFFSET_BITS=32 on a 64-bit kernel we'll only end up reading out the
3148   * lower 32 bits, so we need to check that those aren't zero explicitly. With
3149   * _FILE_OFFSET_BITS=64, this may cause some harmless false-negatives, but
3150   * better safe than sorry.
3151   */
is_zero_ino(ino_t ino)3152  static inline bool is_zero_ino(ino_t ino)
3153  {
3154  	return (u32)ino == 0;
3155  }
3156  
3157  /*
3158   * inode->i_lock must be held
3159   */
__iget(struct inode * inode)3160  static inline void __iget(struct inode *inode)
3161  {
3162  	atomic_inc(&inode->i_count);
3163  }
3164  
3165  extern void iget_failed(struct inode *);
3166  extern void clear_inode(struct inode *);
3167  extern void __destroy_inode(struct inode *);
3168  extern struct inode *new_inode_pseudo(struct super_block *sb);
3169  extern struct inode *new_inode(struct super_block *sb);
3170  extern void free_inode_nonrcu(struct inode *inode);
3171  extern int setattr_should_drop_suidgid(struct mnt_idmap *, struct inode *);
3172  extern int file_remove_privs_flags(struct file *file, unsigned int flags);
3173  extern int file_remove_privs(struct file *);
3174  int setattr_should_drop_sgid(struct mnt_idmap *idmap,
3175  			     const struct inode *inode);
3176  
/*
 * This must be used for allocating filesystem-specific inodes to set
 * up the inode reclaim context correctly.
 *
 * Expands to kmem_cache_alloc_lru() on @_cache with the superblock's
 * s_inode_lru list and the @_gfp mask.  @_sb is parenthesised so that any
 * pointer expression (e.g. "sbs + 1") can be passed safely.
 */
#define alloc_inode_sb(_sb, _cache, _gfp) kmem_cache_alloc_lru(_cache, &(_sb)->s_inode_lru, _gfp)
3182  
3183  extern void __insert_inode_hash(struct inode *, unsigned long hashval);
insert_inode_hash(struct inode * inode)3184  static inline void insert_inode_hash(struct inode *inode)
3185  {
3186  	__insert_inode_hash(inode, inode->i_ino);
3187  }
3188  
3189  extern void __remove_inode_hash(struct inode *);
remove_inode_hash(struct inode * inode)3190  static inline void remove_inode_hash(struct inode *inode)
3191  {
3192  	if (!inode_unhashed(inode) && !hlist_fake(&inode->i_hash))
3193  		__remove_inode_hash(inode);
3194  }
3195  
3196  extern void inode_sb_list_add(struct inode *inode);
3197  extern void inode_add_lru(struct inode *inode);
3198  
3199  extern int sb_set_blocksize(struct super_block *, int);
3200  extern int sb_min_blocksize(struct super_block *, int);
3201  
3202  extern int generic_file_mmap(struct file *, struct vm_area_struct *);
3203  extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *);
3204  extern ssize_t generic_write_checks(struct kiocb *, struct iov_iter *);
3205  int generic_write_checks_count(struct kiocb *iocb, loff_t *count);
3206  extern int generic_write_check_limits(struct file *file, loff_t pos,
3207  		loff_t *count);
3208  extern int generic_file_rw_checks(struct file *file_in, struct file *file_out);
3209  ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *to,
3210  		ssize_t already_read);
3211  extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *);
3212  extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *);
3213  extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *);
3214  extern ssize_t generic_file_direct_write(struct kiocb *, struct iov_iter *);
3215  ssize_t generic_perform_write(struct kiocb *, struct iov_iter *);
3216  ssize_t direct_write_fallback(struct kiocb *iocb, struct iov_iter *iter,
3217  		ssize_t direct_written, ssize_t buffered_written);
3218  
3219  ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos,
3220  		rwf_t flags);
3221  ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos,
3222  		rwf_t flags);
3223  ssize_t vfs_iocb_iter_read(struct file *file, struct kiocb *iocb,
3224  			   struct iov_iter *iter);
3225  ssize_t vfs_iocb_iter_write(struct file *file, struct kiocb *iocb,
3226  			    struct iov_iter *iter);
3227  
3228  /* fs/splice.c */
3229  ssize_t filemap_splice_read(struct file *in, loff_t *ppos,
3230  			    struct pipe_inode_info *pipe,
3231  			    size_t len, unsigned int flags);
3232  ssize_t copy_splice_read(struct file *in, loff_t *ppos,
3233  			 struct pipe_inode_info *pipe,
3234  			 size_t len, unsigned int flags);
3235  extern ssize_t iter_file_splice_write(struct pipe_inode_info *,
3236  		struct file *, loff_t *, size_t, unsigned int);
3237  
3238  
3239  extern void
3240  file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping);
3241  extern loff_t noop_llseek(struct file *file, loff_t offset, int whence);
3242  extern loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize);
3243  extern loff_t generic_file_llseek(struct file *file, loff_t offset, int whence);
3244  extern loff_t generic_file_llseek_size(struct file *file, loff_t offset,
3245  		int whence, loff_t maxsize, loff_t eof);
3246  loff_t generic_llseek_cookie(struct file *file, loff_t offset, int whence,
3247  			     u64 *cookie);
3248  extern loff_t fixed_size_llseek(struct file *file, loff_t offset,
3249  		int whence, loff_t size);
3250  extern loff_t no_seek_end_llseek_size(struct file *, loff_t, int, loff_t);
3251  extern loff_t no_seek_end_llseek(struct file *, loff_t, int);
3252  int rw_verify_area(int, struct file *, const loff_t *, size_t);
3253  extern int generic_file_open(struct inode * inode, struct file * filp);
3254  extern int nonseekable_open(struct inode * inode, struct file * filp);
3255  extern int stream_open(struct inode * inode, struct file * filp);
3256  
3257  #ifdef CONFIG_BLOCK
3258  typedef void (dio_submit_t)(struct bio *bio, struct inode *inode,
3259  			    loff_t file_offset);
3260  
3261  enum {
3262  	/* need locking between buffered and direct access */
3263  	DIO_LOCKING	= 0x01,
3264  
3265  	/* filesystem does not support filling holes */
3266  	DIO_SKIP_HOLES	= 0x02,
3267  };
3268  
3269  ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
3270  			     struct block_device *bdev, struct iov_iter *iter,
3271  			     get_block_t get_block,
3272  			     dio_iodone_t end_io,
3273  			     int flags);
3274  
blockdev_direct_IO(struct kiocb * iocb,struct inode * inode,struct iov_iter * iter,get_block_t get_block)3275  static inline ssize_t blockdev_direct_IO(struct kiocb *iocb,
3276  					 struct inode *inode,
3277  					 struct iov_iter *iter,
3278  					 get_block_t get_block)
3279  {
3280  	return __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter,
3281  			get_block, NULL, DIO_LOCKING | DIO_SKIP_HOLES);
3282  }
3283  #endif
3284  
3285  bool inode_dio_finished(const struct inode *inode);
3286  void inode_dio_wait(struct inode *inode);
3287  void inode_dio_wait_interruptible(struct inode *inode);
3288  
3289  /**
 * inode_dio_begin - signal start of a direct I/O request
 * @inode: inode the direct I/O happens on
 *
 * This is called before we start processing a direct I/O request, and
 * raises the count that callers waiting for direct I/O to be quiesced wait on.
3295   */
inode_dio_begin(struct inode * inode)3296  static inline void inode_dio_begin(struct inode *inode)
3297  {
3298  	atomic_inc(&inode->i_dio_count);
3299  }
3300  
3301  /**
 * inode_dio_end - signal finish of a direct I/O request
3303   * @inode: inode the direct I/O happens on
3304   *
3305   * This is called once we've finished processing a direct I/O request,
3306   * and is used to wake up callers waiting for direct I/O to be quiesced.
3307   */
inode_dio_end(struct inode * inode)3308  static inline void inode_dio_end(struct inode *inode)
3309  {
3310  	if (atomic_dec_and_test(&inode->i_dio_count))
3311  		wake_up_var(&inode->i_dio_count);
3312  }
3313  
3314  extern void inode_set_flags(struct inode *inode, unsigned int flags,
3315  			    unsigned int mask);
3316  
3317  extern const struct file_operations generic_ro_fops;
3318  
3319  #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m))
3320  
3321  extern int readlink_copy(char __user *, int, const char *);
3322  extern int page_readlink(struct dentry *, char __user *, int);
3323  extern const char *page_get_link(struct dentry *, struct inode *,
3324  				 struct delayed_call *);
3325  extern void page_put_link(void *);
3326  extern int page_symlink(struct inode *inode, const char *symname, int len);
3327  extern const struct inode_operations page_symlink_inode_operations;
3328  extern void kfree_link(void *);
3329  void generic_fillattr(struct mnt_idmap *, u32, struct inode *, struct kstat *);
3330  void generic_fill_statx_attr(struct inode *inode, struct kstat *stat);
3331  void generic_fill_statx_atomic_writes(struct kstat *stat,
3332  				      unsigned int unit_min,
3333  				      unsigned int unit_max);
3334  extern int vfs_getattr_nosec(const struct path *, struct kstat *, u32, unsigned int);
3335  extern int vfs_getattr(const struct path *, struct kstat *, u32, unsigned int);
3336  void __inode_add_bytes(struct inode *inode, loff_t bytes);
3337  void inode_add_bytes(struct inode *inode, loff_t bytes);
3338  void __inode_sub_bytes(struct inode *inode, loff_t bytes);
3339  void inode_sub_bytes(struct inode *inode, loff_t bytes);
__inode_get_bytes(struct inode * inode)3340  static inline loff_t __inode_get_bytes(struct inode *inode)
3341  {
3342  	return (((loff_t)inode->i_blocks) << 9) + inode->i_bytes;
3343  }
3344  loff_t inode_get_bytes(struct inode *inode);
3345  void inode_set_bytes(struct inode *inode, loff_t bytes);
3346  const char *simple_get_link(struct dentry *, struct inode *,
3347  			    struct delayed_call *);
3348  extern const struct inode_operations simple_symlink_inode_operations;
3349  
3350  extern int iterate_dir(struct file *, struct dir_context *);
3351  
3352  int vfs_fstatat(int dfd, const char __user *filename, struct kstat *stat,
3353  		int flags);
3354  int vfs_fstat(int fd, struct kstat *stat);
3355  
/* vfs_fstatat() relative to AT_FDCWD with no flags set. */
static inline int vfs_stat(const char __user *filename, struct kstat *stat)
{
	const int flags = 0;

	return vfs_fstatat(AT_FDCWD, filename, stat, flags);
}
/* vfs_fstatat() relative to AT_FDCWD with AT_SYMLINK_NOFOLLOW set. */
static inline int vfs_lstat(const char __user *name, struct kstat *stat)
{
	const int flags = AT_SYMLINK_NOFOLLOW;

	return vfs_fstatat(AT_FDCWD, name, stat, flags);
}
3364  
3365  extern const char *vfs_get_link(struct dentry *, struct delayed_call *);
3366  extern int vfs_readlink(struct dentry *, char __user *, int);
3367  
3368  extern struct file_system_type *get_filesystem(struct file_system_type *fs);
3369  extern void put_filesystem(struct file_system_type *fs);
3370  extern struct file_system_type *get_fs_type(const char *name);
3371  extern void drop_super(struct super_block *sb);
3372  extern void drop_super_exclusive(struct super_block *sb);
3373  extern void iterate_supers(void (*)(struct super_block *, void *), void *);
3374  extern void iterate_supers_type(struct file_system_type *,
3375  			        void (*)(struct super_block *, void *), void *);
3376  
3377  extern int dcache_dir_open(struct inode *, struct file *);
3378  extern int dcache_dir_close(struct inode *, struct file *);
3379  extern loff_t dcache_dir_lseek(struct file *, loff_t, int);
3380  extern int dcache_readdir(struct file *, struct dir_context *);
3381  extern int simple_setattr(struct mnt_idmap *, struct dentry *,
3382  			  struct iattr *);
3383  extern int simple_getattr(struct mnt_idmap *, const struct path *,
3384  			  struct kstat *, u32, unsigned int);
3385  extern int simple_statfs(struct dentry *, struct kstatfs *);
3386  extern int simple_open(struct inode *inode, struct file *file);
3387  extern int simple_link(struct dentry *, struct inode *, struct dentry *);
3388  extern int simple_unlink(struct inode *, struct dentry *);
3389  extern int simple_rmdir(struct inode *, struct dentry *);
3390  void simple_rename_timestamp(struct inode *old_dir, struct dentry *old_dentry,
3391  			     struct inode *new_dir, struct dentry *new_dentry);
3392  extern int simple_rename_exchange(struct inode *old_dir, struct dentry *old_dentry,
3393  				  struct inode *new_dir, struct dentry *new_dentry);
3394  extern int simple_rename(struct mnt_idmap *, struct inode *,
3395  			 struct dentry *, struct inode *, struct dentry *,
3396  			 unsigned int);
3397  extern void simple_recursive_removal(struct dentry *,
3398                                void (*callback)(struct dentry *));
3399  extern int noop_fsync(struct file *, loff_t, loff_t, int);
3400  extern ssize_t noop_direct_IO(struct kiocb *iocb, struct iov_iter *iter);
3401  extern int simple_empty(struct dentry *);
3402  extern int simple_write_begin(struct file *file, struct address_space *mapping,
3403  			loff_t pos, unsigned len,
3404  			struct folio **foliop, void **fsdata);
3405  extern const struct address_space_operations ram_aops;
3406  extern int always_delete_dentry(const struct dentry *);
3407  extern struct inode *alloc_anon_inode(struct super_block *);
3408  extern int simple_nosetlease(struct file *, int, struct file_lease **, void **);
3409  extern const struct dentry_operations simple_dentry_operations;
3410  
3411  extern struct dentry *simple_lookup(struct inode *, struct dentry *, unsigned int flags);
3412  extern ssize_t generic_read_dir(struct file *, char __user *, size_t, loff_t *);
3413  extern const struct file_operations simple_dir_operations;
3414  extern const struct inode_operations simple_dir_inode_operations;
3415  extern void make_empty_dir_inode(struct inode *inode);
3416  extern bool is_empty_dir_inode(struct inode *inode);
3417  struct tree_descr { const char *name; const struct file_operations *ops; int mode; };
3418  struct dentry *d_alloc_name(struct dentry *, const char *);
3419  extern int simple_fill_super(struct super_block *, unsigned long,
3420  			     const struct tree_descr *);
3421  extern int simple_pin_fs(struct file_system_type *, struct vfsmount **mount, int *count);
3422  extern void simple_release_fs(struct vfsmount **mount, int *count);
3423  
3424  extern ssize_t simple_read_from_buffer(void __user *to, size_t count,
3425  			loff_t *ppos, const void *from, size_t available);
3426  extern ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos,
3427  		const void __user *from, size_t count);
3428  
3429  struct offset_ctx {
3430  	struct maple_tree	mt;
3431  	unsigned long		next_offset;
3432  };
3433  
3434  void simple_offset_init(struct offset_ctx *octx);
3435  int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry);
3436  void simple_offset_remove(struct offset_ctx *octx, struct dentry *dentry);
3437  int simple_offset_empty(struct dentry *dentry);
3438  int simple_offset_rename(struct inode *old_dir, struct dentry *old_dentry,
3439  			 struct inode *new_dir, struct dentry *new_dentry);
3440  int simple_offset_rename_exchange(struct inode *old_dir,
3441  				  struct dentry *old_dentry,
3442  				  struct inode *new_dir,
3443  				  struct dentry *new_dentry);
3444  void simple_offset_destroy(struct offset_ctx *octx);
3445  
3446  extern const struct file_operations simple_offset_dir_operations;
3447  
3448  extern int __generic_file_fsync(struct file *, loff_t, loff_t, int);
3449  extern int generic_file_fsync(struct file *, loff_t, loff_t, int);
3450  
3451  extern int generic_check_addressable(unsigned, u64);
3452  
3453  extern void generic_set_sb_d_ops(struct super_block *sb);
3454  extern int generic_ci_match(const struct inode *parent,
3455  			    const struct qstr *name,
3456  			    const struct qstr *folded_name,
3457  			    const u8 *de_name, u32 de_name_len);
3458  
sb_has_encoding(const struct super_block * sb)3459  static inline bool sb_has_encoding(const struct super_block *sb)
3460  {
3461  #if IS_ENABLED(CONFIG_UNICODE)
3462  	return !!sb->s_encoding;
3463  #else
3464  	return false;
3465  #endif
3466  }
3467  
3468  int may_setattr(struct mnt_idmap *idmap, struct inode *inode,
3469  		unsigned int ia_valid);
3470  int setattr_prepare(struct mnt_idmap *, struct dentry *, struct iattr *);
3471  extern int inode_newsize_ok(const struct inode *, loff_t offset);
3472  void setattr_copy(struct mnt_idmap *, struct inode *inode,
3473  		  const struct iattr *attr);
3474  
3475  extern int file_update_time(struct file *file);
3476  
vma_is_dax(const struct vm_area_struct * vma)3477  static inline bool vma_is_dax(const struct vm_area_struct *vma)
3478  {
3479  	return vma->vm_file && IS_DAX(vma->vm_file->f_mapping->host);
3480  }
3481  
vma_is_fsdax(struct vm_area_struct * vma)3482  static inline bool vma_is_fsdax(struct vm_area_struct *vma)
3483  {
3484  	struct inode *inode;
3485  
3486  	if (!IS_ENABLED(CONFIG_FS_DAX) || !vma->vm_file)
3487  		return false;
3488  	if (!vma_is_dax(vma))
3489  		return false;
3490  	inode = file_inode(vma->vm_file);
3491  	if (S_ISCHR(inode->i_mode))
3492  		return false; /* device-dax */
3493  	return true;
3494  }
3495  
iocb_flags(struct file * file)3496  static inline int iocb_flags(struct file *file)
3497  {
3498  	int res = 0;
3499  	if (file->f_flags & O_APPEND)
3500  		res |= IOCB_APPEND;
3501  	if (file->f_flags & O_DIRECT)
3502  		res |= IOCB_DIRECT;
3503  	if (file->f_flags & O_DSYNC)
3504  		res |= IOCB_DSYNC;
3505  	if (file->f_flags & __O_SYNC)
3506  		res |= IOCB_SYNC;
3507  	return res;
3508  }
3509  
/*
 * Validate the per-call RWF_* @flags and fold them into @ki->ki_flags.
 *
 * Returns 0 on success (including when @flags is zero, in which case @ki is
 * left untouched), or:
 *   -EOPNOTSUPP  @flags has bits outside RWF_SUPPORTED; RWF_NOWAIT on a file
 *                without FMODE_NOWAIT; RWF_ATOMIC when @rw_type is not WRITE
 *                or the file lacks FMODE_CAN_ATOMIC_WRITE
 *   -EINVAL      both RWF_APPEND and RWF_NOAPPEND were given
 *   -EPERM       RWF_NOAPPEND was given while IOCB_APPEND is set and the
 *                file's inode is IS_APPEND()
 */
static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags,
				     int rw_type)
{
	int extra;

	/* RWF flags are copied verbatim below; they must not hit private bits */
	BUILD_BUG_ON((__force int) RWF_SUPPORTED & IOCB_EVENTFD);

	if (!flags)
		return 0;
	if (unlikely(flags & ~RWF_SUPPORTED))
		return -EOPNOTSUPP;
	if (unlikely((flags & RWF_APPEND) && (flags & RWF_NOAPPEND)))
		return -EINVAL;

	if ((flags & RWF_NOWAIT) && !(ki->ki_filp->f_mode & FMODE_NOWAIT))
		return -EOPNOTSUPP;

	if (flags & RWF_ATOMIC) {
		if (rw_type != WRITE)
			return -EOPNOTSUPP;
		if (!(ki->ki_filp->f_mode & FMODE_CAN_ATOMIC_WRITE))
			return -EOPNOTSUPP;
	}

	extra = (__force int) (flags & RWF_SUPPORTED);
	if (flags & RWF_SYNC)
		extra |= IOCB_DSYNC;

	/* Dropping an inherited IOCB_APPEND is refused on IS_APPEND inodes. */
	if ((flags & RWF_NOAPPEND) && (ki->ki_flags & IOCB_APPEND)) {
		if (IS_APPEND(file_inode(ki->ki_filp)))
			return -EPERM;
		ki->ki_flags &= ~IOCB_APPEND;
	}

	ki->ki_flags |= extra;
	return 0;
}
3548  
3549  /* Transaction based IO helpers */
3550  
3551  /*
3552   * An argresp is stored in an allocated page and holds the
3553   * size of the argument or response, along with its content
3554   */
3555  struct simple_transaction_argresp {
3556  	ssize_t size;
3557  	char data[];
3558  };
3559  
3560  #define SIMPLE_TRANSACTION_LIMIT (PAGE_SIZE - sizeof(struct simple_transaction_argresp))
3561  
3562  char *simple_transaction_get(struct file *file, const char __user *buf,
3563  				size_t size);
3564  ssize_t simple_transaction_read(struct file *file, char __user *buf,
3565  				size_t size, loff_t *pos);
3566  int simple_transaction_release(struct inode *inode, struct file *file);
3567  
3568  void simple_transaction_set(struct file *file, size_t n);
3569  
3570  /*
3571   * simple attribute files
3572   *
 * These attributes behave similarly to those in sysfs:
3574   *
3575   * Writing to an attribute immediately sets a value, an open file can be
3576   * written to multiple times.
3577   *
3578   * Reading from an attribute creates a buffer from the value that might get
3579   * read with multiple read calls. When the attribute has been read
3580   * completely, no further read calls are possible until the file is opened
3581   * again.
3582   *
 * All attributes contain a text representation of a numeric value
 * that is accessed with the get() and set() functions.
3585   */
3586  #define DEFINE_SIMPLE_ATTRIBUTE_XSIGNED(__fops, __get, __set, __fmt, __is_signed)	\
3587  static int __fops ## _open(struct inode *inode, struct file *file)	\
3588  {									\
3589  	__simple_attr_check_format(__fmt, 0ull);			\
3590  	return simple_attr_open(inode, file, __get, __set, __fmt);	\
3591  }									\
3592  static const struct file_operations __fops = {				\
3593  	.owner	 = THIS_MODULE,						\
3594  	.open	 = __fops ## _open,					\
3595  	.release = simple_attr_release,					\
3596  	.read	 = simple_attr_read,					\
3597  	.write	 = (__is_signed) ? simple_attr_write_signed : simple_attr_write,	\
3598  	.llseek	 = generic_file_llseek,					\
3599  }
3600  
3601  #define DEFINE_SIMPLE_ATTRIBUTE(__fops, __get, __set, __fmt)		\
3602  	DEFINE_SIMPLE_ATTRIBUTE_XSIGNED(__fops, __get, __set, __fmt, false)
3603  
3604  #define DEFINE_SIMPLE_ATTRIBUTE_SIGNED(__fops, __get, __set, __fmt)	\
3605  	DEFINE_SIMPLE_ATTRIBUTE_XSIGNED(__fops, __get, __set, __fmt, true)
3606  
3607  static inline __printf(1, 2)
__simple_attr_check_format(const char * fmt,...)3608  void __simple_attr_check_format(const char *fmt, ...)
3609  {
3610  	/* don't do anything, just let the compiler check the arguments; */
3611  }
3612  
3613  int simple_attr_open(struct inode *inode, struct file *file,
3614  		     int (*get)(void *, u64 *), int (*set)(void *, u64),
3615  		     const char *fmt);
3616  int simple_attr_release(struct inode *inode, struct file *file);
3617  ssize_t simple_attr_read(struct file *file, char __user *buf,
3618  			 size_t len, loff_t *ppos);
3619  ssize_t simple_attr_write(struct file *file, const char __user *buf,
3620  			  size_t len, loff_t *ppos);
3621  ssize_t simple_attr_write_signed(struct file *file, const char __user *buf,
3622  				 size_t len, loff_t *ppos);
3623  
3624  struct ctl_table;
3625  int __init list_bdev_fs_names(char *buf, size_t size);
3626  
3627  #define __FMODE_EXEC		((__force int) FMODE_EXEC)
3628  #define __FMODE_NONOTIFY	((__force int) FMODE_NONOTIFY)
3629  
3630  #define ACC_MODE(x) ("\004\002\006\006"[(x)&O_ACCMODE])
/*
 * Build an fmode_t from open flags: the access-mode bits are (flag + 1)
 * masked with O_ACCMODE, and the __FMODE_NONOTIFY bit is carried over.
 * @flag is parenthesised so that compound expressions such as "a | b"
 * are evaluated as a whole before the "+ 1" and "&" are applied.
 */
#define OPEN_FMODE(flag) ((__force fmode_t)((((flag) + 1) & O_ACCMODE) | \
					    ((flag) & __FMODE_NONOTIFY)))
3633  
is_sxid(umode_t mode)3634  static inline bool is_sxid(umode_t mode)
3635  {
3636  	return mode & (S_ISUID | S_ISGID);
3637  }
3638  
check_sticky(struct mnt_idmap * idmap,struct inode * dir,struct inode * inode)3639  static inline int check_sticky(struct mnt_idmap *idmap,
3640  			       struct inode *dir, struct inode *inode)
3641  {
3642  	if (!(dir->i_mode & S_ISVTX))
3643  		return 0;
3644  
3645  	return __check_sticky(idmap, dir, inode);
3646  }
3647  
inode_has_no_xattr(struct inode * inode)3648  static inline void inode_has_no_xattr(struct inode *inode)
3649  {
3650  	if (!is_sxid(inode->i_mode) && (inode->i_sb->s_flags & SB_NOSEC))
3651  		inode->i_flags |= S_NOSEC;
3652  }
3653  
is_root_inode(struct inode * inode)3654  static inline bool is_root_inode(struct inode *inode)
3655  {
3656  	return inode == inode->i_sb->s_root->d_inode;
3657  }
3658  
dir_emit(struct dir_context * ctx,const char * name,int namelen,u64 ino,unsigned type)3659  static inline bool dir_emit(struct dir_context *ctx,
3660  			    const char *name, int namelen,
3661  			    u64 ino, unsigned type)
3662  {
3663  	return ctx->actor(ctx, name, namelen, ctx->pos, ino, type);
3664  }
dir_emit_dot(struct file * file,struct dir_context * ctx)3665  static inline bool dir_emit_dot(struct file *file, struct dir_context *ctx)
3666  {
3667  	return ctx->actor(ctx, ".", 1, ctx->pos,
3668  			  file->f_path.dentry->d_inode->i_ino, DT_DIR);
3669  }
dir_emit_dotdot(struct file * file,struct dir_context * ctx)3670  static inline bool dir_emit_dotdot(struct file *file, struct dir_context *ctx)
3671  {
3672  	return ctx->actor(ctx, "..", 2, ctx->pos,
3673  			  d_parent_ino(file->f_path.dentry), DT_DIR);
3674  }
dir_emit_dots(struct file * file,struct dir_context * ctx)3675  static inline bool dir_emit_dots(struct file *file, struct dir_context *ctx)
3676  {
3677  	if (ctx->pos == 0) {
3678  		if (!dir_emit_dot(file, ctx))
3679  			return false;
3680  		ctx->pos = 1;
3681  	}
3682  	if (ctx->pos == 1) {
3683  		if (!dir_emit_dotdot(file, ctx))
3684  			return false;
3685  		ctx->pos = 2;
3686  	}
3687  	return true;
3688  }
dir_relax(struct inode * inode)3689  static inline bool dir_relax(struct inode *inode)
3690  {
3691  	inode_unlock(inode);
3692  	inode_lock(inode);
3693  	return !IS_DEADDIR(inode);
3694  }
3695  
dir_relax_shared(struct inode * inode)3696  static inline bool dir_relax_shared(struct inode *inode)
3697  {
3698  	inode_unlock_shared(inode);
3699  	inode_lock_shared(inode);
3700  	return !IS_DEADDIR(inode);
3701  }
3702  
3703  extern bool path_noexec(const struct path *path);
3704  extern void inode_nohighmem(struct inode *inode);
3705  
3706  /* mm/fadvise.c */
3707  extern int vfs_fadvise(struct file *file, loff_t offset, loff_t len,
3708  		       int advice);
3709  extern int generic_fadvise(struct file *file, loff_t offset, loff_t len,
3710  			   int advice);
3711  
vfs_empty_path(int dfd,const char __user * path)3712  static inline bool vfs_empty_path(int dfd, const char __user *path)
3713  {
3714  	char c;
3715  
3716  	if (dfd < 0)
3717  		return false;
3718  
3719  	/* We now allow NULL to be used for empty path. */
3720  	if (!path)
3721  		return true;
3722  
3723  	if (unlikely(get_user(c, path)))
3724  		return false;
3725  
3726  	return !c;
3727  }
3728  
3729  bool generic_atomic_write_valid(struct iov_iter *iter, loff_t pos);
3730  
3731  #endif /* _LINUX_FS_H */
3732