1  /* SPDX-License-Identifier: GPL-2.0 */
2  #ifndef _LINUX_PAGEMAP_H
3  #define _LINUX_PAGEMAP_H
4  
5  /*
6   * Copyright 1995 Linus Torvalds
7   */
8  #include <linux/mm.h>
9  #include <linux/fs.h>
10  #include <linux/list.h>
11  #include <linux/highmem.h>
12  #include <linux/compiler.h>
13  #include <linux/uaccess.h>
14  #include <linux/gfp.h>
15  #include <linux/bitops.h>
16  #include <linux/hardirq.h> /* for in_interrupt() */
17  #include <linux/hugetlb_inline.h>
18  
19  struct folio_batch;
20  
21  unsigned long invalidate_mapping_pages(struct address_space *mapping,
22  					pgoff_t start, pgoff_t end);
23  
24  static inline void invalidate_remote_inode(struct inode *inode)
25  {
26  	if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
27  	    S_ISLNK(inode->i_mode))
28  		invalidate_mapping_pages(inode->i_mapping, 0, -1);
29  }
30  int invalidate_inode_pages2(struct address_space *mapping);
31  int invalidate_inode_pages2_range(struct address_space *mapping,
32  		pgoff_t start, pgoff_t end);
33  int kiocb_invalidate_pages(struct kiocb *iocb, size_t count);
34  void kiocb_invalidate_post_direct_write(struct kiocb *iocb, size_t count);
35  int filemap_invalidate_pages(struct address_space *mapping,
36  			     loff_t pos, loff_t end, bool nowait);
37  
38  int write_inode_now(struct inode *, int sync);
39  int filemap_fdatawrite(struct address_space *);
40  int filemap_flush(struct address_space *);
41  int filemap_fdatawait_keep_errors(struct address_space *mapping);
42  int filemap_fdatawait_range(struct address_space *, loff_t lstart, loff_t lend);
43  int filemap_fdatawait_range_keep_errors(struct address_space *mapping,
44  		loff_t start_byte, loff_t end_byte);
45  int filemap_invalidate_inode(struct inode *inode, bool flush,
46  			     loff_t start, loff_t end);
47  
48  static inline int filemap_fdatawait(struct address_space *mapping)
49  {
50  	return filemap_fdatawait_range(mapping, 0, LLONG_MAX);
51  }
52  
53  bool filemap_range_has_page(struct address_space *, loff_t lstart, loff_t lend);
54  int filemap_write_and_wait_range(struct address_space *mapping,
55  		loff_t lstart, loff_t lend);
56  int __filemap_fdatawrite_range(struct address_space *mapping,
57  		loff_t start, loff_t end, int sync_mode);
58  int filemap_fdatawrite_range(struct address_space *mapping,
59  		loff_t start, loff_t end);
60  int filemap_check_errors(struct address_space *mapping);
61  void __filemap_set_wb_err(struct address_space *mapping, int err);
62  int filemap_fdatawrite_wbc(struct address_space *mapping,
63  			   struct writeback_control *wbc);
64  int kiocb_write_and_wait(struct kiocb *iocb, size_t count);
65  
66  static inline int filemap_write_and_wait(struct address_space *mapping)
67  {
68  	return filemap_write_and_wait_range(mapping, 0, LLONG_MAX);
69  }
70  
71  /**
72   * filemap_set_wb_err - set a writeback error on an address_space
73   * @mapping: mapping in which to set writeback error
74   * @err: error to be set in mapping
75   *
76   * When writeback fails in some way, we must record that error so that
77   * userspace can be informed when fsync and the like are called.  We endeavor
78   * to report errors on any file that was open at the time of the error.  Some
79   * internal callers also need to know when writeback errors have occurred.
80   *
81   * When a writeback error occurs, most filesystems will want to call
82   * filemap_set_wb_err to record the error in the mapping so that it will be
83   * automatically reported whenever fsync is called on the file.
84   */
85  static inline void filemap_set_wb_err(struct address_space *mapping, int err)
86  {
87  	/* Fastpath for common case of no error */
88  	if (unlikely(err))
89  		__filemap_set_wb_err(mapping, err);
90  }
91  
92  /**
93   * filemap_check_wb_err - has an error occurred since the mark was sampled?
94   * @mapping: mapping to check for writeback errors
95   * @since: previously-sampled errseq_t
96   *
97   * Grab the errseq_t value from the mapping, and see if it has changed "since"
98   * the given value was sampled.
99   *
100   * If it has then report the latest error set, otherwise return 0.
101   */
102  static inline int filemap_check_wb_err(struct address_space *mapping,
103  					errseq_t since)
104  {
105  	return errseq_check(&mapping->wb_err, since);
106  }
107  
108  /**
109   * filemap_sample_wb_err - sample the current errseq_t to test for later errors
110   * @mapping: mapping to be sampled
111   *
112   * Writeback errors are always reported relative to a particular sample point
113   * in the past. This function provides those sample points.
114   */
115  static inline errseq_t filemap_sample_wb_err(struct address_space *mapping)
116  {
117  	return errseq_sample(&mapping->wb_err);
118  }
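
/*
 * Illustrative sketch (not part of this header's API): a filesystem fsync
 * method could pair filemap_sample_wb_err() with filemap_check_wb_err() to
 * report writeback errors that occur during the flush.  The function name
 * example_fsync() is hypothetical.
 *
 *	static int example_fsync(struct file *file, loff_t start, loff_t end,
 *				 int datasync)
 *	{
 *		struct address_space *mapping = file->f_mapping;
 *		errseq_t since = filemap_sample_wb_err(mapping);
 *		int err;
 *
 *		err = filemap_write_and_wait_range(mapping, start, end);
 *		if (err)
 *			return err;
 *		return filemap_check_wb_err(mapping, since);
 *	}
 */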
119  
120  /**
121   * file_sample_sb_err - sample the current errseq_t to test for later errors
122   * @file: file pointer to be sampled
123   *
124   * Grab the most current superblock-level errseq_t value for the given
125   * struct file.
126   */
127  static inline errseq_t file_sample_sb_err(struct file *file)
128  {
129  	return errseq_sample(&file->f_path.dentry->d_sb->s_wb_err);
130  }
131  
132  /*
133   * Flush file data before changing attributes.  Caller must hold any locks
134   * required to prevent further writes to this file until we're done setting
135   * flags.
136   */
137  static inline int inode_drain_writes(struct inode *inode)
138  {
139  	inode_dio_wait(inode);
140  	return filemap_write_and_wait(inode->i_mapping);
141  }
142  
143  static inline bool mapping_empty(struct address_space *mapping)
144  {
145  	return xa_empty(&mapping->i_pages);
146  }
147  
148  /*
149   * mapping_shrinkable - test if page cache state allows inode reclaim
150   * @mapping: the page cache mapping
151   *
152   * This checks the mapping's cache state for the purpose of inode
153   * reclaim and LRU management.
154   *
155   * The caller is expected to hold the i_lock, but is not required to
156   * hold the i_pages lock, which usually protects cache state. That's
157   * because the i_lock and the list_lru lock that protect the inode and
158   * its LRU state don't nest inside the irq-safe i_pages lock.
159   *
160   * Cache deletions are performed under the i_lock, which ensures that
161   * when an inode goes empty, it will reliably get queued on the LRU.
162   *
163   * Cache additions do not acquire the i_lock and may race with this
164   * check, in which case we'll report the inode as shrinkable when it
165   * has cache pages. This is okay: the shrinker also checks the
166   * refcount and the referenced bit, which will be elevated or set in
167   * the process of adding new cache pages to an inode.
168   */
169  static inline bool mapping_shrinkable(struct address_space *mapping)
170  {
171  	void *head;
172  
173  	/*
174  	 * On highmem systems, there could be lowmem pressure from the
175  	 * inodes before there is highmem pressure from the page
176  	 * cache. Make inodes shrinkable regardless of cache state.
177  	 */
178  	if (IS_ENABLED(CONFIG_HIGHMEM))
179  		return true;
180  
181  	/* Cache completely empty? Shrink away. */
182  	head = rcu_access_pointer(mapping->i_pages.xa_head);
183  	if (!head)
184  		return true;
185  
186  	/*
187  	 * The xarray stores single offset-0 entries directly in the
188  	 * head pointer, which allows non-resident page cache entries
189  	 * to escape the shadow shrinker's list of xarray nodes. The
190  	 * inode shrinker needs to pick them up under memory pressure.
191  	 */
192  	if (!xa_is_node(head) && xa_is_value(head))
193  		return true;
194  
195  	return false;
196  }
197  
198  /*
199   * Bits in mapping->flags.
200   */
201  enum mapping_flags {
202  	AS_EIO		= 0,	/* IO error on async write */
203  	AS_ENOSPC	= 1,	/* ENOSPC on async write */
204  	AS_MM_ALL_LOCKS	= 2,	/* under mm_take_all_locks() */
205  	AS_UNEVICTABLE	= 3,	/* e.g., ramdisk, SHM_LOCK */
206  	AS_EXITING	= 4, 	/* final truncate in progress */
207  	/* writeback related tags are not used */
208  	AS_NO_WRITEBACK_TAGS = 5,
209  	AS_RELEASE_ALWAYS = 6,	/* Call ->release_folio(), even if no private data */
210  	AS_STABLE_WRITES = 7,	/* must wait for writeback before modifying
211  				   folio contents */
212  	AS_INACCESSIBLE = 8,	/* Do not attempt direct R/W access to the mapping */
213  	/* Bits 16-25 are used for FOLIO_ORDER */
214  	AS_FOLIO_ORDER_BITS = 5,
215  	AS_FOLIO_ORDER_MIN = 16,
216  	AS_FOLIO_ORDER_MAX = AS_FOLIO_ORDER_MIN + AS_FOLIO_ORDER_BITS,
217  };
218  
219  #define AS_FOLIO_ORDER_BITS_MASK ((1u << AS_FOLIO_ORDER_BITS) - 1)
220  #define AS_FOLIO_ORDER_MIN_MASK (AS_FOLIO_ORDER_BITS_MASK << AS_FOLIO_ORDER_MIN)
221  #define AS_FOLIO_ORDER_MAX_MASK (AS_FOLIO_ORDER_BITS_MASK << AS_FOLIO_ORDER_MAX)
222  #define AS_FOLIO_ORDER_MASK (AS_FOLIO_ORDER_MIN_MASK | AS_FOLIO_ORDER_MAX_MASK)
223  
224  /**
225   * mapping_set_error - record a writeback error in the address_space
226   * @mapping: the mapping in which an error should be set
227   * @error: the error to set in the mapping
228   *
229   * When writeback fails in some way, we must record that error so that
230   * userspace can be informed when fsync and the like are called.  We endeavor
231   * to report errors on any file that was open at the time of the error.  Some
232   * internal callers also need to know when writeback errors have occurred.
233   *
234   * When a writeback error occurs, most filesystems will want to call
235   * mapping_set_error to record the error in the mapping so that it can be
236   * reported when the application calls fsync(2).
237   */
238  static inline void mapping_set_error(struct address_space *mapping, int error)
239  {
240  	if (likely(!error))
241  		return;
242  
243  	/* Record in wb_err for checkers using errseq_t based tracking */
244  	__filemap_set_wb_err(mapping, error);
245  
246  	/* Record it in superblock */
247  	if (mapping->host)
248  		errseq_set(&mapping->host->i_sb->s_wb_err, error);
249  
250  	/* Record it in flags for now, for legacy callers */
251  	if (error == -ENOSPC)
252  		set_bit(AS_ENOSPC, &mapping->flags);
253  	else
254  		set_bit(AS_EIO, &mapping->flags);
255  }
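
/*
 * Illustrative sketch (hypothetical example_end_writeback() helper): a
 * writeback completion path typically records an I/O error with
 * mapping_set_error() before ending writeback on the folio, so that a
 * later fsync() on the file will see it.
 *
 *	static void example_end_writeback(struct folio *folio, int error)
 *	{
 *		if (error)
 *			mapping_set_error(folio->mapping, error);
 *		folio_end_writeback(folio);
 *	}
 */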
256  
257  static inline void mapping_set_unevictable(struct address_space *mapping)
258  {
259  	set_bit(AS_UNEVICTABLE, &mapping->flags);
260  }
261  
262  static inline void mapping_clear_unevictable(struct address_space *mapping)
263  {
264  	clear_bit(AS_UNEVICTABLE, &mapping->flags);
265  }
266  
267  static inline bool mapping_unevictable(struct address_space *mapping)
268  {
269  	return mapping && test_bit(AS_UNEVICTABLE, &mapping->flags);
270  }
271  
272  static inline void mapping_set_exiting(struct address_space *mapping)
273  {
274  	set_bit(AS_EXITING, &mapping->flags);
275  }
276  
277  static inline int mapping_exiting(struct address_space *mapping)
278  {
279  	return test_bit(AS_EXITING, &mapping->flags);
280  }
281  
282  static inline void mapping_set_no_writeback_tags(struct address_space *mapping)
283  {
284  	set_bit(AS_NO_WRITEBACK_TAGS, &mapping->flags);
285  }
286  
287  static inline int mapping_use_writeback_tags(struct address_space *mapping)
288  {
289  	return !test_bit(AS_NO_WRITEBACK_TAGS, &mapping->flags);
290  }
291  
292  static inline bool mapping_release_always(const struct address_space *mapping)
293  {
294  	return test_bit(AS_RELEASE_ALWAYS, &mapping->flags);
295  }
296  
297  static inline void mapping_set_release_always(struct address_space *mapping)
298  {
299  	set_bit(AS_RELEASE_ALWAYS, &mapping->flags);
300  }
301  
302  static inline void mapping_clear_release_always(struct address_space *mapping)
303  {
304  	clear_bit(AS_RELEASE_ALWAYS, &mapping->flags);
305  }
306  
307  static inline bool mapping_stable_writes(const struct address_space *mapping)
308  {
309  	return test_bit(AS_STABLE_WRITES, &mapping->flags);
310  }
311  
312  static inline void mapping_set_stable_writes(struct address_space *mapping)
313  {
314  	set_bit(AS_STABLE_WRITES, &mapping->flags);
315  }
316  
317  static inline void mapping_clear_stable_writes(struct address_space *mapping)
318  {
319  	clear_bit(AS_STABLE_WRITES, &mapping->flags);
320  }
321  
322  static inline void mapping_set_inaccessible(struct address_space *mapping)
323  {
324  	/*
325  	 * It's expected inaccessible mappings are also unevictable. Compaction
326  	 * migrate scanner (isolate_migratepages_block()) relies on this to
327  	 * reduce page locking.
328  	 */
329  	set_bit(AS_UNEVICTABLE, &mapping->flags);
330  	set_bit(AS_INACCESSIBLE, &mapping->flags);
331  }
332  
333  static inline bool mapping_inaccessible(struct address_space *mapping)
334  {
335  	return test_bit(AS_INACCESSIBLE, &mapping->flags);
336  }
337  
338  static inline gfp_t mapping_gfp_mask(struct address_space * mapping)
339  {
340  	return mapping->gfp_mask;
341  }
342  
343  /* Restricts the given gfp_mask to what the mapping allows. */
344  static inline gfp_t mapping_gfp_constraint(struct address_space *mapping,
345  		gfp_t gfp_mask)
346  {
347  	return mapping_gfp_mask(mapping) & gfp_mask;
348  }
349  
350  /*
351   * This is non-atomic.  Only to be used before the mapping is activated.
352   * Probably needs a barrier...
353   */
354  static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask)
355  {
356  	m->gfp_mask = mask;
357  }
358  
359  /*
360   * There are some parts of the kernel which assume that PMD entries
361   * are exactly HPAGE_PMD_ORDER.  Those should be fixed, but until then,
362   * limit the maximum allocation order to PMD size.  I'm not aware of any
363   * assumptions about maximum order if THP is disabled, but 8 seems like
364   * a good order (that's 1MB if you're using 4kB pages)
365   */
366  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
367  #define PREFERRED_MAX_PAGECACHE_ORDER	HPAGE_PMD_ORDER
368  #else
369  #define PREFERRED_MAX_PAGECACHE_ORDER	8
370  #endif
371  
372  /*
373   * xas_split_alloc() does not support arbitrary orders. This implies no
374   * 512MB THP on ARM64 with 64KB base page size.
375   */
376  #define MAX_XAS_ORDER		(XA_CHUNK_SHIFT * 2 - 1)
377  #define MAX_PAGECACHE_ORDER	min(MAX_XAS_ORDER, PREFERRED_MAX_PAGECACHE_ORDER)
378  
379  /*
380   * mapping_max_folio_size_supported() - Check the max folio size supported
381   *
382   * The filesystem should call this function at mount time if there is a
383   * requirement on the folio mapping size in the page cache.
384   */
385  static inline size_t mapping_max_folio_size_supported(void)
386  {
387  	if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
388  		return 1U << (PAGE_SHIFT + MAX_PAGECACHE_ORDER);
389  	return PAGE_SIZE;
390  }
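
/*
 * Illustrative sketch (hypothetical mount-time fragment): a filesystem whose
 * block size can exceed PAGE_SIZE might use this helper to refuse a
 * configuration the page cache cannot hold.  The block_size variable is
 * assumed, not defined here.
 *
 *	if (block_size > mapping_max_folio_size_supported())
 *		return -EINVAL;
 */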
391  
392  /*
393   * mapping_set_folio_order_range() - Set the orders supported by a file.
394   * @mapping: The address space of the file.
395   * @min: Minimum folio order (between 0-MAX_PAGECACHE_ORDER inclusive).
396   * @max: Maximum folio order (between @min-MAX_PAGECACHE_ORDER inclusive).
397   *
398   * The filesystem should call this function in its inode constructor to
399   * indicate which base size (min) and maximum size (max) of folio the VFS
400   * can use to cache the contents of the file.  This should only be used
401   * if the filesystem needs special handling of folio sizes (ie there is
402   * something the core cannot know).
403   * Do not tune it based on, eg, i_size.
404   *
405   * Context: This should not be called while the inode is active as it
406   * is non-atomic.
407   */
408  static inline void mapping_set_folio_order_range(struct address_space *mapping,
409  						 unsigned int min,
410  						 unsigned int max)
411  {
412  	if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
413  		return;
414  
415  	if (min > MAX_PAGECACHE_ORDER)
416  		min = MAX_PAGECACHE_ORDER;
417  
418  	if (max > MAX_PAGECACHE_ORDER)
419  		max = MAX_PAGECACHE_ORDER;
420  
421  	if (max < min)
422  		max = min;
423  
424  	mapping->flags = (mapping->flags & ~AS_FOLIO_ORDER_MASK) |
425  		(min << AS_FOLIO_ORDER_MIN) | (max << AS_FOLIO_ORDER_MAX);
426  }
427  
428  static inline void mapping_set_folio_min_order(struct address_space *mapping,
429  					       unsigned int min)
430  {
431  	mapping_set_folio_order_range(mapping, min, MAX_PAGECACHE_ORDER);
432  }
433  
434  /**
435   * mapping_set_large_folios() - Indicate the file supports large folios.
436   * @mapping: The address space of the file.
437   *
438   * The filesystem should call this function in its inode constructor to
439   * indicate that the VFS can use large folios to cache the contents of
440   * the file.
441   *
442   * Context: This should not be called while the inode is active as it
443   * is non-atomic.
444   */
445  static inline void mapping_set_large_folios(struct address_space *mapping)
446  {
447  	mapping_set_folio_order_range(mapping, 0, MAX_PAGECACHE_ORDER);
448  }
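
/*
 * Illustrative sketch (hypothetical example_inode_init() helper): a
 * filesystem opts in to large folios, or to a minimum folio order matching
 * a block size larger than PAGE_SIZE, from its inode constructor.
 *
 *	static void example_inode_init(struct inode *inode,
 *				       unsigned int blkbits)
 *	{
 *		if (blkbits > PAGE_SHIFT)
 *			mapping_set_folio_min_order(inode->i_mapping,
 *						    blkbits - PAGE_SHIFT);
 *		else
 *			mapping_set_large_folios(inode->i_mapping);
 *	}
 */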
449  
450  static inline unsigned int
451  mapping_max_folio_order(const struct address_space *mapping)
452  {
453  	if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
454  		return 0;
455  	return (mapping->flags & AS_FOLIO_ORDER_MAX_MASK) >> AS_FOLIO_ORDER_MAX;
456  }
457  
458  static inline unsigned int
459  mapping_min_folio_order(const struct address_space *mapping)
460  {
461  	if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
462  		return 0;
463  	return (mapping->flags & AS_FOLIO_ORDER_MIN_MASK) >> AS_FOLIO_ORDER_MIN;
464  }
465  
466  static inline unsigned long
467  mapping_min_folio_nrpages(struct address_space *mapping)
468  {
469  	return 1UL << mapping_min_folio_order(mapping);
470  }
471  
472  /**
473   * mapping_align_index() - Align index for this mapping.
474   * @mapping: The address_space.
475   * @index: The page index.
476   *
477   * The index of a folio must be naturally aligned.  If you are adding a
478   * new folio to the page cache and need to know what index to give it,
479   * call this function.
480   */
481  static inline pgoff_t mapping_align_index(struct address_space *mapping,
482  					  pgoff_t index)
483  {
484  	return round_down(index, mapping_min_folio_nrpages(mapping));
485  }
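
/*
 * Illustrative sketch: when adding a freshly allocated folio to the page
 * cache, the insertion index is aligned to the folio's natural boundary
 * first.  Variable names are hypothetical; filemap_add_folio() is declared
 * later in this header.
 *
 *	index = mapping_align_index(mapping, index);
 *	err = filemap_add_folio(mapping, folio, index, gfp);
 */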
486  
487  /*
488   * Large folio support currently depends on THP.  These dependencies are
489   * being worked on but are not yet fixed.
490   */
491  static inline bool mapping_large_folio_support(struct address_space *mapping)
492  {
493  	/* AS_FOLIO_ORDER is only reasonable for pagecache folios */
494  	VM_WARN_ONCE((unsigned long)mapping & PAGE_MAPPING_ANON,
495  			"Anonymous mapping always supports large folio");
496  
497  	return mapping_max_folio_order(mapping) > 0;
498  }
499  
500  /* Return the maximum folio size for this pagecache mapping, in bytes. */
501  static inline size_t mapping_max_folio_size(const struct address_space *mapping)
502  {
503  	return PAGE_SIZE << mapping_max_folio_order(mapping);
504  }
505  
506  static inline int filemap_nr_thps(struct address_space *mapping)
507  {
508  #ifdef CONFIG_READ_ONLY_THP_FOR_FS
509  	return atomic_read(&mapping->nr_thps);
510  #else
511  	return 0;
512  #endif
513  }
514  
515  static inline void filemap_nr_thps_inc(struct address_space *mapping)
516  {
517  #ifdef CONFIG_READ_ONLY_THP_FOR_FS
518  	if (!mapping_large_folio_support(mapping))
519  		atomic_inc(&mapping->nr_thps);
520  #else
521  	WARN_ON_ONCE(mapping_large_folio_support(mapping) == 0);
522  #endif
523  }
524  
525  static inline void filemap_nr_thps_dec(struct address_space *mapping)
526  {
527  #ifdef CONFIG_READ_ONLY_THP_FOR_FS
528  	if (!mapping_large_folio_support(mapping))
529  		atomic_dec(&mapping->nr_thps);
530  #else
531  	WARN_ON_ONCE(mapping_large_folio_support(mapping) == 0);
532  #endif
533  }
534  
535  struct address_space *folio_mapping(struct folio *);
536  struct address_space *swapcache_mapping(struct folio *);
537  
538  /**
539   * folio_file_mapping - Find the mapping this folio belongs to.
540   * @folio: The folio.
541   *
542   * For folios which are in the page cache, return the mapping that this
543   * page belongs to.  Folios in the swap cache return the mapping of the
544   * swap file or swap device where the data is stored.  This is different
545   * from the mapping returned by folio_mapping().  The only reason to
546   * use it is if, like NFS, you return 0 from ->activate_swapfile.
547   *
548   * Do not call this for folios which aren't in the page cache or swap cache.
549   */
550  static inline struct address_space *folio_file_mapping(struct folio *folio)
551  {
552  	if (unlikely(folio_test_swapcache(folio)))
553  		return swapcache_mapping(folio);
554  
555  	return folio->mapping;
556  }
557  
558  /**
559   * folio_flush_mapping - Find the file mapping this folio belongs to.
560   * @folio: The folio.
561   *
562   * For folios which are in the page cache, return the mapping that this
563   * page belongs to.  Anonymous folios return NULL, even if they're in
564   * the swap cache.  Other kinds of folio also return NULL.
565   *
566   * This is ONLY used by architecture cache flushing code.  If you aren't
567   * writing cache flushing code, you want either folio_mapping() or
568   * folio_file_mapping().
569   */
570  static inline struct address_space *folio_flush_mapping(struct folio *folio)
571  {
572  	if (unlikely(folio_test_swapcache(folio)))
573  		return NULL;
574  
575  	return folio_mapping(folio);
576  }
577  
578  static inline struct address_space *page_file_mapping(struct page *page)
579  {
580  	return folio_file_mapping(page_folio(page));
581  }
582  
583  /**
584   * folio_inode - Get the host inode for this folio.
585   * @folio: The folio.
586   *
587   * For folios which are in the page cache, return the inode that this folio
588   * belongs to.
589   *
590   * Do not call this for folios which aren't in the page cache.
591   */
592  static inline struct inode *folio_inode(struct folio *folio)
593  {
594  	return folio->mapping->host;
595  }
596  
597  /**
598   * folio_attach_private - Attach private data to a folio.
599   * @folio: Folio to attach data to.
600   * @data: Data to attach to folio.
601   *
602   * Attaching private data to a folio increments the page's reference count.
603   * The data must be detached before the folio will be freed.
604   */
605  static inline void folio_attach_private(struct folio *folio, void *data)
606  {
607  	folio_get(folio);
608  	folio->private = data;
609  	folio_set_private(folio);
610  }
611  
612  /**
613   * folio_change_private - Change private data on a folio.
614   * @folio: Folio to change the data on.
615   * @data: Data to set on the folio.
616   *
617   * Change the private data attached to a folio and return the old
618   * data.  The page must previously have had data attached and the data
619   * must be detached before the folio will be freed.
620   *
621   * Return: Data that was previously attached to the folio.
622   */
623  static inline void *folio_change_private(struct folio *folio, void *data)
624  {
625  	void *old = folio_get_private(folio);
626  
627  	folio->private = data;
628  	return old;
629  }
630  
631  /**
632   * folio_detach_private - Detach private data from a folio.
633   * @folio: Folio to detach data from.
634   *
635   * Removes the data that was previously attached to the folio and decrements
636   * the refcount on the page.
637   *
638   * Return: Data that was attached to the folio.
639   */
640  static inline void *folio_detach_private(struct folio *folio)
641  {
642  	void *data = folio_get_private(folio);
643  
644  	if (!folio_test_private(folio))
645  		return NULL;
646  	folio_clear_private(folio);
647  	folio->private = NULL;
648  	folio_put(folio);
649  
650  	return data;
651  }
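
/*
 * Illustrative sketch (hypothetical example code, not this header's API):
 * a filesystem that tracks per-folio state attaches it when the folio is
 * set up and detaches it from its ->release_folio() implementation.
 *
 *	struct example_state *state = kzalloc(sizeof(*state), GFP_KERNEL);
 *
 *	if (state)
 *		folio_attach_private(folio, state);	// takes a folio ref
 *
 *	// ... later, e.g. in ->release_folio():
 *	kfree(folio_detach_private(folio));		// drops the folio ref
 */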
652  
653  static inline void attach_page_private(struct page *page, void *data)
654  {
655  	folio_attach_private(page_folio(page), data);
656  }
657  
658  static inline void *detach_page_private(struct page *page)
659  {
660  	return folio_detach_private(page_folio(page));
661  }
662  
663  #ifdef CONFIG_NUMA
664  struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order);
665  #else
666  static inline struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order)
667  {
668  	return folio_alloc_noprof(gfp, order);
669  }
670  #endif
671  
672  #define filemap_alloc_folio(...)				\
673  	alloc_hooks(filemap_alloc_folio_noprof(__VA_ARGS__))
674  
675  static inline struct page *__page_cache_alloc(gfp_t gfp)
676  {
677  	return &filemap_alloc_folio(gfp, 0)->page;
678  }
679  
680  static inline gfp_t readahead_gfp_mask(struct address_space *x)
681  {
682  	return mapping_gfp_mask(x) | __GFP_NORETRY | __GFP_NOWARN;
683  }
684  
685  typedef int filler_t(struct file *, struct folio *);
686  
687  pgoff_t page_cache_next_miss(struct address_space *mapping,
688  			     pgoff_t index, unsigned long max_scan);
689  pgoff_t page_cache_prev_miss(struct address_space *mapping,
690  			     pgoff_t index, unsigned long max_scan);
691  
692  /**
693   * typedef fgf_t - Flags for getting folios from the page cache.
694   *
695   * Most users of the page cache will not need to use these flags;
696   * there are convenience functions such as filemap_get_folio() and
697   * filemap_lock_folio().  For users which need more control over exactly
698   * what is done with the folios, these flags to __filemap_get_folio()
699   * are available.
700   *
701   * * %FGP_ACCESSED - The folio will be marked accessed.
702   * * %FGP_LOCK - The folio is returned locked.
703   * * %FGP_CREAT - If no folio is present then a new folio is allocated,
704   *   added to the page cache and the VM's LRU list.  The folio is
705   *   returned locked.
706   * * %FGP_FOR_MMAP - The caller wants to do its own locking dance if the
707   *   folio is already in cache.  If the folio was allocated, unlock it
708   *   before returning so the caller can do the same dance.
709   * * %FGP_WRITE - The folio will be written to by the caller.
710   * * %FGP_NOFS - __GFP_FS will get cleared in gfp.
711   * * %FGP_NOWAIT - Don't block on the folio lock.
712   * * %FGP_STABLE - Wait for the folio to be stable (finished writeback)
713   * * %FGP_WRITEBEGIN - The flags to use in a filesystem write_begin()
714   *   implementation.
715   */
716  typedef unsigned int __bitwise fgf_t;
717  
718  #define FGP_ACCESSED		((__force fgf_t)0x00000001)
719  #define FGP_LOCK		((__force fgf_t)0x00000002)
720  #define FGP_CREAT		((__force fgf_t)0x00000004)
721  #define FGP_WRITE		((__force fgf_t)0x00000008)
722  #define FGP_NOFS		((__force fgf_t)0x00000010)
723  #define FGP_NOWAIT		((__force fgf_t)0x00000020)
724  #define FGP_FOR_MMAP		((__force fgf_t)0x00000040)
725  #define FGP_STABLE		((__force fgf_t)0x00000080)
726  #define FGF_GET_ORDER(fgf)	(((__force unsigned)fgf) >> 26)	/* top 6 bits */
727  
728  #define FGP_WRITEBEGIN		(FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE)
729  
730  /**
731   * fgf_set_order - Encode a length in the fgf_t flags.
732   * @size: The suggested size of the folio to create.
733   *
734   * The caller of __filemap_get_folio() can use this to suggest a preferred
735   * size for the folio that is created.  If there is already a folio at
736   * the index, it will be returned, no matter what its size.  If a folio
737   * is freshly created, it may be of a different size than requested
738   * due to alignment constraints, memory pressure, or the presence of
739   * other folios at nearby indices.
740   */
741  static inline fgf_t fgf_set_order(size_t size)
742  {
743  	unsigned int shift = ilog2(size);
744  
745  	if (shift <= PAGE_SHIFT)
746  		return 0;
747  	return (__force fgf_t)((shift - PAGE_SHIFT) << 26);
748  }
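
/*
 * Illustrative sketch: a buffered write path can hint the folio size it
 * would like by OR-ing fgf_set_order() into the FGP flags.  The names
 * pos and len are hypothetical; __filemap_get_folio() is declared just
 * below.
 *
 *	fgf_t fgp = FGP_WRITEBEGIN | fgf_set_order(len);
 *	struct folio *folio;
 *
 *	folio = __filemap_get_folio(mapping, pos >> PAGE_SHIFT, fgp,
 *				    mapping_gfp_mask(mapping));
 *	if (IS_ERR(folio))
 *		return PTR_ERR(folio);
 */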
749  
750  void *filemap_get_entry(struct address_space *mapping, pgoff_t index);
751  struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
752  		fgf_t fgp_flags, gfp_t gfp);
753  struct page *pagecache_get_page(struct address_space *mapping, pgoff_t index,
754  		fgf_t fgp_flags, gfp_t gfp);
755  
756  /**
757   * filemap_get_folio - Find and get a folio.
758   * @mapping: The address_space to search.
759   * @index: The page index.
760   *
761   * Looks up the page cache entry at @mapping & @index.  If a folio is
762   * present, it is returned with an increased refcount.
763   *
764   * Return: A folio or ERR_PTR(-ENOENT) if there is no folio in the cache for
765   * this index.  Will not return a shadow, swap or DAX entry.
766   */
767  static inline struct folio *filemap_get_folio(struct address_space *mapping,
768  					pgoff_t index)
769  {
770  	return __filemap_get_folio(mapping, index, 0, 0);
771  }
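
/*
 * Illustrative usage sketch: because this returns an ERR_PTR() rather than
 * NULL on a miss, callers check with IS_ERR() before touching the folio.
 *
 *	struct folio *folio = filemap_get_folio(mapping, index);
 *
 *	if (IS_ERR(folio))
 *		return PTR_ERR(folio);
 *	// ... use the folio ...
 *	folio_put(folio);
 */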
772  
773  /**
774   * filemap_lock_folio - Find and lock a folio.
775   * @mapping: The address_space to search.
776   * @index: The page index.
777   *
778   * Looks up the page cache entry at @mapping & @index.  If a folio is
779   * present, it is returned locked with an increased refcount.
780   *
781   * Context: May sleep.
782   * Return: A folio or ERR_PTR(-ENOENT) if there is no folio in the cache for
783   * this index.  Will not return a shadow, swap or DAX entry.
784   */
785  static inline struct folio *filemap_lock_folio(struct address_space *mapping,
786  					pgoff_t index)
787  {
788  	return __filemap_get_folio(mapping, index, FGP_LOCK, 0);
789  }
790  
791  /**
792   * filemap_grab_folio - grab a folio from the page cache
793   * @mapping: The address space to search
794   * @index: The page index
795   *
796   * Looks up the page cache entry at @mapping & @index. If no folio is found,
797   * a new folio is created. The folio is locked, marked as accessed, and
798   * returned.
799   *
800   * Return: A found or created folio. ERR_PTR(-ENOMEM) if no folio was found
801   * and a new one could not be created.
802   */
803  static inline struct folio *filemap_grab_folio(struct address_space *mapping,
804  					pgoff_t index)
805  {
806  	return __filemap_get_folio(mapping, index,
807  			FGP_LOCK | FGP_ACCESSED | FGP_CREAT,
808  			mapping_gfp_mask(mapping));
809  }
810  
811  /**
812   * find_get_page - find and get a page reference
813   * @mapping: the address_space to search
814   * @offset: the page index
815   *
816   * Looks up the page cache slot at @mapping & @offset.  If there is a
817   * page cache page, it is returned with an increased refcount.
818   *
819   * Otherwise, %NULL is returned.
820   */
821  static inline struct page *find_get_page(struct address_space *mapping,
822  					pgoff_t offset)
823  {
824  	return pagecache_get_page(mapping, offset, 0, 0);
825  }
826  
827  static inline struct page *find_get_page_flags(struct address_space *mapping,
828  					pgoff_t offset, fgf_t fgp_flags)
829  {
830  	return pagecache_get_page(mapping, offset, fgp_flags, 0);
831  }
832  
833  /**
834   * find_lock_page - locate, pin and lock a pagecache page
835   * @mapping: the address_space to search
836   * @index: the page index
837   *
838   * Looks up the page cache entry at @mapping & @index.  If there is a
839   * page cache page, it is returned locked and with an increased
840   * refcount.
841   *
842   * Context: May sleep.
843   * Return: A struct page or %NULL if there is no page in the cache for this
844   * index.
845   */
846  static inline struct page *find_lock_page(struct address_space *mapping,
847  					pgoff_t index)
848  {
849  	return pagecache_get_page(mapping, index, FGP_LOCK, 0);
850  }
851  
852  /**
853   * find_or_create_page - locate or add a pagecache page
854   * @mapping: the page's address_space
855   * @index: the page's index into the mapping
856   * @gfp_mask: page allocation mode
857   *
858   * Looks up the page cache slot at @mapping & @index.  If there is a
859   * page cache page, it is returned locked and with an increased
860   * refcount.
861   *
862   * If the page is not present, a new page is allocated using @gfp_mask
863   * and added to the page cache and the VM's LRU list.  The page is
864   * returned locked and with an increased refcount.
865   *
866   * On memory exhaustion, %NULL is returned.
867   *
868   * find_or_create_page() may sleep, even if @gfp_mask specifies an
869   * atomic allocation!
870   */
871  static inline struct page *find_or_create_page(struct address_space *mapping,
872  					pgoff_t index, gfp_t gfp_mask)
873  {
874  	return pagecache_get_page(mapping, index,
875  					FGP_LOCK|FGP_ACCESSED|FGP_CREAT,
876  					gfp_mask);
877  }
878  
879  /**
880   * grab_cache_page_nowait - returns locked page at given index in given cache
881   * @mapping: target address_space
882   * @index: the page index
883   *
884   * Same as grab_cache_page(), but do not wait if the page is unavailable.
885   * This is intended for speculative data generators, where the data can
886   * be regenerated if the page couldn't be grabbed.  This routine should
887   * be safe to call while holding the lock for another page.
888   *
889   * Clear __GFP_FS when allocating the page to avoid recursion into the fs
890   * and deadlock against the caller's locked page.
891   */
892  static inline struct page *grab_cache_page_nowait(struct address_space *mapping,
893  				pgoff_t index)
894  {
895  	return pagecache_get_page(mapping, index,
896  			FGP_LOCK|FGP_CREAT|FGP_NOFS|FGP_NOWAIT,
897  			mapping_gfp_mask(mapping));
898  }
899  
900  extern pgoff_t __folio_swap_cache_index(struct folio *folio);
901  
902  /**
903   * folio_index - File index of a folio.
904   * @folio: The folio.
905   *
906   * For a folio which is either in the page cache or the swap cache,
907   * return its index within the address_space it belongs to.  If you know
908   * the page is definitely in the page cache, you can look at the folio's
909   * index directly.
910   *
911   * Return: The index (offset in units of pages) of a folio in its file.
912   */
913  static inline pgoff_t folio_index(struct folio *folio)
914  {
915  	if (unlikely(folio_test_swapcache(folio)))
916  		return __folio_swap_cache_index(folio);
917  	return folio->index;
918  }
919  
920  /**
921   * folio_next_index - Get the index of the next folio.
922   * @folio: The current folio.
923   *
924   * Return: The index of the folio which follows this folio in the file.
925   */
926  static inline pgoff_t folio_next_index(struct folio *folio)
927  {
928  	return folio->index + folio_nr_pages(folio);
929  }
930  
931  /**
932   * folio_file_page - The page for a particular index.
933   * @folio: The folio which contains this index.
934   * @index: The index we want to look up.
935   *
936   * Sometimes after looking up a folio in the page cache, we need to
937   * obtain the specific page for an index (eg a page fault).
938   *
939   * Return: The page containing the file data for this index.
940   */
941  static inline struct page *folio_file_page(struct folio *folio, pgoff_t index)
942  {
943  	return folio_page(folio, index & (folio_nr_pages(folio) - 1));
944  }
945  
946  /**
947   * folio_contains - Does this folio contain this index?
948   * @folio: The folio.
949   * @index: The page index within the file.
950   *
951   * Context: The caller should have the page locked in order to prevent
952   * (eg) shmem from moving the page between the page cache and swap cache
953   * and changing its index in the middle of the operation.
954   * Return: true or false.
955   */
956  static inline bool folio_contains(struct folio *folio, pgoff_t index)
957  {
958  	return index - folio_index(folio) < folio_nr_pages(folio);
959  }
960  
961  /*
962   * Given the page we found in the page cache, return the page corresponding
963   * to this index in the file
964   */
965  static inline struct page *find_subpage(struct page *head, pgoff_t index)
966  {
967  	/* HugeTLBfs wants the head page regardless */
968  	if (PageHuge(head))
969  		return head;
970  
971  	return head + (index & (thp_nr_pages(head) - 1));
972  }
973  
974  unsigned filemap_get_folios(struct address_space *mapping, pgoff_t *start,
975  		pgoff_t end, struct folio_batch *fbatch);
976  unsigned filemap_get_folios_contig(struct address_space *mapping,
977  		pgoff_t *start, pgoff_t end, struct folio_batch *fbatch);
978  unsigned filemap_get_folios_tag(struct address_space *mapping, pgoff_t *start,
979  		pgoff_t end, xa_mark_t tag, struct folio_batch *fbatch);
980  
981  struct page *grab_cache_page_write_begin(struct address_space *mapping,
982  			pgoff_t index);
983  
984  /*
985   * Returns locked page at given index in given cache, creating it if needed.
986   */
987  static inline struct page *grab_cache_page(struct address_space *mapping,
988  								pgoff_t index)
989  {
990  	return find_or_create_page(mapping, index, mapping_gfp_mask(mapping));
991  }
992  
993  struct folio *read_cache_folio(struct address_space *, pgoff_t index,
994  		filler_t *filler, struct file *file);
995  struct folio *mapping_read_folio_gfp(struct address_space *, pgoff_t index,
996  		gfp_t flags);
997  struct page *read_cache_page(struct address_space *, pgoff_t index,
998  		filler_t *filler, struct file *file);
999  extern struct page * read_cache_page_gfp(struct address_space *mapping,
1000  				pgoff_t index, gfp_t gfp_mask);
1001  
1002  static inline struct page *read_mapping_page(struct address_space *mapping,
1003  				pgoff_t index, struct file *file)
1004  {
1005  	return read_cache_page(mapping, index, NULL, file);
1006  }
1007  
1008  static inline struct folio *read_mapping_folio(struct address_space *mapping,
1009  				pgoff_t index, struct file *file)
1010  {
1011  	return read_cache_folio(mapping, index, NULL, file);
1012  }
1013  
1014  /*
1015   * Get the offset in PAGE_SIZE (even for hugetlb pages).
1016   */
1017  static inline pgoff_t page_to_pgoff(struct page *page)
1018  {
1019  	struct page *head;
1020  
1021  	if (likely(!PageTransTail(page)))
1022  		return page->index;
1023  
1024  	head = compound_head(page);
1025  	/*
1026  	 *  We don't initialize ->index for tail pages: calculate based on
1027  	 *  head page
1028  	 */
1029  	return head->index + page - head;
1030  }
1031  
1032  /*
1033   * Return byte-offset into filesystem object for page.
1034   */
1035  static inline loff_t page_offset(struct page *page)
1036  {
1037  	return ((loff_t)page->index) << PAGE_SHIFT;
1038  }
1039  
1040  /**
1041   * folio_pos - Returns the byte position of this folio in its file.
1042   * @folio: The folio.
1043   */
1044  static inline loff_t folio_pos(struct folio *folio)
1045  {
1046  	return page_offset(&folio->page);
1047  }
1048  
1049  /*
1050   * Get the offset in PAGE_SIZE (even for hugetlb folios).
1051   */
1052  static inline pgoff_t folio_pgoff(struct folio *folio)
1053  {
1054  	return folio->index;
1055  }
1056  
1057  static inline pgoff_t linear_page_index(struct vm_area_struct *vma,
1058  					unsigned long address)
1059  {
1060  	pgoff_t pgoff;
1061  	pgoff = (address - vma->vm_start) >> PAGE_SHIFT;
1062  	pgoff += vma->vm_pgoff;
1063  	return pgoff;
1064  }
1065  
1066  struct wait_page_key {
1067  	struct folio *folio;
1068  	int bit_nr;
1069  	int page_match;
1070  };
1071  
1072  struct wait_page_queue {
1073  	struct folio *folio;
1074  	int bit_nr;
1075  	wait_queue_entry_t wait;
1076  };
1077  
1078  static inline bool wake_page_match(struct wait_page_queue *wait_page,
1079  				  struct wait_page_key *key)
1080  {
1081  	if (wait_page->folio != key->folio)
1082  	       return false;
1083  	key->page_match = 1;
1084  
1085  	if (wait_page->bit_nr != key->bit_nr)
1086  		return false;
1087  
1088  	return true;
1089  }
1090  
1091  void __folio_lock(struct folio *folio);
1092  int __folio_lock_killable(struct folio *folio);
1093  vm_fault_t __folio_lock_or_retry(struct folio *folio, struct vm_fault *vmf);
1094  void unlock_page(struct page *page);
1095  void folio_unlock(struct folio *folio);
1096  
1097  /**
1098   * folio_trylock() - Attempt to lock a folio.
1099   * @folio: The folio to attempt to lock.
1100   *
1101   * Sometimes it is undesirable to wait for a folio to be unlocked (eg
1102   * when the locks are being taken in the wrong order, or if making
1103   * progress through a batch of folios is more important than processing
1104   * them in order).  Usually folio_lock() is the correct function to call.
1105   *
1106   * Context: Any context.
1107   * Return: Whether the lock was successfully acquired.
1108   */
1109  static inline bool folio_trylock(struct folio *folio)
1110  {
1111  	return likely(!test_and_set_bit_lock(PG_locked, folio_flags(folio, 0)));
1112  }
1113  
1114  /*
1115   * Return true if the page was successfully locked
1116   */
1117  static inline bool trylock_page(struct page *page)
1118  {
1119  	return folio_trylock(page_folio(page));
1120  }
1121  
1122  /**
1123   * folio_lock() - Lock this folio.
1124   * @folio: The folio to lock.
1125   *
1126   * The folio lock protects against many things, probably more than it
1127   * should.  It is primarily held while a folio is being brought uptodate,
1128   * either from its backing file or from swap.  It is also held while a
1129   * folio is being truncated from its address_space, so holding the lock
1130   * is sufficient to keep folio->mapping stable.
1131   *
1132   * The folio lock is also held while write() is modifying the page to
1133   * provide POSIX atomicity guarantees (as long as the write does not
1134   * cross a page boundary).  Other modifications to the data in the folio
1135   * do not hold the folio lock and can race with writes, eg DMA and stores
1136   * to mapped pages.
1137   *
1138   * Context: May sleep.  If you need to acquire the locks of two or
1139   * more folios, they must be in order of ascending index, if they are
1140   * in the same address_space.  If they are in different address_spaces,
1141   * acquire the lock of the folio which belongs to the address_space which
1142   * has the lowest address in memory first.
1143   */
1144  static inline void folio_lock(struct folio *folio)
1145  {
1146  	might_sleep();
1147  	if (!folio_trylock(folio))
1148  		__folio_lock(folio);
1149  }
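
/*
 * Illustrative sketch: a common pattern after looking a folio up without
 * the lock is to lock it and then re-check folio->mapping, since the folio
 * may have been truncated while the caller slept.  Variable names and the
 * retry label are hypothetical.
 *
 *	folio_lock(folio);
 *	if (folio->mapping != mapping) {
 *		folio_unlock(folio);
 *		folio_put(folio);
 *		goto retry;
 *	}
 */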
1150  
1151  /**
1152   * lock_page() - Lock the folio containing this page.
1153   * @page: The page to lock.
1154   *
1155   * See folio_lock() for a description of what the lock protects.
1156   * This is a legacy function and new code should probably use folio_lock()
1157   * instead.
1158   *
1159   * Context: May sleep.  Pages in the same folio share a lock, so do not
1160   * attempt to lock two pages which share a folio.
1161   */
1162  static inline void lock_page(struct page *page)
1163  {
1164  	struct folio *folio;
1165  	might_sleep();
1166  
1167  	folio = page_folio(page);
1168  	if (!folio_trylock(folio))
1169  		__folio_lock(folio);
1170  }
1171  
1172  /**
1173   * folio_lock_killable() - Lock this folio, interruptible by a fatal signal.
1174   * @folio: The folio to lock.
1175   *
1176   * Attempts to lock the folio, like folio_lock(), except that the sleep
1177   * to acquire the lock is interruptible by a fatal signal.
1178   *
1179   * Context: May sleep; see folio_lock().
1180   * Return: 0 if the lock was acquired; -EINTR if a fatal signal was received.
1181   */
1182  static inline int folio_lock_killable(struct folio *folio)
1183  {
1184  	might_sleep();
1185  	if (!folio_trylock(folio))
1186  		return __folio_lock_killable(folio);
1187  	return 0;
1188  }
1189  
1190  /*
1191   * folio_lock_or_retry - Lock the folio, unless this would block and the
1192   * caller indicated that it can handle a retry.
1193   *
1194   * Return value and mmap_lock implications depend on flags; see
1195   * __folio_lock_or_retry().
1196   */
1197  static inline vm_fault_t folio_lock_or_retry(struct folio *folio,
1198  					     struct vm_fault *vmf)
1199  {
1200  	might_sleep();
1201  	if (!folio_trylock(folio))
1202  		return __folio_lock_or_retry(folio, vmf);
1203  	return 0;
1204  }
1205  
1206  /*
1207   * This is exported only for folio_wait_locked/folio_wait_writeback, etc.,
1208   * and should not be used directly.
1209   */
1210  void folio_wait_bit(struct folio *folio, int bit_nr);
1211  int folio_wait_bit_killable(struct folio *folio, int bit_nr);
1212  
1213  /*
1214   * Wait for a folio to be unlocked.
1215   *
1216   * This must be called with the caller "holding" the folio,
1217   * ie with increased folio reference count so that the folio won't
1218   * go away during the wait.
1219   */
1220  static inline void folio_wait_locked(struct folio *folio)
1221  {
1222  	if (folio_test_locked(folio))
1223  		folio_wait_bit(folio, PG_locked);
1224  }
1225  
1226  static inline int folio_wait_locked_killable(struct folio *folio)
1227  {
1228  	if (!folio_test_locked(folio))
1229  		return 0;
1230  	return folio_wait_bit_killable(folio, PG_locked);
1231  }
1232  
1233  static inline void wait_on_page_locked(struct page *page)
1234  {
1235  	folio_wait_locked(page_folio(page));
1236  }
1237  
1238  void folio_end_read(struct folio *folio, bool success);
1239  void wait_on_page_writeback(struct page *page);
1240  void folio_wait_writeback(struct folio *folio);
1241  int folio_wait_writeback_killable(struct folio *folio);
1242  void end_page_writeback(struct page *page);
1243  void folio_end_writeback(struct folio *folio);
1244  void wait_for_stable_page(struct page *page);
1245  void folio_wait_stable(struct folio *folio);
1246  void __folio_mark_dirty(struct folio *folio, struct address_space *, int warn);
1247  void folio_account_cleaned(struct folio *folio, struct bdi_writeback *wb);
1248  void __folio_cancel_dirty(struct folio *folio);
1249  static inline void folio_cancel_dirty(struct folio *folio)
1250  {
1251  	/* Avoid atomic ops, locking, etc. when not actually needed. */
1252  	if (folio_test_dirty(folio))
1253  		__folio_cancel_dirty(folio);
1254  }
1255  bool folio_clear_dirty_for_io(struct folio *folio);
1256  bool clear_page_dirty_for_io(struct page *page);
1257  void folio_invalidate(struct folio *folio, size_t offset, size_t length);
1258  bool noop_dirty_folio(struct address_space *mapping, struct folio *folio);
1259  
1260  #ifdef CONFIG_MIGRATION
1261  int filemap_migrate_folio(struct address_space *mapping, struct folio *dst,
1262  		struct folio *src, enum migrate_mode mode);
1263  #else
1264  #define filemap_migrate_folio NULL
1265  #endif
1266  void folio_end_private_2(struct folio *folio);
1267  void folio_wait_private_2(struct folio *folio);
1268  int folio_wait_private_2_killable(struct folio *folio);
1269  
1270  /*
1271   * Add an arbitrary waiter to a page's wait queue
1272   */
1273  void folio_add_wait_queue(struct folio *folio, wait_queue_entry_t *waiter);
1274  
1275  /*
1276   * Fault in userspace address range.
1277   */
1278  size_t fault_in_writeable(char __user *uaddr, size_t size);
1279  size_t fault_in_subpage_writeable(char __user *uaddr, size_t size);
1280  size_t fault_in_safe_writeable(const char __user *uaddr, size_t size);
1281  size_t fault_in_readable(const char __user *uaddr, size_t size);
1282  
1283  int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
1284  		pgoff_t index, gfp_t gfp);
1285  int filemap_add_folio(struct address_space *mapping, struct folio *folio,
1286  		pgoff_t index, gfp_t gfp);
1287  void filemap_remove_folio(struct folio *folio);
1288  void __filemap_remove_folio(struct folio *folio, void *shadow);
1289  void replace_page_cache_folio(struct folio *old, struct folio *new);
1290  void delete_from_page_cache_batch(struct address_space *mapping,
1291  				  struct folio_batch *fbatch);
1292  bool filemap_release_folio(struct folio *folio, gfp_t gfp);
1293  loff_t mapping_seek_hole_data(struct address_space *, loff_t start, loff_t end,
1294  		int whence);
1295  
1296  /* Must be non-static for BPF error injection */
1297  int __filemap_add_folio(struct address_space *mapping, struct folio *folio,
1298  		pgoff_t index, gfp_t gfp, void **shadowp);
1299  
1300  bool filemap_range_has_writeback(struct address_space *mapping,
1301  				 loff_t start_byte, loff_t end_byte);
1302  
1303  /**
1304   * filemap_range_needs_writeback - check if range potentially needs writeback
1305   * @mapping:           address space within which to check
1306   * @start_byte:        offset in bytes where the range starts
1307   * @end_byte:          offset in bytes where the range ends (inclusive)
1308   *
1309   * Find at least one page in the range supplied, usually used to check if
1310   * direct writing in this range will trigger a writeback. Used by O_DIRECT
1311   * read/write with IOCB_NOWAIT, to see if the caller needs to do
1312   * filemap_write_and_wait_range() before proceeding.
1313   *
1314   * Return: %true if the caller should do filemap_write_and_wait_range() before
1315   * doing O_DIRECT to a page in this range, %false otherwise.
1316   */
1317  static inline bool filemap_range_needs_writeback(struct address_space *mapping,
1318  						 loff_t start_byte,
1319  						 loff_t end_byte)
1320  {
1321  	if (!mapping->nrpages)
1322  		return false;
1323  	if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) &&
1324  	    !mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK))
1325  		return false;
1326  	return filemap_range_has_writeback(mapping, start_byte, end_byte);
1327  }
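
/*
 * Illustrative sketch (hypothetical O_DIRECT read fragment): with
 * IOCB_NOWAIT the caller cannot block on a flush, so it bails out with
 * -EAGAIN if the range may still need writeback.  The count variable is
 * assumed to hold the number of bytes being read.
 *
 *	if (iocb->ki_flags & IOCB_NOWAIT) {
 *		if (filemap_range_needs_writeback(mapping, iocb->ki_pos,
 *						  iocb->ki_pos + count - 1))
 *			return -EAGAIN;
 *	}
 */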
1328  
1329  /**
1330   * struct readahead_control - Describes a readahead request.
1331   *
1332   * A readahead request is for consecutive pages.  Filesystems which
1333   * implement the ->readahead method should call readahead_page() or
1334   * readahead_page_batch() in a loop and attempt to start I/O against
1335   * each page in the request.
1336   *
1337   * Most of the fields in this struct are private and should be accessed
1338   * by the functions below.
1339   *
1340   * @file: The file, used primarily by network filesystems for authentication.
1341   *	  May be NULL if invoked internally by the filesystem.
1342   * @mapping: Readahead this filesystem object.
1343   * @ra: File readahead state.  May be NULL.
1344   */
1345  struct readahead_control {
1346  	struct file *file;
1347  	struct address_space *mapping;
1348  	struct file_ra_state *ra;
1349  /* private: use the readahead_* accessors instead */
1350  	pgoff_t _index;
1351  	unsigned int _nr_pages;
1352  	unsigned int _batch_count;
1353  	bool _workingset;
1354  	unsigned long _pflags;
1355  };
1356  
1357  #define DEFINE_READAHEAD(ractl, f, r, m, i)				\
1358  	struct readahead_control ractl = {				\
1359  		.file = f,						\
1360  		.mapping = m,						\
1361  		.ra = r,						\
1362  		._index = i,						\
1363  	}
1364  
1365  #define VM_READAHEAD_PAGES	(SZ_128K / PAGE_SIZE)
1366  
1367  void page_cache_ra_unbounded(struct readahead_control *,
1368  		unsigned long nr_to_read, unsigned long lookahead_count);
1369  void page_cache_sync_ra(struct readahead_control *, unsigned long req_count);
1370  void page_cache_async_ra(struct readahead_control *, struct folio *,
1371  		unsigned long req_count);
1372  void readahead_expand(struct readahead_control *ractl,
1373  		      loff_t new_start, size_t new_len);
1374  
1375  /**
1376   * page_cache_sync_readahead - generic file readahead
1377   * @mapping: address_space which holds the pagecache and I/O vectors
1378   * @ra: file_ra_state which holds the readahead state
1379   * @file: Used by the filesystem for authentication.
1380   * @index: Index of first page to be read.
1381   * @req_count: Total number of pages being read by the caller.
1382   *
1383   * page_cache_sync_readahead() should be called when a cache miss happened:
1384   * it will submit the read.  The readahead logic may decide to piggyback more
1385   * pages onto the read request if access patterns suggest it will improve
1386   * performance.
1387   */
1388  static inline
1389  void page_cache_sync_readahead(struct address_space *mapping,
1390  		struct file_ra_state *ra, struct file *file, pgoff_t index,
1391  		unsigned long req_count)
1392  {
1393  	DEFINE_READAHEAD(ractl, file, ra, mapping, index);
1394  	page_cache_sync_ra(&ractl, req_count);
1395  }

/**
 * page_cache_async_readahead - file readahead for marked pages
 * @mapping: address_space which holds the pagecache and I/O vectors
 * @ra: file_ra_state which holds the readahead state
 * @file: Used by the filesystem for authentication.
 * @folio: The folio which triggered the readahead call.
 * @req_count: Total number of pages being read by the caller.
 *
 * page_cache_async_readahead() should be called when a page is used which
 * is marked as PageReadahead; this is a marker to suggest that the application
 * has used up enough of the readahead window that we should start pulling in
 * more pages.
 */
static inline
void page_cache_async_readahead(struct address_space *mapping,
		struct file_ra_state *ra, struct file *file,
		struct folio *folio, unsigned long req_count)
{
	DEFINE_READAHEAD(ractl, file, ra, mapping, folio->index);
	page_cache_async_ra(&ractl, folio, req_count);
}
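
/*
 * Example (illustrative sketch): once a folio carrying the readahead marker
 * is actually used, asynchronous readahead extends the window.  The variable
 * names here are hypothetical.
 *
 *	if (folio_test_readahead(folio))
 *		page_cache_async_readahead(mapping, &file->f_ra, file,
 *					   folio, last_index - index);
 */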

static inline struct folio *__readahead_folio(struct readahead_control *ractl)
{
	struct folio *folio;

	BUG_ON(ractl->_batch_count > ractl->_nr_pages);
	ractl->_nr_pages -= ractl->_batch_count;
	ractl->_index += ractl->_batch_count;

	if (!ractl->_nr_pages) {
		ractl->_batch_count = 0;
		return NULL;
	}

	folio = xa_load(&ractl->mapping->i_pages, ractl->_index);
	VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
	ractl->_batch_count = folio_nr_pages(folio);

	return folio;
}

/**
 * readahead_page - Get the next page to read.
 * @ractl: The current readahead request.
 *
 * Context: The page is locked and has an elevated refcount.  The caller
 * should decrease the refcount once the page has been submitted for I/O
 * and unlock the page once all I/O to that page has completed.
 * Return: A pointer to the next page, or %NULL if we are done.
 */
static inline struct page *readahead_page(struct readahead_control *ractl)
{
	struct folio *folio = __readahead_folio(ractl);

	return &folio->page;
}

/**
 * readahead_folio - Get the next folio to read.
 * @ractl: The current readahead request.
 *
 * Context: The folio is locked.  The caller should unlock the folio once
 * all I/O to that folio has completed.
 * Return: A pointer to the next folio, or %NULL if we are done.
 */
static inline struct folio *readahead_folio(struct readahead_control *ractl)
{
	struct folio *folio = __readahead_folio(ractl);

	if (folio)
		folio_put(folio);
	return folio;
}
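
/*
 * Example (illustrative sketch): a filesystem's ->readahead method can
 * consume the request one folio at a time with readahead_folio().  The
 * myfs_* names are hypothetical; the helper is expected to unlock each
 * folio when its I/O completes.
 *
 *	static void myfs_readahead(struct readahead_control *ractl)
 *	{
 *		struct folio *folio;
 *
 *		while ((folio = readahead_folio(ractl)))
 *			myfs_start_read(folio);
 *	}
 */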

static inline unsigned int __readahead_batch(struct readahead_control *rac,
		struct page **array, unsigned int array_sz)
{
	unsigned int i = 0;
	XA_STATE(xas, &rac->mapping->i_pages, 0);
	struct page *page;

	BUG_ON(rac->_batch_count > rac->_nr_pages);
	rac->_nr_pages -= rac->_batch_count;
	rac->_index += rac->_batch_count;
	rac->_batch_count = 0;

	xas_set(&xas, rac->_index);
	rcu_read_lock();
	xas_for_each(&xas, page, rac->_index + rac->_nr_pages - 1) {
		if (xas_retry(&xas, page))
			continue;
		VM_BUG_ON_PAGE(!PageLocked(page), page);
		VM_BUG_ON_PAGE(PageTail(page), page);
		array[i++] = page;
		rac->_batch_count += thp_nr_pages(page);
		if (i == array_sz)
			break;
	}
	rcu_read_unlock();

	return i;
}

/**
 * readahead_page_batch - Get a batch of pages to read.
 * @rac: The current readahead request.
 * @array: An array of pointers to struct page.
 *
 * Context: The pages are locked and have an elevated refcount.  The caller
 * should decrease the refcount once each page has been submitted for I/O
 * and unlock each page once all I/O to that page has completed.
 * Return: The number of pages placed in the array.  0 indicates the request
 * is complete.
 */
#define readahead_page_batch(rac, array)				\
	__readahead_batch(rac, array, ARRAY_SIZE(array))
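
/*
 * Example (illustrative sketch): consuming the request in fixed-size batches.
 * The array size and the myfs_submit_page() helper are hypothetical; each
 * page must still be unlocked and have its refcount dropped as described
 * above.
 *
 *	struct page *pages[16];
 *	unsigned int i, nr;
 *
 *	while ((nr = readahead_page_batch(rac, pages))) {
 *		for (i = 0; i < nr; i++)
 *			myfs_submit_page(pages[i]);
 *	}
 */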

/**
 * readahead_pos - The byte offset into the file of this readahead request.
 * @rac: The readahead request.
 */
static inline loff_t readahead_pos(struct readahead_control *rac)
{
	return (loff_t)rac->_index * PAGE_SIZE;
}

/**
 * readahead_length - The number of bytes in this readahead request.
 * @rac: The readahead request.
 */
static inline size_t readahead_length(struct readahead_control *rac)
{
	return rac->_nr_pages * PAGE_SIZE;
}

/**
 * readahead_index - The index of the first page in this readahead request.
 * @rac: The readahead request.
 */
static inline pgoff_t readahead_index(struct readahead_control *rac)
{
	return rac->_index;
}

/**
 * readahead_count - The number of pages in this readahead request.
 * @rac: The readahead request.
 */
static inline unsigned int readahead_count(struct readahead_control *rac)
{
	return rac->_nr_pages;
}

/**
 * readahead_batch_length - The number of bytes in the current batch.
 * @rac: The readahead request.
 */
static inline size_t readahead_batch_length(struct readahead_control *rac)
{
	return rac->_batch_count * PAGE_SIZE;
}
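
/*
 * Example (illustrative sketch): the accessors above express the request in
 * byte units, which is convenient when building a single I/O covering the
 * whole request.  The submit helper named here is hypothetical.
 *
 *	loff_t pos = readahead_pos(ractl);
 *	size_t len = readahead_length(ractl);
 *
 *	myfs_submit_read(ractl->mapping->host, pos, len);
 */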

static inline unsigned long dir_pages(struct inode *inode)
{
	return (unsigned long)(inode->i_size + PAGE_SIZE - 1) >>
			       PAGE_SHIFT;
}
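
/*
 * Example (illustrative sketch): simple block-based filesystems walk a
 * directory one page at a time, using dir_pages() as the loop bound.  The
 * myfs_get_dir_page() helper is hypothetical.
 *
 *	unsigned long n, npages = dir_pages(dir);
 *
 *	for (n = 0; n < npages; n++) {
 *		struct page *page = myfs_get_dir_page(dir, n);
 *		// ...scan the entries in this page...
 *	}
 */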

/**
 * folio_mkwrite_check_truncate - check if folio was truncated
 * @folio: the folio to check
 * @inode: the inode to check the folio against
 *
 * Return: the number of bytes in the folio up to EOF,
 * or -EFAULT if the folio was truncated.
 */
static inline ssize_t folio_mkwrite_check_truncate(struct folio *folio,
					      struct inode *inode)
{
	loff_t size = i_size_read(inode);
	pgoff_t index = size >> PAGE_SHIFT;
	size_t offset = offset_in_folio(folio, size);

	if (!folio->mapping)
		return -EFAULT;

	/* folio is wholly inside EOF */
	if (folio_next_index(folio) - 1 < index)
		return folio_size(folio);
	/* folio is wholly past EOF */
	if (folio->index > index || !offset)
		return -EFAULT;
	/* folio is partially inside EOF */
	return offset;
}
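
/*
 * Example (illustrative sketch): a ->page_mkwrite handler can use this to
 * detect a race with truncation after locking the folio.  Error handling
 * beyond the truncate check is omitted and the surrounding code is
 * hypothetical.
 *
 *	folio_lock(folio);
 *	if (folio_mkwrite_check_truncate(folio, inode) < 0) {
 *		folio_unlock(folio);
 *		return VM_FAULT_NOPAGE;
 *	}
 */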

/**
 * page_mkwrite_check_truncate - check if page was truncated
 * @page: the page to check
 * @inode: the inode to check the page against
 *
 * Return: the number of bytes in the page up to EOF,
 * or -EFAULT if the page was truncated.
 */
static inline int page_mkwrite_check_truncate(struct page *page,
					      struct inode *inode)
{
	loff_t size = i_size_read(inode);
	pgoff_t index = size >> PAGE_SHIFT;
	int offset = offset_in_page(size);

	if (page->mapping != inode->i_mapping)
		return -EFAULT;

	/* page is wholly inside EOF */
	if (page->index < index)
		return PAGE_SIZE;
	/* page is wholly past EOF */
	if (page->index > index || !offset)
		return -EFAULT;
	/* page is partially inside EOF */
	return offset;
}

/**
 * i_blocks_per_folio - How many blocks fit in this folio.
 * @inode: The inode which contains the blocks.
 * @folio: The folio.
 *
 * If the block size is larger than the size of this folio, return zero.
 *
 * Context: The caller should hold a refcount on the folio to prevent it
 * from being split.
 * Return: The number of filesystem blocks covered by this folio.
 */
static inline
unsigned int i_blocks_per_folio(struct inode *inode, struct folio *folio)
{
	return folio_size(folio) >> inode->i_blkbits;
}
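
/*
 * Example: with a 4KiB block size (inode->i_blkbits == 12), a 64KiB folio
 * covers 64KiB >> 12 == 16 blocks, while a folio smaller than one block
 * yields 0.
 */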
#endif /* _LINUX_PAGEMAP_H */