// SPDX-License-Identifier: GPL-2.0

#include <linux/slab.h>
#include "messages.h"
#include "ctree.h"
#include "subpage.h"
#include "btrfs_inode.h"

/*
 * Subpage (sectorsize < PAGE_SIZE) support overview:
 *
 * Limitations:
 *
 * - Only support 64K page size for now
 *   This is to make metadata handling easier, as a 64K page ensures any
 *   valid nodesize fits inside one page, thus we don't need to handle
 *   cases where a tree block crosses several pages.
 *
 * - Only metadata read-write for now
 *   The data read-write part is in development.
 *
 * - Metadata can't cross 64K page boundary
 *   btrfs-progs and the kernel have enforced this for a while, thus only
 *   ancient filesystems could have such a problem.  For such a case, do a
 *   graceful rejection.
 *
 * Special behavior:
 *
 * - Metadata
 *   Metadata read is fully supported, meaning reading one tree block only
 *   triggers the read for the needed range; other unrelated ranges in the
 *   same page will not be touched.
 *
 *   Metadata write support is partial.
 *   The writeback is still for the full page, but we will only submit
 *   the dirty extent buffers in the page.
 *
 *   This means, if we have a metadata page like this:
 *
 *   Page offset
 *   0         16K         32K         48K        64K
 *   |/////////|           |///////////|
 *        \- Tree block A        \- Tree block B
 *
 *   Even if we just want to writeback tree block A, we will also writeback
 *   tree block B if it's also dirty.
 *
 *   This may cause extra metadata writeback, which results in more COW.
 *
 * Implementation:
 *
 * - Common
 *   Both metadata and data will use a new structure, btrfs_subpage, to
 *   record the status of each sector inside a page.  This provides the extra
 *   granularity needed.
 *
 * - Metadata
 *   Since we have multiple tree blocks inside one page, we can't rely on page
 *   locking anymore, or we would have greatly reduced concurrency or even
 *   deadlocks (holding one tree lock while trying to lock another tree block
 *   in the same page).
 *
 *   Thus for metadata locking, subpage support relies on io_tree locking only.
 *   This means a slightly higher tree locking latency.
 */
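
/*
 * Illustrative sketch of the bitmap layout, assuming a 4K sectorsize on a
 * 64K page (sectors_per_page == 16): subpage->bitmaps packs one
 * sectors_per_page-bit range per status type, indexed by the
 * btrfs_bitmap_nr_* enum from subpage.h.  E.g. if btrfs_bitmap_nr_uptodate
 * is 0 and btrfs_bitmap_nr_dirty is 1:
 *
 *   bits [ 0, 15]  uptodate (one bit per sector)
 *   bits [16, 31]  dirty
 *   ...            up to btrfs_bitmap_nr_max * sectors_per_page bits total
 *
 * Each status type thus starts at bit
 * sectors_per_page * btrfs_bitmap_nr_<type>, which is exactly what
 * subpage_calc_start_bit() below computes.
 */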

#if PAGE_SIZE > SZ_4K
bool btrfs_is_subpage(const struct btrfs_fs_info *fs_info, struct address_space *mapping)
{
	if (fs_info->sectorsize >= PAGE_SIZE)
		return false;

	/*
	 * Only data pages (either through DIO or compression) can have no
	 * mapping.  And if mapping->host is a data inode, it's subpage, as we
	 * have already ruled out the sectorsize >= PAGE_SIZE case.
	 */
	if (!mapping || !mapping->host || is_data_inode(BTRFS_I(mapping->host)))
		return true;

	/*
	 * Now the only remaining case is metadata, which goes through the
	 * subpage routine only if nodesize < PAGE_SIZE.
	 */
	if (fs_info->nodesize < PAGE_SIZE)
		return true;
	return false;
}
#endif

int btrfs_attach_subpage(const struct btrfs_fs_info *fs_info,
			 struct folio *folio, enum btrfs_subpage_type type)
{
	struct btrfs_subpage *subpage;

	/*
	 * We have cases like a dummy extent buffer page, which is not mapped
	 * and doesn't need to be locked.
	 */
	if (folio->mapping)
		ASSERT(folio_test_locked(folio));

	/* Either not subpage, or the folio already has private attached. */
	if (!btrfs_is_subpage(fs_info, folio->mapping) || folio_test_private(folio))
		return 0;

	subpage = btrfs_alloc_subpage(fs_info, type);
	if (IS_ERR(subpage))
		return PTR_ERR(subpage);

	folio_attach_private(folio, subpage);
	return 0;
}

void btrfs_detach_subpage(const struct btrfs_fs_info *fs_info, struct folio *folio)
{
	struct btrfs_subpage *subpage;

	/* Either not subpage, or the folio has no private attached. */
	if (!btrfs_is_subpage(fs_info, folio->mapping) || !folio_test_private(folio))
		return;

	subpage = folio_detach_private(folio);
	ASSERT(subpage);
	btrfs_free_subpage(subpage);
}

struct btrfs_subpage *btrfs_alloc_subpage(const struct btrfs_fs_info *fs_info,
					  enum btrfs_subpage_type type)
{
	struct btrfs_subpage *ret;
	unsigned int real_size;

	ASSERT(fs_info->sectorsize < PAGE_SIZE);

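	/*
	 * Illustrative sizing, assuming a 4K sectorsize on a 64K page
	 * (sectors_per_page == 16) and btrfs_bitmap_nr_max == 6 as in
	 * subpage.h: 6 * 16 == 96 bits, thus BITS_TO_LONGS(96) == 2 longs on
	 * 64-bit, and real_size covers the struct plus a 2-element bitmaps[]
	 * flex array.
	 */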
	real_size = struct_size(ret, bitmaps,
			BITS_TO_LONGS(btrfs_bitmap_nr_max * fs_info->sectors_per_page));
	ret = kzalloc(real_size, GFP_NOFS);
	if (!ret)
		return ERR_PTR(-ENOMEM);

	spin_lock_init(&ret->lock);
	if (type == BTRFS_SUBPAGE_METADATA) {
		atomic_set(&ret->eb_refs, 0);
	} else {
		atomic_set(&ret->readers, 0);
		atomic_set(&ret->writers, 0);
	}
	return ret;
}

void btrfs_free_subpage(struct btrfs_subpage *subpage)
{
	kfree(subpage);
}

/*
 * Increase the eb_refs of the current subpage.
 *
 * This is important for eb allocation, to prevent a race with the freeing of
 * the last eb in the same page.
 * With eb_refs increased before the eb is inserted into the radix tree,
 * detach_extent_buffer_page() won't detach the folio private while we're
 * still allocating the extent buffer.
 */
void btrfs_folio_inc_eb_refs(const struct btrfs_fs_info *fs_info, struct folio *folio)
{
	struct btrfs_subpage *subpage;

	if (!btrfs_is_subpage(fs_info, folio->mapping))
		return;

	ASSERT(folio_test_private(folio) && folio->mapping);
	lockdep_assert_held(&folio->mapping->i_private_lock);

	subpage = folio_get_private(folio);
	atomic_inc(&subpage->eb_refs);
}

void btrfs_folio_dec_eb_refs(const struct btrfs_fs_info *fs_info, struct folio *folio)
{
	struct btrfs_subpage *subpage;

	if (!btrfs_is_subpage(fs_info, folio->mapping))
		return;

	ASSERT(folio_test_private(folio) && folio->mapping);
	lockdep_assert_held(&folio->mapping->i_private_lock);

	subpage = folio_get_private(folio);
	ASSERT(atomic_read(&subpage->eb_refs));
	atomic_dec(&subpage->eb_refs);
}

static void btrfs_subpage_assert(const struct btrfs_fs_info *fs_info,
				 struct folio *folio, u64 start, u32 len)
{
	/* For subpage support, the folio must be a single page. */
	ASSERT(folio_order(folio) == 0);

	/* Basic checks */
	ASSERT(folio_test_private(folio) && folio_get_private(folio));
	ASSERT(IS_ALIGNED(start, fs_info->sectorsize) &&
	       IS_ALIGNED(len, fs_info->sectorsize));
	/*
	 * The range check only works for mapped pages; we can still have
	 * unmapped pages like dummy extent buffer pages.
	 */
	if (folio->mapping)
		ASSERT(folio_pos(folio) <= start &&
		       start + len <= folio_pos(folio) + PAGE_SIZE);
}

#define subpage_calc_start_bit(fs_info, folio, name, start, len)	\
({									\
	unsigned int __start_bit;					\
									\
	btrfs_subpage_assert(fs_info, folio, start, len);		\
	__start_bit = offset_in_page(start) >> fs_info->sectorsize_bits; \
	__start_bit += fs_info->sectors_per_page * btrfs_bitmap_nr_##name; \
	__start_bit;							\
})
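
/*
 * Worked example (illustrative, assuming a 4K sectorsize on a 64K page):
 * for a folio covering [128K, 192K) and a call with name == dirty and
 * start == 136K, the sector index is offset_in_page(136K) >> 12 == 2.
 * If btrfs_bitmap_nr_dirty were 1, the result would be
 * 16 * 1 + 2 == 18, i.e. bit 18 of subpage->bitmaps.
 */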

void btrfs_subpage_start_reader(const struct btrfs_fs_info *fs_info,
				struct folio *folio, u64 start, u32 len)
{
	struct btrfs_subpage *subpage = folio_get_private(folio);
	const int start_bit = subpage_calc_start_bit(fs_info, folio, locked, start, len);
	const int nbits = len >> fs_info->sectorsize_bits;
	unsigned long flags;

	btrfs_subpage_assert(fs_info, folio, start, len);

	spin_lock_irqsave(&subpage->lock, flags);
	/*
	 * Even though it's just for reading the page, no one should have
	 * locked the subpage range.
	 */
	ASSERT(bitmap_test_range_all_zero(subpage->bitmaps, start_bit, nbits));
	bitmap_set(subpage->bitmaps, start_bit, nbits);
	atomic_add(nbits, &subpage->readers);
	spin_unlock_irqrestore(&subpage->lock, flags);
}

void btrfs_subpage_end_reader(const struct btrfs_fs_info *fs_info,
			      struct folio *folio, u64 start, u32 len)
{
	struct btrfs_subpage *subpage = folio_get_private(folio);
	const int start_bit = subpage_calc_start_bit(fs_info, folio, locked, start, len);
	const int nbits = len >> fs_info->sectorsize_bits;
	unsigned long flags;
	bool is_data;
	bool last;

	btrfs_subpage_assert(fs_info, folio, start, len);
	is_data = is_data_inode(BTRFS_I(folio->mapping->host));

	spin_lock_irqsave(&subpage->lock, flags);

	/* The range should have already been locked. */
	ASSERT(bitmap_test_range_all_set(subpage->bitmaps, start_bit, nbits));
	ASSERT(atomic_read(&subpage->readers) >= nbits);

	bitmap_clear(subpage->bitmaps, start_bit, nbits);
	last = atomic_sub_and_test(nbits, &subpage->readers);

	/*
	 * For data we need to unlock the page if the last read has finished.
	 *
	 * Don't replace @last with an atomic_sub_and_test() call inside the
	 * if () condition, as we always want atomic_sub_and_test() to be
	 * executed.
	 */
	if (is_data && last)
		folio_unlock(folio);
	spin_unlock_irqrestore(&subpage->lock, flags);
}

static void btrfs_subpage_clamp_range(struct folio *folio, u64 *start, u32 *len)
{
	u64 orig_start = *start;
	u32 orig_len = *len;

	*start = max_t(u64, folio_pos(folio), orig_start);
	/*
	 * For certain call sites like btrfs_drop_pages(), we may have pages
	 * beyond the target range.  In that case, just set @len to 0, subpage
	 * helpers can handle @len == 0 without any problem.
	 */
	if (folio_pos(folio) >= orig_start + orig_len)
		*len = 0;
	else
		*len = min_t(u64, folio_pos(folio) + PAGE_SIZE,
			     orig_start + orig_len) - *start;
}
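
/*
 * Worked example (illustrative): for a folio covering [64K, 128K) and an
 * input range [60K, 72K), the clamped result is start == 64K, len == 8K.
 * If the input range were [40K, 56K), entirely before the folio, @len would
 * be clamped to 0.
 */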

static void btrfs_subpage_start_writer(const struct btrfs_fs_info *fs_info,
				       struct folio *folio, u64 start, u32 len)
{
	struct btrfs_subpage *subpage = folio_get_private(folio);
	const int start_bit = subpage_calc_start_bit(fs_info, folio, locked, start, len);
	const int nbits = (len >> fs_info->sectorsize_bits);
	unsigned long flags;
	int ret;

	btrfs_subpage_assert(fs_info, folio, start, len);

	spin_lock_irqsave(&subpage->lock, flags);
	ASSERT(atomic_read(&subpage->readers) == 0);
	ASSERT(bitmap_test_range_all_zero(subpage->bitmaps, start_bit, nbits));
	bitmap_set(subpage->bitmaps, start_bit, nbits);
	ret = atomic_add_return(nbits, &subpage->writers);
	ASSERT(ret == nbits);
	spin_unlock_irqrestore(&subpage->lock, flags);
}

static bool btrfs_subpage_end_and_test_writer(const struct btrfs_fs_info *fs_info,
					      struct folio *folio, u64 start, u32 len)
{
	struct btrfs_subpage *subpage = folio_get_private(folio);
	const int start_bit = subpage_calc_start_bit(fs_info, folio, locked, start, len);
	const int nbits = (len >> fs_info->sectorsize_bits);
	unsigned long flags;
	unsigned int cleared = 0;
	int bit = start_bit;
	bool last;

	btrfs_subpage_assert(fs_info, folio, start, len);

	spin_lock_irqsave(&subpage->lock, flags);
	/*
	 * We have call sites passing @locked_page into
	 * extent_clear_unlock_delalloc() for the compression path.
	 *
	 * This @locked_page is locked by plain lock_page(), thus its
	 * subpage::writers is 0.  Handle it in a special way.
	 */
	if (atomic_read(&subpage->writers) == 0) {
		spin_unlock_irqrestore(&subpage->lock, flags);
		return true;
	}

	for_each_set_bit_from(bit, subpage->bitmaps, start_bit + nbits) {
		clear_bit(bit, subpage->bitmaps);
		cleared++;
	}
	ASSERT(atomic_read(&subpage->writers) >= cleared);
	last = atomic_sub_and_test(cleared, &subpage->writers);
	spin_unlock_irqrestore(&subpage->lock, flags);
	return last;
}

/*
 * Lock a folio for delalloc page writeback.
 *
 * Return -EAGAIN if the page is not properly initialized.
 * Return 0 with the page locked, and the writer counter updated.
 *
 * Even with 0 returned, the page still needs an extra check to make sure
 * it's really the correct page, as the caller is using
 * filemap_get_folios_contig(), which can race with page invalidation.
 */
int btrfs_folio_start_writer_lock(const struct btrfs_fs_info *fs_info,
				  struct folio *folio, u64 start, u32 len)
{
	if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, folio->mapping)) {
		folio_lock(folio);
		return 0;
	}
	folio_lock(folio);
	if (!folio_test_private(folio) || !folio_get_private(folio)) {
		folio_unlock(folio);
		return -EAGAIN;
	}
	btrfs_subpage_clamp_range(folio, &start, &len);
	btrfs_subpage_start_writer(fs_info, folio, start, len);
	return 0;
}
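
/*
 * Illustrative caller sketch (hypothetical, simplified): the delalloc
 * locking loop described above could use this as
 *
 *	ret = btrfs_folio_start_writer_lock(fs_info, folio, start, len);
 *	if (ret == -EAGAIN)
 *		goto again;
 *	if (folio->mapping != mapping) {
 *		btrfs_folio_end_writer_lock(fs_info, folio, start, len);
 *		goto again;
 *	}
 *
 * where the mapping check guards against the invalidation race mentioned
 * above; @mapping and the "again" label are assumptions of this sketch, not
 * part of this file.
 */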

/*
 * Handle different locked folios:
 *
 * - Non-subpage folio
 *   Just unlock it.
 *
 * - Folio locked but without any subpage range locked
 *   This happens either before writepage_delalloc(), or because the delalloc
 *   range was already handled by the previous folio.
 *   We can simply unlock it.
 *
 * - Folio locked with subpage range locked
 *   We go through the locked sectors inside the range, clear their locked
 *   bitmap bits, reduce the writer lock count, and unlock the page if that
 *   was the last locked range.
 */
void btrfs_folio_end_writer_lock(const struct btrfs_fs_info *fs_info,
				 struct folio *folio, u64 start, u32 len)
{
	struct btrfs_subpage *subpage = folio_get_private(folio);

	ASSERT(folio_test_locked(folio));

	if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, folio->mapping)) {
		folio_unlock(folio);
		return;
	}

	/*
	 * For the subpage case, there are two types of locked folios: with or
	 * without a writers count.
	 *
	 * Since we own the page lock, no one else could touch subpage::writers
	 * and we are safe to do several atomic operations without spinlock.
	 */
	if (atomic_read(&subpage->writers) == 0) {
		/* No writers, locked by plain lock_page(). */
		folio_unlock(folio);
		return;
	}

	btrfs_subpage_clamp_range(folio, &start, &len);
	if (btrfs_subpage_end_and_test_writer(fs_info, folio, start, len))
		folio_unlock(folio);
}

void btrfs_folio_end_writer_lock_bitmap(const struct btrfs_fs_info *fs_info,
					struct folio *folio, unsigned long bitmap)
{
	struct btrfs_subpage *subpage = folio_get_private(folio);
	const int start_bit = fs_info->sectors_per_page * btrfs_bitmap_nr_locked;
	unsigned long flags;
	bool last = false;
	int cleared = 0;
	int bit;

	if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, folio->mapping)) {
		folio_unlock(folio);
		return;
	}

	if (atomic_read(&subpage->writers) == 0) {
		/* No writers, locked by plain lock_page(). */
		folio_unlock(folio);
		return;
	}

	spin_lock_irqsave(&subpage->lock, flags);
	for_each_set_bit(bit, &bitmap, fs_info->sectors_per_page) {
		if (test_and_clear_bit(bit + start_bit, subpage->bitmaps))
			cleared++;
	}
	ASSERT(atomic_read(&subpage->writers) >= cleared);
	last = atomic_sub_and_test(cleared, &subpage->writers);
	spin_unlock_irqrestore(&subpage->lock, flags);
	if (last)
		folio_unlock(folio);
}

#define subpage_test_bitmap_all_set(fs_info, subpage, name)		\
	bitmap_test_range_all_set(subpage->bitmaps,			\
			fs_info->sectors_per_page * btrfs_bitmap_nr_##name, \
			fs_info->sectors_per_page)

#define subpage_test_bitmap_all_zero(fs_info, subpage, name)		\
	bitmap_test_range_all_zero(subpage->bitmaps,			\
			fs_info->sectors_per_page * btrfs_bitmap_nr_##name, \
			fs_info->sectors_per_page)

void btrfs_subpage_set_uptodate(const struct btrfs_fs_info *fs_info,
				struct folio *folio, u64 start, u32 len)
{
	struct btrfs_subpage *subpage = folio_get_private(folio);
	unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
							uptodate, start, len);
	unsigned long flags;

	spin_lock_irqsave(&subpage->lock, flags);
	bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
	if (subpage_test_bitmap_all_set(fs_info, subpage, uptodate))
		folio_mark_uptodate(folio);
	spin_unlock_irqrestore(&subpage->lock, flags);
}

void btrfs_subpage_clear_uptodate(const struct btrfs_fs_info *fs_info,
				  struct folio *folio, u64 start, u32 len)
{
	struct btrfs_subpage *subpage = folio_get_private(folio);
	unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
							uptodate, start, len);
	unsigned long flags;

	spin_lock_irqsave(&subpage->lock, flags);
	bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
	folio_clear_uptodate(folio);
	spin_unlock_irqrestore(&subpage->lock, flags);
}

void btrfs_subpage_set_dirty(const struct btrfs_fs_info *fs_info,
			     struct folio *folio, u64 start, u32 len)
{
	struct btrfs_subpage *subpage = folio_get_private(folio);
	unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
							dirty, start, len);
	unsigned long flags;

	spin_lock_irqsave(&subpage->lock, flags);
	bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
	spin_unlock_irqrestore(&subpage->lock, flags);
	folio_mark_dirty(folio);
}

/*
 * Extra clear_and_test function for the subpage dirty bitmap.
 *
 * Clear the dirty bits for the given range, and return true if that leaves
 * the whole dirty bitmap empty.
 * Return false otherwise.
 *
 * NOTE: Callers should manually clear the page dirty flag for the true case,
 * as we have extra handling for tree blocks.
 */
bool btrfs_subpage_clear_and_test_dirty(const struct btrfs_fs_info *fs_info,
					struct folio *folio, u64 start, u32 len)
{
	struct btrfs_subpage *subpage = folio_get_private(folio);
	unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
							dirty, start, len);
	unsigned long flags;
	bool last = false;

	spin_lock_irqsave(&subpage->lock, flags);
	bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
	if (subpage_test_bitmap_all_zero(fs_info, subpage, dirty))
		last = true;
	spin_unlock_irqrestore(&subpage->lock, flags);
	return last;
}

void btrfs_subpage_clear_dirty(const struct btrfs_fs_info *fs_info,
			       struct folio *folio, u64 start, u32 len)
{
	bool last;

	last = btrfs_subpage_clear_and_test_dirty(fs_info, folio, start, len);
	if (last)
		folio_clear_dirty_for_io(folio);
}

void btrfs_subpage_set_writeback(const struct btrfs_fs_info *fs_info,
				 struct folio *folio, u64 start, u32 len)
{
	struct btrfs_subpage *subpage = folio_get_private(folio);
	unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
							writeback, start, len);
	unsigned long flags;

	spin_lock_irqsave(&subpage->lock, flags);
	bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
	if (!folio_test_writeback(folio))
		folio_start_writeback(folio);
	spin_unlock_irqrestore(&subpage->lock, flags);
}

void btrfs_subpage_clear_writeback(const struct btrfs_fs_info *fs_info,
				   struct folio *folio, u64 start, u32 len)
{
	struct btrfs_subpage *subpage = folio_get_private(folio);
	unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
							writeback, start, len);
	unsigned long flags;

	spin_lock_irqsave(&subpage->lock, flags);
	bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
	if (subpage_test_bitmap_all_zero(fs_info, subpage, writeback)) {
		ASSERT(folio_test_writeback(folio));
		folio_end_writeback(folio);
	}
	spin_unlock_irqrestore(&subpage->lock, flags);
}

void btrfs_subpage_set_ordered(const struct btrfs_fs_info *fs_info,
			       struct folio *folio, u64 start, u32 len)
{
	struct btrfs_subpage *subpage = folio_get_private(folio);
	unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
							ordered, start, len);
	unsigned long flags;

	spin_lock_irqsave(&subpage->lock, flags);
	bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
	folio_set_ordered(folio);
	spin_unlock_irqrestore(&subpage->lock, flags);
}

void btrfs_subpage_clear_ordered(const struct btrfs_fs_info *fs_info,
				 struct folio *folio, u64 start, u32 len)
{
	struct btrfs_subpage *subpage = folio_get_private(folio);
	unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
							ordered, start, len);
	unsigned long flags;

	spin_lock_irqsave(&subpage->lock, flags);
	bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
	if (subpage_test_bitmap_all_zero(fs_info, subpage, ordered))
		folio_clear_ordered(folio);
	spin_unlock_irqrestore(&subpage->lock, flags);
}

void btrfs_subpage_set_checked(const struct btrfs_fs_info *fs_info,
			       struct folio *folio, u64 start, u32 len)
{
	struct btrfs_subpage *subpage = folio_get_private(folio);
	unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
							checked, start, len);
	unsigned long flags;

	spin_lock_irqsave(&subpage->lock, flags);
	bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
	if (subpage_test_bitmap_all_set(fs_info, subpage, checked))
		folio_set_checked(folio);
	spin_unlock_irqrestore(&subpage->lock, flags);
}

void btrfs_subpage_clear_checked(const struct btrfs_fs_info *fs_info,
				 struct folio *folio, u64 start, u32 len)
{
	struct btrfs_subpage *subpage = folio_get_private(folio);
	unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
							checked, start, len);
	unsigned long flags;

	spin_lock_irqsave(&subpage->lock, flags);
	bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
	folio_clear_checked(folio);
	spin_unlock_irqrestore(&subpage->lock, flags);
}

/*
 * Unlike set/clear, which depends on each page's status, all the test bits
 * are tested in the same way.
 */
#define IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(name)				\
bool btrfs_subpage_test_##name(const struct btrfs_fs_info *fs_info,	\
			       struct folio *folio, u64 start, u32 len)	\
{									\
	struct btrfs_subpage *subpage = folio_get_private(folio);	\
	unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,	\
						name, start, len);	\
	unsigned long flags;						\
	bool ret;							\
									\
	spin_lock_irqsave(&subpage->lock, flags);			\
	ret = bitmap_test_range_all_set(subpage->bitmaps, start_bit,	\
				len >> fs_info->sectorsize_bits);	\
	spin_unlock_irqrestore(&subpage->lock, flags);			\
	return ret;							\
}
IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(uptodate);
IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(dirty);
IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(writeback);
IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(ordered);
IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(checked);

/*
 * Note that in selftests (extent-io-tests), we can have a NULL fs_info passed
 * in.  We only test sectorsize == PAGE_SIZE cases so far, thus we can fall
 * back to the regular sectorsize branch.
 */
#define IMPLEMENT_BTRFS_PAGE_OPS(name, folio_set_func,			\
				 folio_clear_func, folio_test_func)	\
void btrfs_folio_set_##name(const struct btrfs_fs_info *fs_info,	\
			    struct folio *folio, u64 start, u32 len)	\
{									\
	if (unlikely(!fs_info) ||					\
	    !btrfs_is_subpage(fs_info, folio->mapping)) {		\
		folio_set_func(folio);					\
		return;							\
	}								\
	btrfs_subpage_set_##name(fs_info, folio, start, len);		\
}									\
void btrfs_folio_clear_##name(const struct btrfs_fs_info *fs_info,	\
			      struct folio *folio, u64 start, u32 len)	\
{									\
	if (unlikely(!fs_info) ||					\
	    !btrfs_is_subpage(fs_info, folio->mapping)) {		\
		folio_clear_func(folio);				\
		return;							\
	}								\
	btrfs_subpage_clear_##name(fs_info, folio, start, len);		\
}									\
bool btrfs_folio_test_##name(const struct btrfs_fs_info *fs_info,	\
			     struct folio *folio, u64 start, u32 len)	\
{									\
	if (unlikely(!fs_info) ||					\
	    !btrfs_is_subpage(fs_info, folio->mapping))			\
		return folio_test_func(folio);				\
	return btrfs_subpage_test_##name(fs_info, folio, start, len);	\
}									\
void btrfs_folio_clamp_set_##name(const struct btrfs_fs_info *fs_info,	\
				  struct folio *folio, u64 start, u32 len) \
{									\
	if (unlikely(!fs_info) ||					\
	    !btrfs_is_subpage(fs_info, folio->mapping)) {		\
		folio_set_func(folio);					\
		return;							\
	}								\
	btrfs_subpage_clamp_range(folio, &start, &len);			\
	btrfs_subpage_set_##name(fs_info, folio, start, len);		\
}									\
void btrfs_folio_clamp_clear_##name(const struct btrfs_fs_info *fs_info, \
				    struct folio *folio, u64 start, u32 len) \
{									\
	if (unlikely(!fs_info) ||					\
	    !btrfs_is_subpage(fs_info, folio->mapping)) {		\
		folio_clear_func(folio);				\
		return;							\
	}								\
	btrfs_subpage_clamp_range(folio, &start, &len);			\
	btrfs_subpage_clear_##name(fs_info, folio, start, len);		\
}									\
bool btrfs_folio_clamp_test_##name(const struct btrfs_fs_info *fs_info, \
				   struct folio *folio, u64 start, u32 len) \
{									\
	if (unlikely(!fs_info) ||					\
	    !btrfs_is_subpage(fs_info, folio->mapping))			\
		return folio_test_func(folio);				\
	btrfs_subpage_clamp_range(folio, &start, &len);			\
	return btrfs_subpage_test_##name(fs_info, folio, start, len);	\
}
IMPLEMENT_BTRFS_PAGE_OPS(uptodate, folio_mark_uptodate, folio_clear_uptodate,
			 folio_test_uptodate);
IMPLEMENT_BTRFS_PAGE_OPS(dirty, folio_mark_dirty, folio_clear_dirty_for_io,
			 folio_test_dirty);
IMPLEMENT_BTRFS_PAGE_OPS(writeback, folio_start_writeback, folio_end_writeback,
			 folio_test_writeback);
IMPLEMENT_BTRFS_PAGE_OPS(ordered, folio_set_ordered, folio_clear_ordered,
			 folio_test_ordered);
IMPLEMENT_BTRFS_PAGE_OPS(checked, folio_set_checked, folio_clear_checked,
			 folio_test_checked);
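
/*
 * Illustrative usage of the helpers generated above: to dirty a single
 * block, a caller could do
 *
 *	btrfs_folio_clamp_set_dirty(fs_info, folio, start, fs_info->sectorsize);
 *
 * For regular sectorsize (or a NULL fs_info in selftests) this falls back to
 * plain folio_mark_dirty(); for subpage it sets just the covered bits in the
 * subpage dirty bitmap and then marks the folio dirty.
 */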

/*
 * Make sure not only the page dirty bit is cleared, but also the subpage
 * dirty bits.
 */
void btrfs_folio_assert_not_dirty(const struct btrfs_fs_info *fs_info,
				  struct folio *folio, u64 start, u32 len)
{
	struct btrfs_subpage *subpage;
	unsigned int start_bit;
	unsigned int nbits;
	unsigned long flags;

	if (!IS_ENABLED(CONFIG_BTRFS_ASSERT))
		return;

	if (!btrfs_is_subpage(fs_info, folio->mapping)) {
		ASSERT(!folio_test_dirty(folio));
		return;
	}

	start_bit = subpage_calc_start_bit(fs_info, folio, dirty, start, len);
	nbits = len >> fs_info->sectorsize_bits;
	subpage = folio_get_private(folio);
	ASSERT(subpage);
	spin_lock_irqsave(&subpage->lock, flags);
	ASSERT(bitmap_test_range_all_zero(subpage->bitmaps, start_bit, nbits));
	spin_unlock_irqrestore(&subpage->lock, flags);
}

/*
 * This is for folios already locked by plain lock_page()/folio_lock(), which
 * doesn't have any subpage awareness.
 *
 * This populates the involved subpage ranges so that subpage helpers can
 * properly unlock them.
 */
void btrfs_folio_set_writer_lock(const struct btrfs_fs_info *fs_info,
				 struct folio *folio, u64 start, u32 len)
{
	struct btrfs_subpage *subpage;
	unsigned long flags;
	unsigned int start_bit;
	unsigned int nbits;
	int ret;

	ASSERT(folio_test_locked(folio));
	if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, folio->mapping))
		return;

	subpage = folio_get_private(folio);
	start_bit = subpage_calc_start_bit(fs_info, folio, locked, start, len);
	nbits = len >> fs_info->sectorsize_bits;
	spin_lock_irqsave(&subpage->lock, flags);
	/* The target range should not yet be locked. */
	ASSERT(bitmap_test_range_all_zero(subpage->bitmaps, start_bit, nbits));
	bitmap_set(subpage->bitmaps, start_bit, nbits);
	ret = atomic_add_return(nbits, &subpage->writers);
	ASSERT(ret <= fs_info->sectors_per_page);
	spin_unlock_irqrestore(&subpage->lock, flags);
}

/*
 * Find any subpage writer locked range inside @folio, starting at file offset
 * @search_start.  The caller should ensure the folio is locked.
 *
 * Return true and update @found_start_ret and @found_len_ret to the first
 * writer locked range.
 * Return false if there is no writer locked range.
 */
bool btrfs_subpage_find_writer_locked(const struct btrfs_fs_info *fs_info,
				      struct folio *folio, u64 search_start,
				      u64 *found_start_ret, u32 *found_len_ret)
{
	struct btrfs_subpage *subpage = folio_get_private(folio);
	const u32 sectors_per_page = fs_info->sectors_per_page;
	const unsigned int len = PAGE_SIZE - offset_in_page(search_start);
	const unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
						locked, search_start, len);
	const unsigned int locked_bitmap_start = sectors_per_page * btrfs_bitmap_nr_locked;
	const unsigned int locked_bitmap_end = locked_bitmap_start + sectors_per_page;
	unsigned long flags;
	int first_zero;
	int first_set;
	bool found = false;

	ASSERT(folio_test_locked(folio));
	spin_lock_irqsave(&subpage->lock, flags);
	first_set = find_next_bit(subpage->bitmaps, locked_bitmap_end, start_bit);
	if (first_set >= locked_bitmap_end)
		goto out;

	found = true;

	*found_start_ret = folio_pos(folio) +
		((first_set - locked_bitmap_start) << fs_info->sectorsize_bits);
	/*
	 * Since @first_set is ensured to be smaller than locked_bitmap_end
	 * here, @found_start_ret should be inside the folio.
	 */
	ASSERT(*found_start_ret < folio_pos(folio) + PAGE_SIZE);

	first_zero = find_next_zero_bit(subpage->bitmaps, locked_bitmap_end, first_set);
	*found_len_ret = (first_zero - first_set) << fs_info->sectorsize_bits;
out:
	spin_unlock_irqrestore(&subpage->lock, flags);
	return found;
}
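
/*
 * Worked example (illustrative, 4K sectorsize on a 64K page): if sectors
 * 4-11 of the folio are writer locked and @search_start is folio_pos(folio),
 * the first set bit maps to sector 4 and the first zero bit after it to
 * sector 12, so *found_start_ret == folio_pos(folio) + 16K and
 * *found_len_ret == 32K.
 */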

#define GET_SUBPAGE_BITMAP(subpage, fs_info, name, dst)			\
{									\
	const int sectors_per_page = fs_info->sectors_per_page;	\
									\
	ASSERT(sectors_per_page < BITS_PER_LONG);			\
	*dst = bitmap_read(subpage->bitmaps,				\
			   sectors_per_page * btrfs_bitmap_nr_##name,	\
			   sectors_per_page);				\
}

void __cold btrfs_subpage_dump_bitmap(const struct btrfs_fs_info *fs_info,
				      struct folio *folio, u64 start, u32 len)
{
	struct btrfs_subpage *subpage;
	const u32 sectors_per_page = fs_info->sectors_per_page;
	unsigned long uptodate_bitmap;
	unsigned long dirty_bitmap;
	unsigned long writeback_bitmap;
	unsigned long ordered_bitmap;
	unsigned long checked_bitmap;
	unsigned long locked_bitmap;
	unsigned long flags;

	ASSERT(folio_test_private(folio) && folio_get_private(folio));
	ASSERT(sectors_per_page > 1);
	subpage = folio_get_private(folio);

	spin_lock_irqsave(&subpage->lock, flags);
	GET_SUBPAGE_BITMAP(subpage, fs_info, uptodate, &uptodate_bitmap);
	GET_SUBPAGE_BITMAP(subpage, fs_info, dirty, &dirty_bitmap);
	GET_SUBPAGE_BITMAP(subpage, fs_info, writeback, &writeback_bitmap);
	GET_SUBPAGE_BITMAP(subpage, fs_info, ordered, &ordered_bitmap);
	GET_SUBPAGE_BITMAP(subpage, fs_info, checked, &checked_bitmap);
	GET_SUBPAGE_BITMAP(subpage, fs_info, locked, &locked_bitmap);
	spin_unlock_irqrestore(&subpage->lock, flags);

	dump_page(folio_page(folio, 0), "btrfs subpage dump");
	btrfs_warn(fs_info,
"start=%llu len=%u page=%llu, bitmaps uptodate=%*pbl dirty=%*pbl locked=%*pbl writeback=%*pbl ordered=%*pbl checked=%*pbl",
		    start, len, folio_pos(folio),
		    sectors_per_page, &uptodate_bitmap,
		    sectors_per_page, &dirty_bitmap,
		    sectors_per_page, &locked_bitmap,
		    sectors_per_page, &writeback_bitmap,
		    sectors_per_page, &ordered_bitmap,
		    sectors_per_page, &checked_bitmap);
}

void btrfs_get_subpage_dirty_bitmap(struct btrfs_fs_info *fs_info,
				    struct folio *folio,
				    unsigned long *ret_bitmap)
{
	struct btrfs_subpage *subpage;
	unsigned long flags;

	ASSERT(folio_test_private(folio) && folio_get_private(folio));
	ASSERT(fs_info->sectors_per_page > 1);
	subpage = folio_get_private(folio);

	spin_lock_irqsave(&subpage->lock, flags);
	GET_SUBPAGE_BITMAP(subpage, fs_info, dirty, ret_bitmap);
	spin_unlock_irqrestore(&subpage->lock, flags);
}