1  // SPDX-License-Identifier: GPL-2.0
2  /*
3   * fs/f2fs/node.c
4   *
5   * Copyright (c) 2012 Samsung Electronics Co., Ltd.
6   *             http://www.samsung.com/
7   */
8  #include <linux/fs.h>
9  #include <linux/f2fs_fs.h>
10  #include <linux/mpage.h>
11  #include <linux/sched/mm.h>
12  #include <linux/blkdev.h>
13  #include <linux/pagevec.h>
14  #include <linux/swap.h>
15  
16  #include "f2fs.h"
17  #include "node.h"
18  #include "segment.h"
19  #include "xattr.h"
20  #include "iostat.h"
21  #include <trace/events/f2fs.h>
22  
23  #define on_f2fs_build_free_nids(nm_i) mutex_is_locked(&(nm_i)->build_lock)
24  
25  static struct kmem_cache *nat_entry_slab;
26  static struct kmem_cache *free_nid_slab;
27  static struct kmem_cache *nat_entry_set_slab;
28  static struct kmem_cache *fsync_node_entry_slab;
29  
30  /*
31   * Check whether the given nid is within node id range.
32   */
33  int f2fs_check_nid_range(struct f2fs_sb_info *sbi, nid_t nid)
34  {
35  	if (unlikely(nid < F2FS_ROOT_INO(sbi) || nid >= NM_I(sbi)->max_nid)) {
36  		set_sbi_flag(sbi, SBI_NEED_FSCK);
37  		f2fs_warn(sbi, "%s: out-of-range nid=%x, run fsck to fix.",
38  			  __func__, nid);
39  		f2fs_handle_error(sbi, ERROR_CORRUPTED_INODE);
40  		return -EFSCORRUPTED;
41  	}
42  	return 0;
43  }
44  
45  bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type)
46  {
47  	struct f2fs_nm_info *nm_i = NM_I(sbi);
48  	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
49  	struct sysinfo val;
50  	unsigned long avail_ram;
51  	unsigned long mem_size = 0;
52  	bool res = false;
53  
54  	if (!nm_i)
55  		return true;
56  
57  	si_meminfo(&val);
58  
59  	/* only uses low memory */
60  	avail_ram = val.totalram - val.totalhigh;
61  
62  	/*
63  	 * give 25%, 25%, 50%, 50%, 25%, 25% of memory to each component, respectively
64  	 */
65  	if (type == FREE_NIDS) {
66  		mem_size = (nm_i->nid_cnt[FREE_NID] *
67  				sizeof(struct free_nid)) >> PAGE_SHIFT;
68  		res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2);
69  	} else if (type == NAT_ENTRIES) {
70  		mem_size = (nm_i->nat_cnt[TOTAL_NAT] *
71  				sizeof(struct nat_entry)) >> PAGE_SHIFT;
72  		res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2);
73  		if (excess_cached_nats(sbi))
74  			res = false;
75  	} else if (type == DIRTY_DENTS) {
76  		if (sbi->sb->s_bdi->wb.dirty_exceeded)
77  			return false;
78  		mem_size = get_pages(sbi, F2FS_DIRTY_DENTS);
79  		res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
80  	} else if (type == INO_ENTRIES) {
81  		int i;
82  
83  		for (i = 0; i < MAX_INO_ENTRY; i++)
84  			mem_size += sbi->im[i].ino_num *
85  						sizeof(struct ino_entry);
86  		mem_size >>= PAGE_SHIFT;
87  		res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
88  	} else if (type == READ_EXTENT_CACHE || type == AGE_EXTENT_CACHE) {
89  		enum extent_type etype = type == READ_EXTENT_CACHE ?
90  						EX_READ : EX_BLOCK_AGE;
91  		struct extent_tree_info *eti = &sbi->extent_tree[etype];
92  
93  		mem_size = (atomic_read(&eti->total_ext_tree) *
94  				sizeof(struct extent_tree) +
95  				atomic_read(&eti->total_ext_node) *
96  				sizeof(struct extent_node)) >> PAGE_SHIFT;
97  		res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2);
98  	} else if (type == DISCARD_CACHE) {
99  		mem_size = (atomic_read(&dcc->discard_cmd_cnt) *
100  				sizeof(struct discard_cmd)) >> PAGE_SHIFT;
101  		res = mem_size < (avail_ram * nm_i->ram_thresh / 100);
102  	} else if (type == COMPRESS_PAGE) {
103  #ifdef CONFIG_F2FS_FS_COMPRESSION
104  		unsigned long free_ram = val.freeram;
105  
106  		/*
107  		 * if free memory is lower than the watermark or the cached page
108  		 * count exceeds the threshold, deny caching compressed pages.
109  		 */
110  		res = (free_ram > avail_ram * sbi->compress_watermark / 100) &&
111  			(COMPRESS_MAPPING(sbi)->nrpages <
112  			 free_ram * sbi->compress_percent / 100);
113  #else
114  		res = false;
115  #endif
116  	} else {
117  		if (!sbi->sb->s_bdi->wb.dirty_exceeded)
118  			return true;
119  	}
120  	return res;
121  }
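
/*
 * An illustrative budget (the numbers below are assumptions for the example,
 * not defaults): with 1 GiB of low memory and ram_thresh = 10, the overall
 * budget is about 102 MiB; FREE_NIDS, NAT_ENTRIES and the extent caches may
 * each use up to a quarter of it (">> 2"), DIRTY_DENTS and INO_ENTRIES up to
 * half (">> 1"), and DISCARD_CACHE the whole budget.
 */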
122  
123  static void clear_node_page_dirty(struct page *page)
124  {
125  	if (PageDirty(page)) {
126  		f2fs_clear_page_cache_dirty_tag(page_folio(page));
127  		clear_page_dirty_for_io(page);
128  		dec_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES);
129  	}
130  	ClearPageUptodate(page);
131  }
132  
133  static struct page *get_current_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
134  {
135  	return f2fs_get_meta_page_retry(sbi, current_nat_addr(sbi, nid));
136  }
137  
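
/*
 * Note: each NAT block has two on-disk copies; get_next_nat_page() below
 * duplicates the current copy at the alternate address and flips the
 * per-block bit in the NAT bitmap via set_to_next_nat(), so subsequent
 * flushes of this NAT block go to the other location.
 */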
138  static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
139  {
140  	struct page *src_page;
141  	struct page *dst_page;
142  	pgoff_t dst_off;
143  	void *src_addr;
144  	void *dst_addr;
145  	struct f2fs_nm_info *nm_i = NM_I(sbi);
146  
147  	dst_off = next_nat_addr(sbi, current_nat_addr(sbi, nid));
148  
149  	/* get current nat block page with lock */
150  	src_page = get_current_nat_page(sbi, nid);
151  	if (IS_ERR(src_page))
152  		return src_page;
153  	dst_page = f2fs_grab_meta_page(sbi, dst_off);
154  	f2fs_bug_on(sbi, PageDirty(src_page));
155  
156  	src_addr = page_address(src_page);
157  	dst_addr = page_address(dst_page);
158  	memcpy(dst_addr, src_addr, PAGE_SIZE);
159  	set_page_dirty(dst_page);
160  	f2fs_put_page(src_page, 1);
161  
162  	set_to_next_nat(nm_i, nid);
163  
164  	return dst_page;
165  }
166  
167  static struct nat_entry *__alloc_nat_entry(struct f2fs_sb_info *sbi,
168  						nid_t nid, bool no_fail)
169  {
170  	struct nat_entry *new;
171  
172  	new = f2fs_kmem_cache_alloc(nat_entry_slab,
173  					GFP_F2FS_ZERO, no_fail, sbi);
174  	if (new) {
175  		nat_set_nid(new, nid);
176  		nat_reset_flag(new);
177  	}
178  	return new;
179  }
180  
181  static void __free_nat_entry(struct nat_entry *e)
182  {
183  	kmem_cache_free(nat_entry_slab, e);
184  }
185  
186  /* must be locked by nat_tree_lock */
187  static struct nat_entry *__init_nat_entry(struct f2fs_nm_info *nm_i,
188  	struct nat_entry *ne, struct f2fs_nat_entry *raw_ne, bool no_fail)
189  {
190  	if (no_fail)
191  		f2fs_radix_tree_insert(&nm_i->nat_root, nat_get_nid(ne), ne);
192  	else if (radix_tree_insert(&nm_i->nat_root, nat_get_nid(ne), ne))
193  		return NULL;
194  
195  	if (raw_ne)
196  		node_info_from_raw_nat(&ne->ni, raw_ne);
197  
198  	spin_lock(&nm_i->nat_list_lock);
199  	list_add_tail(&ne->list, &nm_i->nat_entries);
200  	spin_unlock(&nm_i->nat_list_lock);
201  
202  	nm_i->nat_cnt[TOTAL_NAT]++;
203  	nm_i->nat_cnt[RECLAIMABLE_NAT]++;
204  	return ne;
205  }
206  
207  static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n)
208  {
209  	struct nat_entry *ne;
210  
211  	ne = radix_tree_lookup(&nm_i->nat_root, n);
212  
213  	/* for a recently accessed nat entry, move it to the tail of the LRU list */
214  	if (ne && !get_nat_flag(ne, IS_DIRTY)) {
215  		spin_lock(&nm_i->nat_list_lock);
216  		if (!list_empty(&ne->list))
217  			list_move_tail(&ne->list, &nm_i->nat_entries);
218  		spin_unlock(&nm_i->nat_list_lock);
219  	}
220  
221  	return ne;
222  }
223  
224  static unsigned int __gang_lookup_nat_cache(struct f2fs_nm_info *nm_i,
225  		nid_t start, unsigned int nr, struct nat_entry **ep)
226  {
227  	return radix_tree_gang_lookup(&nm_i->nat_root, (void **)ep, start, nr);
228  }
229  
230  static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct nat_entry *e)
231  {
232  	radix_tree_delete(&nm_i->nat_root, nat_get_nid(e));
233  	nm_i->nat_cnt[TOTAL_NAT]--;
234  	nm_i->nat_cnt[RECLAIMABLE_NAT]--;
235  	__free_nat_entry(e);
236  }
237  
238  static struct nat_entry_set *__grab_nat_entry_set(struct f2fs_nm_info *nm_i,
239  							struct nat_entry *ne)
240  {
241  	nid_t set = NAT_BLOCK_OFFSET(ne->ni.nid);
242  	struct nat_entry_set *head;
243  
244  	head = radix_tree_lookup(&nm_i->nat_set_root, set);
245  	if (!head) {
246  		head = f2fs_kmem_cache_alloc(nat_entry_set_slab,
247  						GFP_NOFS, true, NULL);
248  
249  		INIT_LIST_HEAD(&head->entry_list);
250  		INIT_LIST_HEAD(&head->set_list);
251  		head->set = set;
252  		head->entry_cnt = 0;
253  		f2fs_radix_tree_insert(&nm_i->nat_set_root, set, head);
254  	}
255  	return head;
256  }
257  
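
/*
 * Dirty nat entries are grouped into nat_entry_set objects keyed by their
 * NAT block (NAT_BLOCK_OFFSET), so entries that belong to the same on-disk
 * NAT block can be flushed together at checkpoint time.
 */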
258  static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i,
259  						struct nat_entry *ne)
260  {
261  	struct nat_entry_set *head;
262  	bool new_ne = nat_get_blkaddr(ne) == NEW_ADDR;
263  
264  	if (!new_ne)
265  		head = __grab_nat_entry_set(nm_i, ne);
266  
267  	/*
268  	 * update entry_cnt in the following cases:
269  	 * 1. updating NEW_ADDR to a valid block address;
270  	 * 2. updating an old block address to a new one;
271  	 */
272  	if (!new_ne && (get_nat_flag(ne, IS_PREALLOC) ||
273  				!get_nat_flag(ne, IS_DIRTY)))
274  		head->entry_cnt++;
275  
276  	set_nat_flag(ne, IS_PREALLOC, new_ne);
277  
278  	if (get_nat_flag(ne, IS_DIRTY))
279  		goto refresh_list;
280  
281  	nm_i->nat_cnt[DIRTY_NAT]++;
282  	nm_i->nat_cnt[RECLAIMABLE_NAT]--;
283  	set_nat_flag(ne, IS_DIRTY, true);
284  refresh_list:
285  	spin_lock(&nm_i->nat_list_lock);
286  	if (new_ne)
287  		list_del_init(&ne->list);
288  	else
289  		list_move_tail(&ne->list, &head->entry_list);
290  	spin_unlock(&nm_i->nat_list_lock);
291  }
292  
293  static void __clear_nat_cache_dirty(struct f2fs_nm_info *nm_i,
294  		struct nat_entry_set *set, struct nat_entry *ne)
295  {
296  	spin_lock(&nm_i->nat_list_lock);
297  	list_move_tail(&ne->list, &nm_i->nat_entries);
298  	spin_unlock(&nm_i->nat_list_lock);
299  
300  	set_nat_flag(ne, IS_DIRTY, false);
301  	set->entry_cnt--;
302  	nm_i->nat_cnt[DIRTY_NAT]--;
303  	nm_i->nat_cnt[RECLAIMABLE_NAT]++;
304  }
305  
306  static unsigned int __gang_lookup_nat_set(struct f2fs_nm_info *nm_i,
307  		nid_t start, unsigned int nr, struct nat_entry_set **ep)
308  {
309  	return radix_tree_gang_lookup(&nm_i->nat_set_root, (void **)ep,
310  							start, nr);
311  }
312  
313  bool f2fs_in_warm_node_list(struct f2fs_sb_info *sbi, struct page *page)
314  {
315  	return NODE_MAPPING(sbi) == page->mapping &&
316  			IS_DNODE(page) && is_cold_node(page);
317  }
318  
319  void f2fs_init_fsync_node_info(struct f2fs_sb_info *sbi)
320  {
321  	spin_lock_init(&sbi->fsync_node_lock);
322  	INIT_LIST_HEAD(&sbi->fsync_node_list);
323  	sbi->fsync_seg_id = 0;
324  	sbi->fsync_node_num = 0;
325  }
326  
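
/*
 * The fsync_node_list tracks node pages submitted for fsync; the seq_id
 * returned here is later used by the fsync path to wait for writeback of
 * all node pages issued up to that point.
 */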
327  static unsigned int f2fs_add_fsync_node_entry(struct f2fs_sb_info *sbi,
328  							struct page *page)
329  {
330  	struct fsync_node_entry *fn;
331  	unsigned long flags;
332  	unsigned int seq_id;
333  
334  	fn = f2fs_kmem_cache_alloc(fsync_node_entry_slab,
335  					GFP_NOFS, true, NULL);
336  
337  	get_page(page);
338  	fn->page = page;
339  	INIT_LIST_HEAD(&fn->list);
340  
341  	spin_lock_irqsave(&sbi->fsync_node_lock, flags);
342  	list_add_tail(&fn->list, &sbi->fsync_node_list);
343  	fn->seq_id = sbi->fsync_seg_id++;
344  	seq_id = fn->seq_id;
345  	sbi->fsync_node_num++;
346  	spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);
347  
348  	return seq_id;
349  }
350  
351  void f2fs_del_fsync_node_entry(struct f2fs_sb_info *sbi, struct page *page)
352  {
353  	struct fsync_node_entry *fn;
354  	unsigned long flags;
355  
356  	spin_lock_irqsave(&sbi->fsync_node_lock, flags);
357  	list_for_each_entry(fn, &sbi->fsync_node_list, list) {
358  		if (fn->page == page) {
359  			list_del(&fn->list);
360  			sbi->fsync_node_num--;
361  			spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);
362  			kmem_cache_free(fsync_node_entry_slab, fn);
363  			put_page(page);
364  			return;
365  		}
366  	}
367  	spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);
368  	f2fs_bug_on(sbi, 1);
369  }
370  
371  void f2fs_reset_fsync_node_info(struct f2fs_sb_info *sbi)
372  {
373  	unsigned long flags;
374  
375  	spin_lock_irqsave(&sbi->fsync_node_lock, flags);
376  	sbi->fsync_seg_id = 0;
377  	spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);
378  }
379  
380  int f2fs_need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid)
381  {
382  	struct f2fs_nm_info *nm_i = NM_I(sbi);
383  	struct nat_entry *e;
384  	bool need = false;
385  
386  	f2fs_down_read(&nm_i->nat_tree_lock);
387  	e = __lookup_nat_cache(nm_i, nid);
388  	if (e) {
389  		if (!get_nat_flag(e, IS_CHECKPOINTED) &&
390  				!get_nat_flag(e, HAS_FSYNCED_INODE))
391  			need = true;
392  	}
393  	f2fs_up_read(&nm_i->nat_tree_lock);
394  	return need;
395  }
396  
397  bool f2fs_is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid)
398  {
399  	struct f2fs_nm_info *nm_i = NM_I(sbi);
400  	struct nat_entry *e;
401  	bool is_cp = true;
402  
403  	f2fs_down_read(&nm_i->nat_tree_lock);
404  	e = __lookup_nat_cache(nm_i, nid);
405  	if (e && !get_nat_flag(e, IS_CHECKPOINTED))
406  		is_cp = false;
407  	f2fs_up_read(&nm_i->nat_tree_lock);
408  	return is_cp;
409  }
410  
411  bool f2fs_need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino)
412  {
413  	struct f2fs_nm_info *nm_i = NM_I(sbi);
414  	struct nat_entry *e;
415  	bool need_update = true;
416  
417  	f2fs_down_read(&nm_i->nat_tree_lock);
418  	e = __lookup_nat_cache(nm_i, ino);
419  	if (e && get_nat_flag(e, HAS_LAST_FSYNC) &&
420  			(get_nat_flag(e, IS_CHECKPOINTED) ||
421  			 get_nat_flag(e, HAS_FSYNCED_INODE)))
422  		need_update = false;
423  	f2fs_up_read(&nm_i->nat_tree_lock);
424  	return need_update;
425  }
426  
427  /* must be locked by nat_tree_lock */
428  static void cache_nat_entry(struct f2fs_sb_info *sbi, nid_t nid,
429  						struct f2fs_nat_entry *ne)
430  {
431  	struct f2fs_nm_info *nm_i = NM_I(sbi);
432  	struct nat_entry *new, *e;
433  
434  	/* Let's mitigate lock contention of nat_tree_lock during checkpoint */
435  	if (f2fs_rwsem_is_locked(&sbi->cp_global_sem))
436  		return;
437  
438  	new = __alloc_nat_entry(sbi, nid, false);
439  	if (!new)
440  		return;
441  
442  	f2fs_down_write(&nm_i->nat_tree_lock);
443  	e = __lookup_nat_cache(nm_i, nid);
444  	if (!e)
445  		e = __init_nat_entry(nm_i, new, ne, false);
446  	else
447  		f2fs_bug_on(sbi, nat_get_ino(e) != le32_to_cpu(ne->ino) ||
448  				nat_get_blkaddr(e) !=
449  					le32_to_cpu(ne->block_addr) ||
450  				nat_get_version(e) != ne->version);
451  	f2fs_up_write(&nm_i->nat_tree_lock);
452  	if (e != new)
453  		__free_nat_entry(new);
454  }
455  
456  static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
457  			block_t new_blkaddr, bool fsync_done)
458  {
459  	struct f2fs_nm_info *nm_i = NM_I(sbi);
460  	struct nat_entry *e;
461  	struct nat_entry *new = __alloc_nat_entry(sbi, ni->nid, true);
462  
463  	f2fs_down_write(&nm_i->nat_tree_lock);
464  	e = __lookup_nat_cache(nm_i, ni->nid);
465  	if (!e) {
466  		e = __init_nat_entry(nm_i, new, NULL, true);
467  		copy_node_info(&e->ni, ni);
468  		f2fs_bug_on(sbi, ni->blk_addr == NEW_ADDR);
469  	} else if (new_blkaddr == NEW_ADDR) {
470  		/*
471  		 * when a nid is reallocated, the previous nat entry
472  		 * can remain in the nat cache, so reinitialize it
473  		 * with the new information.
474  		 */
475  		copy_node_info(&e->ni, ni);
476  		f2fs_bug_on(sbi, ni->blk_addr != NULL_ADDR);
477  	}
478  	/* let's free early to reduce memory consumption */
479  	if (e != new)
480  		__free_nat_entry(new);
481  
482  	/* sanity check */
483  	f2fs_bug_on(sbi, nat_get_blkaddr(e) != ni->blk_addr);
484  	f2fs_bug_on(sbi, nat_get_blkaddr(e) == NULL_ADDR &&
485  			new_blkaddr == NULL_ADDR);
486  	f2fs_bug_on(sbi, nat_get_blkaddr(e) == NEW_ADDR &&
487  			new_blkaddr == NEW_ADDR);
488  	f2fs_bug_on(sbi, __is_valid_data_blkaddr(nat_get_blkaddr(e)) &&
489  			new_blkaddr == NEW_ADDR);
490  
491  	/* increment version no as node is removed */
492  	if (nat_get_blkaddr(e) != NEW_ADDR && new_blkaddr == NULL_ADDR) {
493  		unsigned char version = nat_get_version(e);
494  
495  		nat_set_version(e, inc_node_version(version));
496  	}
497  
498  	/* change address */
499  	nat_set_blkaddr(e, new_blkaddr);
500  	if (!__is_valid_data_blkaddr(new_blkaddr))
501  		set_nat_flag(e, IS_CHECKPOINTED, false);
502  	__set_nat_cache_dirty(nm_i, e);
503  
504  	/* update fsync_mark if its inode nat entry is still alive */
505  	if (ni->nid != ni->ino)
506  		e = __lookup_nat_cache(nm_i, ni->ino);
507  	if (e) {
508  		if (fsync_done && ni->nid == ni->ino)
509  			set_nat_flag(e, HAS_FSYNCED_INODE, true);
510  		set_nat_flag(e, HAS_LAST_FSYNC, fsync_done);
511  	}
512  	f2fs_up_write(&nm_i->nat_tree_lock);
513  }
514  
515  int f2fs_try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
516  {
517  	struct f2fs_nm_info *nm_i = NM_I(sbi);
518  	int nr = nr_shrink;
519  
520  	if (!f2fs_down_write_trylock(&nm_i->nat_tree_lock))
521  		return 0;
522  
523  	spin_lock(&nm_i->nat_list_lock);
524  	while (nr_shrink) {
525  		struct nat_entry *ne;
526  
527  		if (list_empty(&nm_i->nat_entries))
528  			break;
529  
530  		ne = list_first_entry(&nm_i->nat_entries,
531  					struct nat_entry, list);
532  		list_del(&ne->list);
533  		spin_unlock(&nm_i->nat_list_lock);
534  
535  		__del_from_nat_cache(nm_i, ne);
536  		nr_shrink--;
537  
538  		spin_lock(&nm_i->nat_list_lock);
539  	}
540  	spin_unlock(&nm_i->nat_list_lock);
541  
542  	f2fs_up_write(&nm_i->nat_tree_lock);
543  	return nr - nr_shrink;
544  }
545  
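
/*
 * Resolve node info for @nid in order: the in-memory NAT cache, then the
 * NAT journal kept in the hot-data curseg, and finally the on-disk NAT
 * block; a successful journal or on-disk lookup is added back to the cache.
 */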
546  int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
547  				struct node_info *ni, bool checkpoint_context)
548  {
549  	struct f2fs_nm_info *nm_i = NM_I(sbi);
550  	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
551  	struct f2fs_journal *journal = curseg->journal;
552  	nid_t start_nid = START_NID(nid);
553  	struct f2fs_nat_block *nat_blk;
554  	struct page *page = NULL;
555  	struct f2fs_nat_entry ne;
556  	struct nat_entry *e;
557  	pgoff_t index;
558  	block_t blkaddr;
559  	int i;
560  
561  	ni->nid = nid;
562  retry:
563  	/* Check nat cache */
564  	f2fs_down_read(&nm_i->nat_tree_lock);
565  	e = __lookup_nat_cache(nm_i, nid);
566  	if (e) {
567  		ni->ino = nat_get_ino(e);
568  		ni->blk_addr = nat_get_blkaddr(e);
569  		ni->version = nat_get_version(e);
570  		f2fs_up_read(&nm_i->nat_tree_lock);
571  		return 0;
572  	}
573  
574  	/*
575  	 * Check the current segment summary by trying to grab journal_rwsem first.
576  	 * This rwsem is on the critical path of checkpoint, which also takes the
577  	 * above nat_tree_lock. Therefore, if we fail to grab it here, release
578  	 * nat_tree_lock and retry rather than stalling the checkpoint.
579  	 */
580  	if (!f2fs_rwsem_is_locked(&sbi->cp_global_sem) || checkpoint_context) {
581  		down_read(&curseg->journal_rwsem);
582  	} else if (f2fs_rwsem_is_contended(&nm_i->nat_tree_lock) ||
583  				!down_read_trylock(&curseg->journal_rwsem)) {
584  		f2fs_up_read(&nm_i->nat_tree_lock);
585  		goto retry;
586  	}
587  
588  	i = f2fs_lookup_journal_in_cursum(journal, NAT_JOURNAL, nid, 0);
589  	if (i >= 0) {
590  		ne = nat_in_journal(journal, i);
591  		node_info_from_raw_nat(ni, &ne);
592  	}
593  	up_read(&curseg->journal_rwsem);
594  	if (i >= 0) {
595  		f2fs_up_read(&nm_i->nat_tree_lock);
596  		goto cache;
597  	}
598  
599  	/* Fill node_info from nat page */
600  	index = current_nat_addr(sbi, nid);
601  	f2fs_up_read(&nm_i->nat_tree_lock);
602  
603  	page = f2fs_get_meta_page(sbi, index);
604  	if (IS_ERR(page))
605  		return PTR_ERR(page);
606  
607  	nat_blk = (struct f2fs_nat_block *)page_address(page);
608  	ne = nat_blk->entries[nid - start_nid];
609  	node_info_from_raw_nat(ni, &ne);
610  	f2fs_put_page(page, 1);
611  cache:
612  	blkaddr = le32_to_cpu(ne.block_addr);
613  	if (__is_valid_data_blkaddr(blkaddr) &&
614  		!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE))
615  		return -EFAULT;
616  
617  	/* cache nat entry */
618  	cache_nat_entry(sbi, nid, &ne);
619  	return 0;
620  }
621  
622  /*
623   * Readahead @n node pages that are siblings under the @parent node.
624   */
625  static void f2fs_ra_node_pages(struct page *parent, int start, int n)
626  {
627  	struct f2fs_sb_info *sbi = F2FS_P_SB(parent);
628  	struct blk_plug plug;
629  	int i, end;
630  	nid_t nid;
631  
632  	blk_start_plug(&plug);
633  
634  	/* Then, try readahead for siblings of the desired node */
635  	end = start + n;
636  	end = min(end, (int)NIDS_PER_BLOCK);
637  	for (i = start; i < end; i++) {
638  		nid = get_nid(parent, i, false);
639  		f2fs_ra_node_page(sbi, nid);
640  	}
641  
642  	blk_finish_plug(&plug);
643  }
644  
645  pgoff_t f2fs_get_next_page_offset(struct dnode_of_data *dn, pgoff_t pgofs)
646  {
647  	const long direct_index = ADDRS_PER_INODE(dn->inode);
648  	const long direct_blks = ADDRS_PER_BLOCK(dn->inode);
649  	const long indirect_blks = ADDRS_PER_BLOCK(dn->inode) * NIDS_PER_BLOCK;
650  	unsigned int skipped_unit = ADDRS_PER_BLOCK(dn->inode);
651  	int cur_level = dn->cur_level;
652  	int max_level = dn->max_level;
653  	pgoff_t base = 0;
654  
655  	if (!dn->max_level)
656  		return pgofs + 1;
657  
658  	while (max_level-- > cur_level)
659  		skipped_unit *= NIDS_PER_BLOCK;
660  
661  	switch (dn->max_level) {
662  	case 3:
663  		base += 2 * indirect_blks;
664  		fallthrough;
665  	case 2:
666  		base += 2 * direct_blks;
667  		fallthrough;
668  	case 1:
669  		base += direct_index;
670  		break;
671  	default:
672  		f2fs_bug_on(F2FS_I_SB(dn->inode), 1);
673  	}
674  
675  	return ((pgofs - base) / skipped_unit + 1) * skipped_unit + base;
676  }
677  
678  /*
679   * The maximum depth is four.
680   * Offset[0] will have raw inode offset.
681   */
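
/*
 * A worked example, assuming the common 4KB geometry of 923 addresses in
 * the inode and 1018 entries per direct/indirect block (the exact values
 * depend on the inode's inline areas):
 *	block 0    -> level 0, address stored in the inode itself
 *	block 923  -> level 1, {NODE_DIR1_BLOCK, 0}
 *	block 2000 -> level 1, {NODE_DIR2_BLOCK, 59}
 *	block 5000 -> level 2, {NODE_IND1_BLOCK, 2, 5}
 */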
682  static int get_node_path(struct inode *inode, long block,
683  				int offset[4], unsigned int noffset[4])
684  {
685  	const long direct_index = ADDRS_PER_INODE(inode);
686  	const long direct_blks = ADDRS_PER_BLOCK(inode);
687  	const long dptrs_per_blk = NIDS_PER_BLOCK;
688  	const long indirect_blks = ADDRS_PER_BLOCK(inode) * NIDS_PER_BLOCK;
689  	const long dindirect_blks = indirect_blks * NIDS_PER_BLOCK;
690  	int n = 0;
691  	int level = 0;
692  
693  	noffset[0] = 0;
694  
695  	if (block < direct_index) {
696  		offset[n] = block;
697  		goto got;
698  	}
699  	block -= direct_index;
700  	if (block < direct_blks) {
701  		offset[n++] = NODE_DIR1_BLOCK;
702  		noffset[n] = 1;
703  		offset[n] = block;
704  		level = 1;
705  		goto got;
706  	}
707  	block -= direct_blks;
708  	if (block < direct_blks) {
709  		offset[n++] = NODE_DIR2_BLOCK;
710  		noffset[n] = 2;
711  		offset[n] = block;
712  		level = 1;
713  		goto got;
714  	}
715  	block -= direct_blks;
716  	if (block < indirect_blks) {
717  		offset[n++] = NODE_IND1_BLOCK;
718  		noffset[n] = 3;
719  		offset[n++] = block / direct_blks;
720  		noffset[n] = 4 + offset[n - 1];
721  		offset[n] = block % direct_blks;
722  		level = 2;
723  		goto got;
724  	}
725  	block -= indirect_blks;
726  	if (block < indirect_blks) {
727  		offset[n++] = NODE_IND2_BLOCK;
728  		noffset[n] = 4 + dptrs_per_blk;
729  		offset[n++] = block / direct_blks;
730  		noffset[n] = 5 + dptrs_per_blk + offset[n - 1];
731  		offset[n] = block % direct_blks;
732  		level = 2;
733  		goto got;
734  	}
735  	block -= indirect_blks;
736  	if (block < dindirect_blks) {
737  		offset[n++] = NODE_DIND_BLOCK;
738  		noffset[n] = 5 + (dptrs_per_blk * 2);
739  		offset[n++] = block / indirect_blks;
740  		noffset[n] = 6 + (dptrs_per_blk * 2) +
741  			      offset[n - 1] * (dptrs_per_blk + 1);
742  		offset[n++] = (block / direct_blks) % dptrs_per_blk;
743  		noffset[n] = 7 + (dptrs_per_blk * 2) +
744  			      offset[n - 2] * (dptrs_per_blk + 1) +
745  			      offset[n - 1];
746  		offset[n] = block % direct_blks;
747  		level = 3;
748  		goto got;
749  	} else {
750  		return -E2BIG;
751  	}
752  got:
753  	return level;
754  }
755  
756  /*
757   * Caller should call f2fs_put_dnode(dn).
758   * Also, it should grab and release a rwsem by calling f2fs_lock_op() and
759   * f2fs_unlock_op() only if mode is set to ALLOC_NODE.
760   */
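
/*
 * A minimal lookup sketch (illustrative only; error paths elided):
 *
 *	struct dnode_of_data dn;
 *
 *	set_new_dnode(&dn, inode, NULL, NULL, 0);
 *	err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
 *	if (!err) {
 *		blkaddr = f2fs_data_blkaddr(&dn);
 *		f2fs_put_dnode(&dn);
 *	}
 */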
761  int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode)
762  {
763  	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
764  	struct page *npage[4];
765  	struct page *parent = NULL;
766  	int offset[4];
767  	unsigned int noffset[4];
768  	nid_t nids[4];
769  	int level, i = 0;
770  	int err = 0;
771  
772  	level = get_node_path(dn->inode, index, offset, noffset);
773  	if (level < 0)
774  		return level;
775  
776  	nids[0] = dn->inode->i_ino;
777  	npage[0] = dn->inode_page;
778  
779  	if (!npage[0]) {
780  		npage[0] = f2fs_get_node_page(sbi, nids[0]);
781  		if (IS_ERR(npage[0]))
782  			return PTR_ERR(npage[0]);
783  	}
784  
785  	/* if inline_data is set, should not report any block indices */
786  	if (f2fs_has_inline_data(dn->inode) && index) {
787  		err = -ENOENT;
788  		f2fs_put_page(npage[0], 1);
789  		goto release_out;
790  	}
791  
792  	parent = npage[0];
793  	if (level != 0)
794  		nids[1] = get_nid(parent, offset[0], true);
795  	dn->inode_page = npage[0];
796  	dn->inode_page_locked = true;
797  
798  	/* get indirect or direct nodes */
799  	for (i = 1; i <= level; i++) {
800  		bool done = false;
801  
802  		if (!nids[i] && mode == ALLOC_NODE) {
803  			/* alloc new node */
804  			if (!f2fs_alloc_nid(sbi, &(nids[i]))) {
805  				err = -ENOSPC;
806  				goto release_pages;
807  			}
808  
809  			dn->nid = nids[i];
810  			npage[i] = f2fs_new_node_page(dn, noffset[i]);
811  			if (IS_ERR(npage[i])) {
812  				f2fs_alloc_nid_failed(sbi, nids[i]);
813  				err = PTR_ERR(npage[i]);
814  				goto release_pages;
815  			}
816  
817  			set_nid(parent, offset[i - 1], nids[i], i == 1);
818  			f2fs_alloc_nid_done(sbi, nids[i]);
819  			done = true;
820  		} else if (mode == LOOKUP_NODE_RA && i == level && level > 1) {
821  			npage[i] = f2fs_get_node_page_ra(parent, offset[i - 1]);
822  			if (IS_ERR(npage[i])) {
823  				err = PTR_ERR(npage[i]);
824  				goto release_pages;
825  			}
826  			done = true;
827  		}
828  		if (i == 1) {
829  			dn->inode_page_locked = false;
830  			unlock_page(parent);
831  		} else {
832  			f2fs_put_page(parent, 1);
833  		}
834  
835  		if (!done) {
836  			npage[i] = f2fs_get_node_page(sbi, nids[i]);
837  			if (IS_ERR(npage[i])) {
838  				err = PTR_ERR(npage[i]);
839  				f2fs_put_page(npage[0], 0);
840  				goto release_out;
841  			}
842  		}
843  		if (i < level) {
844  			parent = npage[i];
845  			nids[i + 1] = get_nid(parent, offset[i], false);
846  		}
847  	}
848  	dn->nid = nids[level];
849  	dn->ofs_in_node = offset[level];
850  	dn->node_page = npage[level];
851  	dn->data_blkaddr = f2fs_data_blkaddr(dn);
852  
853  	if (is_inode_flag_set(dn->inode, FI_COMPRESSED_FILE) &&
854  					f2fs_sb_has_readonly(sbi)) {
855  		unsigned int cluster_size = F2FS_I(dn->inode)->i_cluster_size;
856  		unsigned int ofs_in_node = dn->ofs_in_node;
857  		pgoff_t fofs = index;
858  		unsigned int c_len;
859  		block_t blkaddr;
860  
861  		/* should align fofs and ofs_in_node to cluster_size */
862  		if (fofs % cluster_size) {
863  			fofs = round_down(fofs, cluster_size);
864  			ofs_in_node = round_down(ofs_in_node, cluster_size);
865  		}
866  
867  		c_len = f2fs_cluster_blocks_are_contiguous(dn, ofs_in_node);
868  		if (!c_len)
869  			goto out;
870  
871  		blkaddr = data_blkaddr(dn->inode, dn->node_page, ofs_in_node);
872  		if (blkaddr == COMPRESS_ADDR)
873  			blkaddr = data_blkaddr(dn->inode, dn->node_page,
874  						ofs_in_node + 1);
875  
876  		f2fs_update_read_extent_tree_range_compressed(dn->inode,
877  					fofs, blkaddr, cluster_size, c_len);
878  	}
879  out:
880  	return 0;
881  
882  release_pages:
883  	f2fs_put_page(parent, 1);
884  	if (i > 1)
885  		f2fs_put_page(npage[0], 0);
886  release_out:
887  	dn->inode_page = NULL;
888  	dn->node_page = NULL;
889  	if (err == -ENOENT) {
890  		dn->cur_level = i;
891  		dn->max_level = level;
892  		dn->ofs_in_node = offset[level];
893  	}
894  	return err;
895  }
896  
897  static int truncate_node(struct dnode_of_data *dn)
898  {
899  	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
900  	struct node_info ni;
901  	int err;
902  	pgoff_t index;
903  
904  	err = f2fs_get_node_info(sbi, dn->nid, &ni, false);
905  	if (err)
906  		return err;
907  
908  	/* Deallocate node address */
909  	f2fs_invalidate_blocks(sbi, ni.blk_addr);
910  	dec_valid_node_count(sbi, dn->inode, dn->nid == dn->inode->i_ino);
911  	set_node_addr(sbi, &ni, NULL_ADDR, false);
912  
913  	if (dn->nid == dn->inode->i_ino) {
914  		f2fs_remove_orphan_inode(sbi, dn->nid);
915  		dec_valid_inode_count(sbi);
916  		f2fs_inode_synced(dn->inode);
917  	}
918  
919  	clear_node_page_dirty(dn->node_page);
920  	set_sbi_flag(sbi, SBI_IS_DIRTY);
921  
922  	index = page_folio(dn->node_page)->index;
923  	f2fs_put_page(dn->node_page, 1);
924  
925  	invalidate_mapping_pages(NODE_MAPPING(sbi),
926  			index, index);
927  
928  	dn->node_page = NULL;
929  	trace_f2fs_truncate_node(dn->inode, dn->nid, ni.blk_addr);
930  
931  	return 0;
932  }
933  
934  static int truncate_dnode(struct dnode_of_data *dn)
935  {
936  	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
937  	struct page *page;
938  	int err;
939  
940  	if (dn->nid == 0)
941  		return 1;
942  
943  	/* get direct node */
944  	page = f2fs_get_node_page(sbi, dn->nid);
945  	if (PTR_ERR(page) == -ENOENT)
946  		return 1;
947  	else if (IS_ERR(page))
948  		return PTR_ERR(page);
949  
950  	if (IS_INODE(page) || ino_of_node(page) != dn->inode->i_ino) {
951  		f2fs_err(sbi, "incorrect node reference, ino: %lu, nid: %u, ino_of_node: %u",
952  				dn->inode->i_ino, dn->nid, ino_of_node(page));
953  		set_sbi_flag(sbi, SBI_NEED_FSCK);
954  		f2fs_handle_error(sbi, ERROR_INVALID_NODE_REFERENCE);
955  		f2fs_put_page(page, 1);
956  		return -EFSCORRUPTED;
957  	}
958  
959  	/* Make dnode_of_data for parameter */
960  	dn->node_page = page;
961  	dn->ofs_in_node = 0;
962  	f2fs_truncate_data_blocks_range(dn, ADDRS_PER_BLOCK(dn->inode));
963  	err = truncate_node(dn);
964  	if (err) {
965  		f2fs_put_page(page, 1);
966  		return err;
967  	}
968  
969  	return 1;
970  }
971  
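
/*
 * Recursively free the direct nodes referenced by this (in)direct node
 * (and the node itself when @ofs == 0). A negative return value is an
 * errno; a positive one is used by the caller to advance its node offset,
 * and an absent subtree (nid == 0) reports NIDS_PER_BLOCK + 1.
 */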
972  static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs,
973  						int ofs, int depth)
974  {
975  	struct dnode_of_data rdn = *dn;
976  	struct page *page;
977  	struct f2fs_node *rn;
978  	nid_t child_nid;
979  	unsigned int child_nofs;
980  	int freed = 0;
981  	int i, ret;
982  
983  	if (dn->nid == 0)
984  		return NIDS_PER_BLOCK + 1;
985  
986  	trace_f2fs_truncate_nodes_enter(dn->inode, dn->nid, dn->data_blkaddr);
987  
988  	page = f2fs_get_node_page(F2FS_I_SB(dn->inode), dn->nid);
989  	if (IS_ERR(page)) {
990  		trace_f2fs_truncate_nodes_exit(dn->inode, PTR_ERR(page));
991  		return PTR_ERR(page);
992  	}
993  
994  	f2fs_ra_node_pages(page, ofs, NIDS_PER_BLOCK);
995  
996  	rn = F2FS_NODE(page);
997  	if (depth < 3) {
998  		for (i = ofs; i < NIDS_PER_BLOCK; i++, freed++) {
999  			child_nid = le32_to_cpu(rn->in.nid[i]);
1000  			if (child_nid == 0)
1001  				continue;
1002  			rdn.nid = child_nid;
1003  			ret = truncate_dnode(&rdn);
1004  			if (ret < 0)
1005  				goto out_err;
1006  			if (set_nid(page, i, 0, false))
1007  				dn->node_changed = true;
1008  		}
1009  	} else {
1010  		child_nofs = nofs + ofs * (NIDS_PER_BLOCK + 1) + 1;
1011  		for (i = ofs; i < NIDS_PER_BLOCK; i++) {
1012  			child_nid = le32_to_cpu(rn->in.nid[i]);
1013  			if (child_nid == 0) {
1014  				child_nofs += NIDS_PER_BLOCK + 1;
1015  				continue;
1016  			}
1017  			rdn.nid = child_nid;
1018  			ret = truncate_nodes(&rdn, child_nofs, 0, depth - 1);
1019  			if (ret == (NIDS_PER_BLOCK + 1)) {
1020  				if (set_nid(page, i, 0, false))
1021  					dn->node_changed = true;
1022  				child_nofs += ret;
1023  			} else if (ret < 0 && ret != -ENOENT) {
1024  				goto out_err;
1025  			}
1026  		}
1027  		freed = child_nofs;
1028  	}
1029  
1030  	if (!ofs) {
1031  		/* remove current indirect node */
1032  		dn->node_page = page;
1033  		ret = truncate_node(dn);
1034  		if (ret)
1035  			goto out_err;
1036  		freed++;
1037  	} else {
1038  		f2fs_put_page(page, 1);
1039  	}
1040  	trace_f2fs_truncate_nodes_exit(dn->inode, freed);
1041  	return freed;
1042  
1043  out_err:
1044  	f2fs_put_page(page, 1);
1045  	trace_f2fs_truncate_nodes_exit(dn->inode, ret);
1046  	return ret;
1047  }
1048  
1049  static int truncate_partial_nodes(struct dnode_of_data *dn,
1050  			struct f2fs_inode *ri, int *offset, int depth)
1051  {
1052  	struct page *pages[2];
1053  	nid_t nid[3];
1054  	nid_t child_nid;
1055  	int err = 0;
1056  	int i;
1057  	int idx = depth - 2;
1058  
1059  	nid[0] = le32_to_cpu(ri->i_nid[offset[0] - NODE_DIR1_BLOCK]);
1060  	if (!nid[0])
1061  		return 0;
1062  
1063  	/* get indirect nodes in the path */
1064  	for (i = 0; i < idx + 1; i++) {
1065  		/* reference count'll be increased */
1066  		pages[i] = f2fs_get_node_page(F2FS_I_SB(dn->inode), nid[i]);
1067  		if (IS_ERR(pages[i])) {
1068  			err = PTR_ERR(pages[i]);
1069  			idx = i - 1;
1070  			goto fail;
1071  		}
1072  		nid[i + 1] = get_nid(pages[i], offset[i + 1], false);
1073  	}
1074  
1075  	f2fs_ra_node_pages(pages[idx], offset[idx + 1], NIDS_PER_BLOCK);
1076  
1077  	/* free direct nodes linked to a partial indirect node */
1078  	for (i = offset[idx + 1]; i < NIDS_PER_BLOCK; i++) {
1079  		child_nid = get_nid(pages[idx], i, false);
1080  		if (!child_nid)
1081  			continue;
1082  		dn->nid = child_nid;
1083  		err = truncate_dnode(dn);
1084  		if (err < 0)
1085  			goto fail;
1086  		if (set_nid(pages[idx], i, 0, false))
1087  			dn->node_changed = true;
1088  	}
1089  
1090  	if (offset[idx + 1] == 0) {
1091  		dn->node_page = pages[idx];
1092  		dn->nid = nid[idx];
1093  		err = truncate_node(dn);
1094  		if (err)
1095  			goto fail;
1096  	} else {
1097  		f2fs_put_page(pages[idx], 1);
1098  	}
1099  	offset[idx]++;
1100  	offset[idx + 1] = 0;
1101  	idx--;
1102  fail:
1103  	for (i = idx; i >= 0; i--)
1104  		f2fs_put_page(pages[i], 1);
1105  
1106  	trace_f2fs_truncate_partial_nodes(dn->inode, nid, depth, err);
1107  
1108  	return err;
1109  }
1110  
1111  /*
1112   * All the block addresses of data and nodes should be nullified.
1113   */
1114  int f2fs_truncate_inode_blocks(struct inode *inode, pgoff_t from)
1115  {
1116  	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1117  	int err = 0, cont = 1;
1118  	int level, offset[4], noffset[4];
1119  	unsigned int nofs = 0;
1120  	struct f2fs_inode *ri;
1121  	struct dnode_of_data dn;
1122  	struct page *page;
1123  
1124  	trace_f2fs_truncate_inode_blocks_enter(inode, from);
1125  
1126  	level = get_node_path(inode, from, offset, noffset);
1127  	if (level < 0) {
1128  		trace_f2fs_truncate_inode_blocks_exit(inode, level);
1129  		return level;
1130  	}
1131  
1132  	page = f2fs_get_node_page(sbi, inode->i_ino);
1133  	if (IS_ERR(page)) {
1134  		trace_f2fs_truncate_inode_blocks_exit(inode, PTR_ERR(page));
1135  		return PTR_ERR(page);
1136  	}
1137  
1138  	set_new_dnode(&dn, inode, page, NULL, 0);
1139  	unlock_page(page);
1140  
1141  	ri = F2FS_INODE(page);
1142  	switch (level) {
1143  	case 0:
1144  	case 1:
1145  		nofs = noffset[1];
1146  		break;
1147  	case 2:
1148  		nofs = noffset[1];
1149  		if (!offset[level - 1])
1150  			goto skip_partial;
1151  		err = truncate_partial_nodes(&dn, ri, offset, level);
1152  		if (err < 0 && err != -ENOENT)
1153  			goto fail;
1154  		nofs += 1 + NIDS_PER_BLOCK;
1155  		break;
1156  	case 3:
1157  		nofs = 5 + 2 * NIDS_PER_BLOCK;
1158  		if (!offset[level - 1])
1159  			goto skip_partial;
1160  		err = truncate_partial_nodes(&dn, ri, offset, level);
1161  		if (err < 0 && err != -ENOENT)
1162  			goto fail;
1163  		break;
1164  	default:
1165  		BUG();
1166  	}
1167  
1168  skip_partial:
1169  	while (cont) {
1170  		dn.nid = le32_to_cpu(ri->i_nid[offset[0] - NODE_DIR1_BLOCK]);
1171  		switch (offset[0]) {
1172  		case NODE_DIR1_BLOCK:
1173  		case NODE_DIR2_BLOCK:
1174  			err = truncate_dnode(&dn);
1175  			break;
1176  
1177  		case NODE_IND1_BLOCK:
1178  		case NODE_IND2_BLOCK:
1179  			err = truncate_nodes(&dn, nofs, offset[1], 2);
1180  			break;
1181  
1182  		case NODE_DIND_BLOCK:
1183  			err = truncate_nodes(&dn, nofs, offset[1], 3);
1184  			cont = 0;
1185  			break;
1186  
1187  		default:
1188  			BUG();
1189  		}
1190  		if (err == -ENOENT) {
1191  			set_sbi_flag(F2FS_P_SB(page), SBI_NEED_FSCK);
1192  			f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR);
1193  			f2fs_err_ratelimited(sbi,
1194  				"truncate node fail, ino:%lu, nid:%u, "
1195  				"offset[0]:%d, offset[1]:%d, nofs:%d",
1196  				inode->i_ino, dn.nid, offset[0],
1197  				offset[1], nofs);
1198  			err = 0;
1199  		}
1200  		if (err < 0)
1201  			goto fail;
1202  		if (offset[1] == 0 &&
1203  				ri->i_nid[offset[0] - NODE_DIR1_BLOCK]) {
1204  			lock_page(page);
1205  			BUG_ON(page->mapping != NODE_MAPPING(sbi));
1206  			f2fs_wait_on_page_writeback(page, NODE, true, true);
1207  			ri->i_nid[offset[0] - NODE_DIR1_BLOCK] = 0;
1208  			set_page_dirty(page);
1209  			unlock_page(page);
1210  		}
1211  		offset[1] = 0;
1212  		offset[0]++;
1213  		nofs += err;
1214  	}
1215  fail:
1216  	f2fs_put_page(page, 0);
1217  	trace_f2fs_truncate_inode_blocks_exit(inode, err);
1218  	return err > 0 ? 0 : err;
1219  }
1220  
1221  /* caller must lock inode page */
1222  int f2fs_truncate_xattr_node(struct inode *inode)
1223  {
1224  	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1225  	nid_t nid = F2FS_I(inode)->i_xattr_nid;
1226  	struct dnode_of_data dn;
1227  	struct page *npage;
1228  	int err;
1229  
1230  	if (!nid)
1231  		return 0;
1232  
1233  	npage = f2fs_get_node_page(sbi, nid);
1234  	if (IS_ERR(npage))
1235  		return PTR_ERR(npage);
1236  
1237  	set_new_dnode(&dn, inode, NULL, npage, nid);
1238  	err = truncate_node(&dn);
1239  	if (err) {
1240  		f2fs_put_page(npage, 1);
1241  		return err;
1242  	}
1243  
1244  	f2fs_i_xnid_write(inode, 0);
1245  
1246  	return 0;
1247  }
1248  
1249  /*
1250   * Caller should grab and release a rwsem by calling f2fs_lock_op() and
1251   * f2fs_unlock_op().
1252   */
1253  int f2fs_remove_inode_page(struct inode *inode)
1254  {
1255  	struct dnode_of_data dn;
1256  	int err;
1257  
1258  	set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino);
1259  	err = f2fs_get_dnode_of_data(&dn, 0, LOOKUP_NODE);
1260  	if (err)
1261  		return err;
1262  
1263  	err = f2fs_truncate_xattr_node(inode);
1264  	if (err) {
1265  		f2fs_put_dnode(&dn);
1266  		return err;
1267  	}
1268  
1269  	/* remove potential inline_data blocks */
1270  	if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
1271  				S_ISLNK(inode->i_mode))
1272  		f2fs_truncate_data_blocks_range(&dn, 1);
1273  
1274  	/* 0 is possible, after f2fs_new_inode() has failed */
1275  	if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) {
1276  		f2fs_put_dnode(&dn);
1277  		return -EIO;
1278  	}
1279  
1280  	if (unlikely(inode->i_blocks != 0 && inode->i_blocks != 8)) {
1281  		f2fs_warn(F2FS_I_SB(inode),
1282  			"f2fs_remove_inode_page: inconsistent i_blocks, ino:%lu, iblocks:%llu",
1283  			inode->i_ino, (unsigned long long)inode->i_blocks);
1284  		set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK);
1285  	}
1286  
1287  	/* will put inode & node pages */
1288  	err = truncate_node(&dn);
1289  	if (err) {
1290  		f2fs_put_dnode(&dn);
1291  		return err;
1292  	}
1293  	return 0;
1294  }
1295  
1296  struct page *f2fs_new_inode_page(struct inode *inode)
1297  {
1298  	struct dnode_of_data dn;
1299  
1300  	/* allocate inode page for new inode */
1301  	set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino);
1302  
1303  	/* caller should f2fs_put_page(page, 1); */
1304  	return f2fs_new_node_page(&dn, 0);
1305  }
1306  
1307  struct page *f2fs_new_node_page(struct dnode_of_data *dn, unsigned int ofs)
1308  {
1309  	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
1310  	struct node_info new_ni;
1311  	struct page *page;
1312  	int err;
1313  
1314  	if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
1315  		return ERR_PTR(-EPERM);
1316  
1317  	page = f2fs_grab_cache_page(NODE_MAPPING(sbi), dn->nid, false);
1318  	if (!page)
1319  		return ERR_PTR(-ENOMEM);
1320  
1321  	if (unlikely((err = inc_valid_node_count(sbi, dn->inode, !ofs))))
1322  		goto fail;
1323  
1324  #ifdef CONFIG_F2FS_CHECK_FS
1325  	err = f2fs_get_node_info(sbi, dn->nid, &new_ni, false);
1326  	if (err) {
1327  		dec_valid_node_count(sbi, dn->inode, !ofs);
1328  		goto fail;
1329  	}
1330  	if (unlikely(new_ni.blk_addr != NULL_ADDR)) {
1331  		err = -EFSCORRUPTED;
1332  		dec_valid_node_count(sbi, dn->inode, !ofs);
1333  		set_sbi_flag(sbi, SBI_NEED_FSCK);
1334  		f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR);
1335  		goto fail;
1336  	}
1337  #endif
1338  	new_ni.nid = dn->nid;
1339  	new_ni.ino = dn->inode->i_ino;
1340  	new_ni.blk_addr = NULL_ADDR;
1341  	new_ni.flag = 0;
1342  	new_ni.version = 0;
1343  	set_node_addr(sbi, &new_ni, NEW_ADDR, false);
1344  
1345  	f2fs_wait_on_page_writeback(page, NODE, true, true);
1346  	fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true);
1347  	set_cold_node(page, S_ISDIR(dn->inode->i_mode));
1348  	if (!PageUptodate(page))
1349  		SetPageUptodate(page);
1350  	if (set_page_dirty(page))
1351  		dn->node_changed = true;
1352  
1353  	if (f2fs_has_xattr_block(ofs))
1354  		f2fs_i_xnid_write(dn->inode, dn->nid);
1355  
1356  	if (ofs == 0)
1357  		inc_valid_inode_count(sbi);
1358  	return page;
1359  fail:
1360  	clear_node_page_dirty(page);
1361  	f2fs_put_page(page, 1);
1362  	return ERR_PTR(err);
1363  }
1364  
1365  /*
1366   * Caller should put the page according to the return value:
1367   * 0: f2fs_put_page(page, 0)
1368   * LOCKED_PAGE or error: f2fs_put_page(page, 1)
1369   */
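
/*
 * For example, the readahead caller below simply does:
 *
 *	err = read_node_page(apage, REQ_RAHEAD);
 *	f2fs_put_page(apage, err ? 1 : 0);
 */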
1370  static int read_node_page(struct page *page, blk_opf_t op_flags)
1371  {
1372  	struct folio *folio = page_folio(page);
1373  	struct f2fs_sb_info *sbi = F2FS_P_SB(page);
1374  	struct node_info ni;
1375  	struct f2fs_io_info fio = {
1376  		.sbi = sbi,
1377  		.type = NODE,
1378  		.op = REQ_OP_READ,
1379  		.op_flags = op_flags,
1380  		.page = page,
1381  		.encrypted_page = NULL,
1382  	};
1383  	int err;
1384  
1385  	if (folio_test_uptodate(folio)) {
1386  		if (!f2fs_inode_chksum_verify(sbi, page)) {
1387  			folio_clear_uptodate(folio);
1388  			return -EFSBADCRC;
1389  		}
1390  		return LOCKED_PAGE;
1391  	}
1392  
1393  	err = f2fs_get_node_info(sbi, folio->index, &ni, false);
1394  	if (err)
1395  		return err;
1396  
1397  	/* NEW_ADDR can be seen, after cp_error drops some dirty node pages */
1398  	if (unlikely(ni.blk_addr == NULL_ADDR || ni.blk_addr == NEW_ADDR)) {
1399  		folio_clear_uptodate(folio);
1400  		return -ENOENT;
1401  	}
1402  
1403  	fio.new_blkaddr = fio.old_blkaddr = ni.blk_addr;
1404  
1405  	err = f2fs_submit_page_bio(&fio);
1406  
1407  	if (!err)
1408  		f2fs_update_iostat(sbi, NULL, FS_NODE_READ_IO, F2FS_BLKSIZE);
1409  
1410  	return err;
1411  }
1412  
1413  /*
1414   * Readahead a node page
1415   */
1416  void f2fs_ra_node_page(struct f2fs_sb_info *sbi, nid_t nid)
1417  {
1418  	struct page *apage;
1419  	int err;
1420  
1421  	if (!nid)
1422  		return;
1423  	if (f2fs_check_nid_range(sbi, nid))
1424  		return;
1425  
1426  	apage = xa_load(&NODE_MAPPING(sbi)->i_pages, nid);
1427  	if (apage)
1428  		return;
1429  
1430  	apage = f2fs_grab_cache_page(NODE_MAPPING(sbi), nid, false);
1431  	if (!apage)
1432  		return;
1433  
1434  	err = read_node_page(apage, REQ_RAHEAD);
1435  	f2fs_put_page(apage, err ? 1 : 0);
1436  }
1437  
1438  static struct page *__get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid,
1439  					struct page *parent, int start)
1440  {
1441  	struct page *page;
1442  	int err;
1443  
1444  	if (!nid)
1445  		return ERR_PTR(-ENOENT);
1446  	if (f2fs_check_nid_range(sbi, nid))
1447  		return ERR_PTR(-EINVAL);
1448  repeat:
1449  	page = f2fs_grab_cache_page(NODE_MAPPING(sbi), nid, false);
1450  	if (!page)
1451  		return ERR_PTR(-ENOMEM);
1452  
1453  	err = read_node_page(page, 0);
1454  	if (err < 0) {
1455  		goto out_put_err;
1456  	} else if (err == LOCKED_PAGE) {
1457  		err = 0;
1458  		goto page_hit;
1459  	}
1460  
1461  	if (parent)
1462  		f2fs_ra_node_pages(parent, start + 1, MAX_RA_NODE);
1463  
1464  	lock_page(page);
1465  
1466  	if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
1467  		f2fs_put_page(page, 1);
1468  		goto repeat;
1469  	}
1470  
1471  	if (unlikely(!PageUptodate(page))) {
1472  		err = -EIO;
1473  		goto out_err;
1474  	}
1475  
1476  	if (!f2fs_inode_chksum_verify(sbi, page)) {
1477  		err = -EFSBADCRC;
1478  		goto out_err;
1479  	}
1480  page_hit:
1481  	if (likely(nid == nid_of_node(page)))
1482  		return page;
1483  
1484  	f2fs_warn(sbi, "inconsistent node block, nid:%lu, node_footer[nid:%u,ino:%u,ofs:%u,cpver:%llu,blkaddr:%u]",
1485  			  nid, nid_of_node(page), ino_of_node(page),
1486  			  ofs_of_node(page), cpver_of_node(page),
1487  			  next_blkaddr_of_node(page));
1488  	set_sbi_flag(sbi, SBI_NEED_FSCK);
1489  	f2fs_handle_error(sbi, ERROR_INCONSISTENT_FOOTER);
1490  	err = -EFSCORRUPTED;
1491  out_err:
1492  	ClearPageUptodate(page);
1493  out_put_err:
1494  	/* ENOENT comes from read_node_page and is not an error. */
1495  	if (err != -ENOENT)
1496  		f2fs_handle_page_eio(sbi, page_folio(page), NODE);
1497  	f2fs_put_page(page, 1);
1498  	return ERR_PTR(err);
1499  }
1500  
1501  struct page *f2fs_get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid)
1502  {
1503  	return __get_node_page(sbi, nid, NULL, 0);
1504  }
1505  
1506  struct page *f2fs_get_node_page_ra(struct page *parent, int start)
1507  {
1508  	struct f2fs_sb_info *sbi = F2FS_P_SB(parent);
1509  	nid_t nid = get_nid(parent, start, false);
1510  
1511  	return __get_node_page(sbi, nid, parent, start);
1512  }
1513  
1514  static void flush_inline_data(struct f2fs_sb_info *sbi, nid_t ino)
1515  {
1516  	struct inode *inode;
1517  	struct page *page;
1518  	int ret;
1519  
1520  	/* should flush inline_data before evict_inode */
1521  	inode = ilookup(sbi->sb, ino);
1522  	if (!inode)
1523  		return;
1524  
1525  	page = f2fs_pagecache_get_page(inode->i_mapping, 0,
1526  					FGP_LOCK|FGP_NOWAIT, 0);
1527  	if (!page)
1528  		goto iput_out;
1529  
1530  	if (!PageUptodate(page))
1531  		goto page_out;
1532  
1533  	if (!PageDirty(page))
1534  		goto page_out;
1535  
1536  	if (!clear_page_dirty_for_io(page))
1537  		goto page_out;
1538  
1539  	ret = f2fs_write_inline_data(inode, page_folio(page));
1540  	inode_dec_dirty_pages(inode);
1541  	f2fs_remove_dirty_inode(inode);
1542  	if (ret)
1543  		set_page_dirty(page);
1544  page_out:
1545  	f2fs_put_page(page, 1);
1546  iput_out:
1547  	iput(inode);
1548  }
1549  
1550  static struct page *last_fsync_dnode(struct f2fs_sb_info *sbi, nid_t ino)
1551  {
1552  	pgoff_t index;
1553  	struct folio_batch fbatch;
1554  	struct page *last_page = NULL;
1555  	int nr_folios;
1556  
1557  	folio_batch_init(&fbatch);
1558  	index = 0;
1559  
1560  	while ((nr_folios = filemap_get_folios_tag(NODE_MAPPING(sbi), &index,
1561  					(pgoff_t)-1, PAGECACHE_TAG_DIRTY,
1562  					&fbatch))) {
1563  		int i;
1564  
1565  		for (i = 0; i < nr_folios; i++) {
1566  			struct page *page = &fbatch.folios[i]->page;
1567  
1568  			if (unlikely(f2fs_cp_error(sbi))) {
1569  				f2fs_put_page(last_page, 0);
1570  				folio_batch_release(&fbatch);
1571  				return ERR_PTR(-EIO);
1572  			}
1573  
1574  			if (!IS_DNODE(page) || !is_cold_node(page))
1575  				continue;
1576  			if (ino_of_node(page) != ino)
1577  				continue;
1578  
1579  			lock_page(page);
1580  
1581  			if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
1582  continue_unlock:
1583  				unlock_page(page);
1584  				continue;
1585  			}
1586  			if (ino_of_node(page) != ino)
1587  				goto continue_unlock;
1588  
1589  			if (!PageDirty(page)) {
1590  				/* someone wrote it for us */
1591  				goto continue_unlock;
1592  			}
1593  
1594  			if (last_page)
1595  				f2fs_put_page(last_page, 0);
1596  
1597  			get_page(page);
1598  			last_page = page;
1599  			unlock_page(page);
1600  		}
1601  		folio_batch_release(&fbatch);
1602  		cond_resched();
1603  	}
1604  	return last_page;
1605  }
1606  
1607  static int __write_node_page(struct page *page, bool atomic, bool *submitted,
1608  				struct writeback_control *wbc, bool do_balance,
1609  				enum iostat_type io_type, unsigned int *seq_id)
1610  {
1611  	struct f2fs_sb_info *sbi = F2FS_P_SB(page);
1612  	struct folio *folio = page_folio(page);
1613  	nid_t nid;
1614  	struct node_info ni;
1615  	struct f2fs_io_info fio = {
1616  		.sbi = sbi,
1617  		.ino = ino_of_node(page),
1618  		.type = NODE,
1619  		.op = REQ_OP_WRITE,
1620  		.op_flags = wbc_to_write_flags(wbc),
1621  		.page = page,
1622  		.encrypted_page = NULL,
1623  		.submitted = 0,
1624  		.io_type = io_type,
1625  		.io_wbc = wbc,
1626  	};
1627  	unsigned int seq;
1628  
1629  	trace_f2fs_writepage(folio, NODE);
1630  
1631  	if (unlikely(f2fs_cp_error(sbi))) {
1632  		/* keep node pages in remount-ro mode */
1633  		if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_READONLY)
1634  			goto redirty_out;
1635  		folio_clear_uptodate(folio);
1636  		dec_page_count(sbi, F2FS_DIRTY_NODES);
1637  		folio_unlock(folio);
1638  		return 0;
1639  	}
1640  
1641  	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
1642  		goto redirty_out;
1643  
1644  	if (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
1645  			wbc->sync_mode == WB_SYNC_NONE &&
1646  			IS_DNODE(page) && is_cold_node(page))
1647  		goto redirty_out;
1648  
1649  	/* get old block addr of this node page */
1650  	nid = nid_of_node(page);
1651  	f2fs_bug_on(sbi, folio->index != nid);
1652  
1653  	if (f2fs_get_node_info(sbi, nid, &ni, !do_balance))
1654  		goto redirty_out;
1655  
1656  	if (wbc->for_reclaim) {
1657  		if (!f2fs_down_read_trylock(&sbi->node_write))
1658  			goto redirty_out;
1659  	} else {
1660  		f2fs_down_read(&sbi->node_write);
1661  	}
1662  
1663  	/* This page is already truncated */
1664  	if (unlikely(ni.blk_addr == NULL_ADDR)) {
1665  		folio_clear_uptodate(folio);
1666  		dec_page_count(sbi, F2FS_DIRTY_NODES);
1667  		f2fs_up_read(&sbi->node_write);
1668  		folio_unlock(folio);
1669  		return 0;
1670  	}
1671  
1672  	if (__is_valid_data_blkaddr(ni.blk_addr) &&
1673  		!f2fs_is_valid_blkaddr(sbi, ni.blk_addr,
1674  					DATA_GENERIC_ENHANCE)) {
1675  		f2fs_up_read(&sbi->node_write);
1676  		goto redirty_out;
1677  	}
1678  
1679  	if (atomic && !test_opt(sbi, NOBARRIER))
1680  		fio.op_flags |= REQ_PREFLUSH | REQ_FUA;
1681  
1682  	/* should add to global list before clearing PAGECACHE status */
1683  	if (f2fs_in_warm_node_list(sbi, page)) {
1684  		seq = f2fs_add_fsync_node_entry(sbi, page);
1685  		if (seq_id)
1686  			*seq_id = seq;
1687  	}
1688  
1689  	folio_start_writeback(folio);
1690  
1691  	fio.old_blkaddr = ni.blk_addr;
1692  	f2fs_do_write_node_page(nid, &fio);
1693  	set_node_addr(sbi, &ni, fio.new_blkaddr, is_fsync_dnode(page));
1694  	dec_page_count(sbi, F2FS_DIRTY_NODES);
1695  	f2fs_up_read(&sbi->node_write);
1696  
1697  	if (wbc->for_reclaim) {
1698  		f2fs_submit_merged_write_cond(sbi, NULL, page, 0, NODE);
1699  		submitted = NULL;
1700  	}
1701  
1702  	folio_unlock(folio);
1703  
1704  	if (unlikely(f2fs_cp_error(sbi))) {
1705  		f2fs_submit_merged_write(sbi, NODE);
1706  		submitted = NULL;
1707  	}
1708  	if (submitted)
1709  		*submitted = fio.submitted;
1710  
1711  	if (do_balance)
1712  		f2fs_balance_fs(sbi, false);
1713  	return 0;
1714  
1715  redirty_out:
1716  	folio_redirty_for_writepage(wbc, folio);
1717  	return AOP_WRITEPAGE_ACTIVATE;
1718  }
1719  
1720  int f2fs_move_node_page(struct page *node_page, int gc_type)
1721  {
1722  	int err = 0;
1723  
1724  	if (gc_type == FG_GC) {
1725  		struct writeback_control wbc = {
1726  			.sync_mode = WB_SYNC_ALL,
1727  			.nr_to_write = 1,
1728  			.for_reclaim = 0,
1729  		};
1730  
1731  		f2fs_wait_on_page_writeback(node_page, NODE, true, true);
1732  
1733  		set_page_dirty(node_page);
1734  
1735  		if (!clear_page_dirty_for_io(node_page)) {
1736  			err = -EAGAIN;
1737  			goto out_page;
1738  		}
1739  
1740  		if (__write_node_page(node_page, false, NULL,
1741  					&wbc, false, FS_GC_NODE_IO, NULL)) {
1742  			err = -EAGAIN;
1743  			unlock_page(node_page);
1744  		}
1745  		goto release_page;
1746  	} else {
1747  		/* set page dirty and write it */
1748  		if (!folio_test_writeback(page_folio(node_page)))
1749  			set_page_dirty(node_page);
1750  	}
1751  out_page:
1752  	unlock_page(node_page);
1753  release_page:
1754  	f2fs_put_page(node_page, 0);
1755  	return err;
1756  }
1757  
1758  static int f2fs_write_node_page(struct page *page,
1759  				struct writeback_control *wbc)
1760  {
1761  	return __write_node_page(page, false, NULL, wbc, false,
1762  						FS_NODE_IO, NULL);
1763  }
1764  
1765  int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
1766  			struct writeback_control *wbc, bool atomic,
1767  			unsigned int *seq_id)
1768  {
1769  	pgoff_t index;
1770  	struct folio_batch fbatch;
1771  	int ret = 0;
1772  	struct page *last_page = NULL;
1773  	bool marked = false;
1774  	nid_t ino = inode->i_ino;
1775  	int nr_folios;
1776  	int nwritten = 0;
1777  
1778  	if (atomic) {
1779  		last_page = last_fsync_dnode(sbi, ino);
1780  		if (IS_ERR_OR_NULL(last_page))
1781  			return PTR_ERR_OR_ZERO(last_page);
1782  	}
1783  retry:
1784  	folio_batch_init(&fbatch);
1785  	index = 0;
1786  
1787  	while ((nr_folios = filemap_get_folios_tag(NODE_MAPPING(sbi), &index,
1788  					(pgoff_t)-1, PAGECACHE_TAG_DIRTY,
1789  					&fbatch))) {
1790  		int i;
1791  
1792  		for (i = 0; i < nr_folios; i++) {
1793  			struct page *page = &fbatch.folios[i]->page;
1794  			bool submitted = false;
1795  
1796  			if (unlikely(f2fs_cp_error(sbi))) {
1797  				f2fs_put_page(last_page, 0);
1798  				folio_batch_release(&fbatch);
1799  				ret = -EIO;
1800  				goto out;
1801  			}
1802  
1803  			if (!IS_DNODE(page) || !is_cold_node(page))
1804  				continue;
1805  			if (ino_of_node(page) != ino)
1806  				continue;
1807  
1808  			lock_page(page);
1809  
1810  			if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
1811  continue_unlock:
1812  				unlock_page(page);
1813  				continue;
1814  			}
1815  			if (ino_of_node(page) != ino)
1816  				goto continue_unlock;
1817  
1818  			if (!PageDirty(page) && page != last_page) {
1819  				/* someone wrote it for us */
1820  				goto continue_unlock;
1821  			}
1822  
1823  			f2fs_wait_on_page_writeback(page, NODE, true, true);
1824  
1825  			set_fsync_mark(page, 0);
1826  			set_dentry_mark(page, 0);
1827  
1828  			if (!atomic || page == last_page) {
1829  				set_fsync_mark(page, 1);
1830  				percpu_counter_inc(&sbi->rf_node_block_count);
1831  				if (IS_INODE(page)) {
1832  					if (is_inode_flag_set(inode,
1833  								FI_DIRTY_INODE))
1834  						f2fs_update_inode(inode, page);
1835  					set_dentry_mark(page,
1836  						f2fs_need_dentry_mark(sbi, ino));
1837  				}
1838  				/* may be written by other thread */
1839  				if (!PageDirty(page))
1840  					set_page_dirty(page);
1841  			}
1842  
1843  			if (!clear_page_dirty_for_io(page))
1844  				goto continue_unlock;
1845  
1846  			ret = __write_node_page(page, atomic &&
1847  						page == last_page,
1848  						&submitted, wbc, true,
1849  						FS_NODE_IO, seq_id);
1850  			if (ret) {
1851  				unlock_page(page);
1852  				f2fs_put_page(last_page, 0);
1853  				break;
1854  			} else if (submitted) {
1855  				nwritten++;
1856  			}
1857  
1858  			if (page == last_page) {
1859  				f2fs_put_page(page, 0);
1860  				marked = true;
1861  				break;
1862  			}
1863  		}
1864  		folio_batch_release(&fbatch);
1865  		cond_resched();
1866  
1867  		if (ret || marked)
1868  			break;
1869  	}
1870  	if (!ret && atomic && !marked) {
1871  		f2fs_debug(sbi, "Retry to write fsync mark: ino=%u, idx=%lx",
1872  			   ino, page_folio(last_page)->index);
1873  		lock_page(last_page);
1874  		f2fs_wait_on_page_writeback(last_page, NODE, true, true);
1875  		set_page_dirty(last_page);
1876  		unlock_page(last_page);
1877  		goto retry;
1878  	}
1879  out:
1880  	if (nwritten)
1881  		f2fs_submit_merged_write_cond(sbi, NULL, NULL, ino, NODE);
1882  	return ret ? -EIO : 0;
1883  }
1884  
1885  static int f2fs_match_ino(struct inode *inode, unsigned long ino, void *data)
1886  {
1887  	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1888  	bool clean;
1889  
1890  	if (inode->i_ino != ino)
1891  		return 0;
1892  
1893  	if (!is_inode_flag_set(inode, FI_DIRTY_INODE))
1894  		return 0;
1895  
1896  	spin_lock(&sbi->inode_lock[DIRTY_META]);
1897  	clean = list_empty(&F2FS_I(inode)->gdirty_list);
1898  	spin_unlock(&sbi->inode_lock[DIRTY_META]);
1899  
1900  	if (clean)
1901  		return 0;
1902  
1903  	inode = igrab(inode);
1904  	if (!inode)
1905  		return 0;
1906  	return 1;
1907  }
1908  
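/*
 * If the inode owning this node page is still dirty in memory, copy its
 * in-core state into the on-disk inode block before the page is written.
 * Returns true when the page was handled (and unlocked) here.
 */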
1909  static bool flush_dirty_inode(struct page *page)
1910  {
1911  	struct f2fs_sb_info *sbi = F2FS_P_SB(page);
1912  	struct inode *inode;
1913  	nid_t ino = ino_of_node(page);
1914  
1915  	inode = find_inode_nowait(sbi->sb, ino, f2fs_match_ino, NULL);
1916  	if (!inode)
1917  		return false;
1918  
1919  	f2fs_update_inode(inode, page);
1920  	unlock_page(page);
1921  
1922  	iput(inode);
1923  	return true;
1924  }
1925  
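/*
 * Scan dirty node pages and, for inode pages whose private inline flag
 * is still set, clear the flag and hand the owning inode over to
 * flush_inline_data() so its cached inline data gets written out.
 */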
1926  void f2fs_flush_inline_data(struct f2fs_sb_info *sbi)
1927  {
1928  	pgoff_t index = 0;
1929  	struct folio_batch fbatch;
1930  	int nr_folios;
1931  
1932  	folio_batch_init(&fbatch);
1933  
1934  	while ((nr_folios = filemap_get_folios_tag(NODE_MAPPING(sbi), &index,
1935  					(pgoff_t)-1, PAGECACHE_TAG_DIRTY,
1936  					&fbatch))) {
1937  		int i;
1938  
1939  		for (i = 0; i < nr_folios; i++) {
1940  			struct page *page = &fbatch.folios[i]->page;
1941  
1942  			if (!IS_INODE(page))
1943  				continue;
1944  
1945  			lock_page(page);
1946  
1947  			if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
1948  continue_unlock:
1949  				unlock_page(page);
1950  				continue;
1951  			}
1952  
1953  			if (!PageDirty(page)) {
1954  				/* someone wrote it for us */
1955  				goto continue_unlock;
1956  			}
1957  
1958  			/* flush inline_data, if it's async context. */
1959  			if (page_private_inline(page)) {
1960  				clear_page_private_inline(page);
1961  				unlock_page(page);
1962  				flush_inline_data(sbi, ino_of_node(page));
1963  				continue;
1964  			}
1965  			unlock_page(page);
1966  		}
1967  		folio_batch_release(&fbatch);
1968  		cond_resched();
1969  	}
1970  }
1971  
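/*
 * Write back dirty node pages for regular writeback.  Pages are flushed
 * in three passes (see the step comment below): indirect nodes first,
 * then dentry dnodes, then file dnodes.
 */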
1972  int f2fs_sync_node_pages(struct f2fs_sb_info *sbi,
1973  				struct writeback_control *wbc,
1974  				bool do_balance, enum iostat_type io_type)
1975  {
1976  	pgoff_t index;
1977  	struct folio_batch fbatch;
1978  	int step = 0;
1979  	int nwritten = 0;
1980  	int ret = 0;
1981  	int nr_folios, done = 0;
1982  
1983  	folio_batch_init(&fbatch);
1984  
1985  next_step:
1986  	index = 0;
1987  
1988  	while (!done && (nr_folios = filemap_get_folios_tag(NODE_MAPPING(sbi),
1989  				&index, (pgoff_t)-1, PAGECACHE_TAG_DIRTY,
1990  				&fbatch))) {
1991  		int i;
1992  
1993  		for (i = 0; i < nr_folios; i++) {
1994  			struct page *page = &fbatch.folios[i]->page;
1995  			bool submitted = false;
1996  
1997  			/* give a priority to WB_SYNC threads */
1998  			if (atomic_read(&sbi->wb_sync_req[NODE]) &&
1999  					wbc->sync_mode == WB_SYNC_NONE) {
2000  				done = 1;
2001  				break;
2002  			}
2003  
2004  			/*
2005  			 * flushing sequence with step:
2006  			 * 0. indirect nodes
2007  			 * 1. dentry dnodes
2008  			 * 2. file dnodes
2009  			 */
2010  			if (step == 0 && IS_DNODE(page))
2011  				continue;
2012  			if (step == 1 && (!IS_DNODE(page) ||
2013  						is_cold_node(page)))
2014  				continue;
2015  			if (step == 2 && (!IS_DNODE(page) ||
2016  						!is_cold_node(page)))
2017  				continue;
2018  lock_node:
2019  			if (wbc->sync_mode == WB_SYNC_ALL)
2020  				lock_page(page);
2021  			else if (!trylock_page(page))
2022  				continue;
2023  
2024  			if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
2025  continue_unlock:
2026  				unlock_page(page);
2027  				continue;
2028  			}
2029  
2030  			if (!PageDirty(page)) {
2031  				/* someone wrote it for us */
2032  				goto continue_unlock;
2033  			}
2034  
2035  			/* flush inline_data/inode, if it's async context. */
2036  			if (!do_balance)
2037  				goto write_node;
2038  
2039  			/* flush inline_data */
2040  			if (page_private_inline(page)) {
2041  				clear_page_private_inline(page);
2042  				unlock_page(page);
2043  				flush_inline_data(sbi, ino_of_node(page));
2044  				goto lock_node;
2045  			}
2046  
2047  			/* flush dirty inode */
2048  			if (IS_INODE(page) && flush_dirty_inode(page))
2049  				goto lock_node;
2050  write_node:
2051  			f2fs_wait_on_page_writeback(page, NODE, true, true);
2052  
2053  			if (!clear_page_dirty_for_io(page))
2054  				goto continue_unlock;
2055  
2056  			set_fsync_mark(page, 0);
2057  			set_dentry_mark(page, 0);
2058  
2059  			ret = __write_node_page(page, false, &submitted,
2060  						wbc, do_balance, io_type, NULL);
2061  			if (ret)
2062  				unlock_page(page);
2063  			else if (submitted)
2064  				nwritten++;
2065  
2066  			if (--wbc->nr_to_write == 0)
2067  				break;
2068  		}
2069  		folio_batch_release(&fbatch);
2070  		cond_resched();
2071  
2072  		if (wbc->nr_to_write == 0) {
2073  			step = 2;
2074  			break;
2075  		}
2076  	}
2077  
2078  	if (step < 2) {
2079  		if (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
2080  				wbc->sync_mode == WB_SYNC_NONE && step == 1)
2081  			goto out;
2082  		step++;
2083  		goto next_step;
2084  	}
2085  out:
2086  	if (nwritten)
2087  		f2fs_submit_merged_write(sbi, NODE);
2088  
2089  	if (unlikely(f2fs_cp_error(sbi)))
2090  		return -EIO;
2091  	return ret;
2092  }
2093  
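/*
 * Wait until every fsync node entry whose sequence id is not larger than
 * @seq_id has finished writeback, then report any I/O error recorded on
 * the node mapping.
 */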
2094  int f2fs_wait_on_node_pages_writeback(struct f2fs_sb_info *sbi,
2095  						unsigned int seq_id)
2096  {
2097  	struct fsync_node_entry *fn;
2098  	struct page *page;
2099  	struct list_head *head = &sbi->fsync_node_list;
2100  	unsigned long flags;
2101  	unsigned int cur_seq_id = 0;
2102  
2103  	while (seq_id && cur_seq_id < seq_id) {
2104  		spin_lock_irqsave(&sbi->fsync_node_lock, flags);
2105  		if (list_empty(head)) {
2106  			spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);
2107  			break;
2108  		}
2109  		fn = list_first_entry(head, struct fsync_node_entry, list);
2110  		if (fn->seq_id > seq_id) {
2111  			spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);
2112  			break;
2113  		}
2114  		cur_seq_id = fn->seq_id;
2115  		page = fn->page;
2116  		get_page(page);
2117  		spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);
2118  
2119  		f2fs_wait_on_page_writeback(page, NODE, true, false);
2120  
2121  		put_page(page);
2122  	}
2123  
2124  	return filemap_check_errors(NODE_MAPPING(sbi));
2125  }
2126  
2127  static int f2fs_write_node_pages(struct address_space *mapping,
2128  			    struct writeback_control *wbc)
2129  {
2130  	struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
2131  	struct blk_plug plug;
2132  	long diff;
2133  
2134  	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
2135  		goto skip_write;
2136  
2137  	/* balancing f2fs's metadata in background */
2138  	f2fs_balance_fs_bg(sbi, true);
2139  
2140  	/* collect a number of dirty node pages and write together */
2141  	if (wbc->sync_mode != WB_SYNC_ALL &&
2142  			get_pages(sbi, F2FS_DIRTY_NODES) <
2143  					nr_pages_to_skip(sbi, NODE))
2144  		goto skip_write;
2145  
2146  	if (wbc->sync_mode == WB_SYNC_ALL)
2147  		atomic_inc(&sbi->wb_sync_req[NODE]);
2148  	else if (atomic_read(&sbi->wb_sync_req[NODE])) {
2149  		/* to avoid potential deadlock */
2150  		if (current->plug)
2151  			blk_finish_plug(current->plug);
2152  		goto skip_write;
2153  	}
2154  
2155  	trace_f2fs_writepages(mapping->host, wbc, NODE);
2156  
2157  	diff = nr_pages_to_write(sbi, NODE, wbc);
2158  	blk_start_plug(&plug);
2159  	f2fs_sync_node_pages(sbi, wbc, true, FS_NODE_IO);
2160  	blk_finish_plug(&plug);
2161  	wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff);
2162  
2163  	if (wbc->sync_mode == WB_SYNC_ALL)
2164  		atomic_dec(&sbi->wb_sync_req[NODE]);
2165  	return 0;
2166  
2167  skip_write:
2168  	wbc->pages_skipped += get_pages(sbi, F2FS_DIRTY_NODES);
2169  	trace_f2fs_writepages(mapping->host, wbc, NODE);
2170  	return 0;
2171  }
2172  
2173  static bool f2fs_dirty_node_folio(struct address_space *mapping,
2174  		struct folio *folio)
2175  {
2176  	trace_f2fs_set_page_dirty(folio, NODE);
2177  
2178  	if (!folio_test_uptodate(folio))
2179  		folio_mark_uptodate(folio);
2180  #ifdef CONFIG_F2FS_CHECK_FS
2181  	if (IS_INODE(&folio->page))
2182  		f2fs_inode_chksum_set(F2FS_M_SB(mapping), &folio->page);
2183  #endif
2184  	if (filemap_dirty_folio(mapping, folio)) {
2185  		inc_page_count(F2FS_M_SB(mapping), F2FS_DIRTY_NODES);
2186  		set_page_private_reference(&folio->page);
2187  		return true;
2188  	}
2189  	return false;
2190  }
2191  
2192  /*
2193   * Structure of the f2fs node operations
2194   */
2195  const struct address_space_operations f2fs_node_aops = {
2196  	.writepage	= f2fs_write_node_page,
2197  	.writepages	= f2fs_write_node_pages,
2198  	.dirty_folio	= f2fs_dirty_node_folio,
2199  	.invalidate_folio = f2fs_invalidate_folio,
2200  	.release_folio	= f2fs_release_folio,
2201  	.migrate_folio	= filemap_migrate_folio,
2202  };
2203  
2204  static struct free_nid *__lookup_free_nid_list(struct f2fs_nm_info *nm_i,
2205  						nid_t n)
2206  {
2207  	return radix_tree_lookup(&nm_i->free_nid_root, n);
2208  }
2209  
2210  static int __insert_free_nid(struct f2fs_sb_info *sbi,
2211  				struct free_nid *i)
2212  {
2213  	struct f2fs_nm_info *nm_i = NM_I(sbi);
2214  	int err = radix_tree_insert(&nm_i->free_nid_root, i->nid, i);
2215  
2216  	if (err)
2217  		return err;
2218  
2219  	nm_i->nid_cnt[FREE_NID]++;
2220  	list_add_tail(&i->list, &nm_i->free_nid_list);
2221  	return 0;
2222  }
2223  
2224  static void __remove_free_nid(struct f2fs_sb_info *sbi,
2225  			struct free_nid *i, enum nid_state state)
2226  {
2227  	struct f2fs_nm_info *nm_i = NM_I(sbi);
2228  
2229  	f2fs_bug_on(sbi, state != i->state);
2230  	nm_i->nid_cnt[state]--;
2231  	if (state == FREE_NID)
2232  		list_del(&i->list);
2233  	radix_tree_delete(&nm_i->free_nid_root, i->nid);
2234  }
2235  
2236  static void __move_free_nid(struct f2fs_sb_info *sbi, struct free_nid *i,
2237  			enum nid_state org_state, enum nid_state dst_state)
2238  {
2239  	struct f2fs_nm_info *nm_i = NM_I(sbi);
2240  
2241  	f2fs_bug_on(sbi, org_state != i->state);
2242  	i->state = dst_state;
2243  	nm_i->nid_cnt[org_state]--;
2244  	nm_i->nid_cnt[dst_state]++;
2245  
2246  	switch (dst_state) {
2247  	case PREALLOC_NID:
2248  		list_del(&i->list);
2249  		break;
2250  	case FREE_NID:
2251  		list_add_tail(&i->list, &nm_i->free_nid_list);
2252  		break;
2253  	default:
2254  		BUG_ON(1);
2255  	}
2256  }
2257  
2258  bool f2fs_nat_bitmap_enabled(struct f2fs_sb_info *sbi)
2259  {
2260  	struct f2fs_nm_info *nm_i = NM_I(sbi);
2261  	unsigned int i;
2262  	bool ret = true;
2263  
2264  	f2fs_down_read(&nm_i->nat_tree_lock);
2265  	for (i = 0; i < nm_i->nat_blocks; i++) {
2266  		if (!test_bit_le(i, nm_i->nat_block_bitmap)) {
2267  			ret = false;
2268  			break;
2269  		}
2270  	}
2271  	f2fs_up_read(&nm_i->nat_tree_lock);
2272  
2273  	return ret;
2274  }
2275  
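/*
 * Keep the per-NAT-block free nid bitmap (and its free nid counter) in
 * sync when a single nid becomes free (@set == true) or used
 * (@set == false).  Only NAT blocks that were already scanned, i.e. have
 * their bit set in nat_block_bitmap, are tracked here.
 */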
2276  static void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid,
2277  							bool set, bool build)
2278  {
2279  	struct f2fs_nm_info *nm_i = NM_I(sbi);
2280  	unsigned int nat_ofs = NAT_BLOCK_OFFSET(nid);
2281  	unsigned int nid_ofs = nid - START_NID(nid);
2282  
2283  	if (!test_bit_le(nat_ofs, nm_i->nat_block_bitmap))
2284  		return;
2285  
2286  	if (set) {
2287  		if (test_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]))
2288  			return;
2289  		__set_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
2290  		nm_i->free_nid_count[nat_ofs]++;
2291  	} else {
2292  		if (!test_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]))
2293  			return;
2294  		__clear_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
2295  		if (!build)
2296  			nm_i->free_nid_count[nat_ofs]--;
2297  	}
2298  }
2299  
2300  /* return whether the nid is recognized as free */
2301  static bool add_free_nid(struct f2fs_sb_info *sbi,
2302  				nid_t nid, bool build, bool update)
2303  {
2304  	struct f2fs_nm_info *nm_i = NM_I(sbi);
2305  	struct free_nid *i, *e;
2306  	struct nat_entry *ne;
2307  	int err = -EINVAL;
2308  	bool ret = false;
2309  
2310  	/* 0 nid should not be used */
2311  	if (unlikely(nid == 0))
2312  		return false;
2313  
2314  	if (unlikely(f2fs_check_nid_range(sbi, nid)))
2315  		return false;
2316  
2317  	i = f2fs_kmem_cache_alloc(free_nid_slab, GFP_NOFS, true, NULL);
2318  	i->nid = nid;
2319  	i->state = FREE_NID;
2320  
2321  	radix_tree_preload(GFP_NOFS | __GFP_NOFAIL);
2322  
2323  	spin_lock(&nm_i->nid_list_lock);
2324  
2325  	if (build) {
2326  		/*
2327  		 *   Thread A             Thread B
2328  		 *  - f2fs_create
2329  		 *   - f2fs_new_inode
2330  		 *    - f2fs_alloc_nid
2331  		 *     - __insert_nid_to_list(PREALLOC_NID)
2332  		 *                     - f2fs_balance_fs_bg
2333  		 *                      - f2fs_build_free_nids
2334  		 *                       - __f2fs_build_free_nids
2335  		 *                        - scan_nat_page
2336  		 *                         - add_free_nid
2337  		 *                          - __lookup_nat_cache
2338  		 *  - f2fs_add_link
2339  		 *   - f2fs_init_inode_metadata
2340  		 *    - f2fs_new_inode_page
2341  		 *     - f2fs_new_node_page
2342  		 *      - set_node_addr
2343  		 *  - f2fs_alloc_nid_done
2344  		 *   - __remove_nid_from_list(PREALLOC_NID)
2345  		 *                         - __insert_nid_to_list(FREE_NID)
2346  		 */
2347  		ne = __lookup_nat_cache(nm_i, nid);
2348  		if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) ||
2349  				nat_get_blkaddr(ne) != NULL_ADDR))
2350  			goto err_out;
2351  
2352  		e = __lookup_free_nid_list(nm_i, nid);
2353  		if (e) {
2354  			if (e->state == FREE_NID)
2355  				ret = true;
2356  			goto err_out;
2357  		}
2358  	}
2359  	ret = true;
2360  	err = __insert_free_nid(sbi, i);
2361  err_out:
2362  	if (update) {
2363  		update_free_nid_bitmap(sbi, nid, ret, build);
2364  		if (!build)
2365  			nm_i->available_nids++;
2366  	}
2367  	spin_unlock(&nm_i->nid_list_lock);
2368  	radix_tree_preload_end();
2369  
2370  	if (err)
2371  		kmem_cache_free(free_nid_slab, i);
2372  	return ret;
2373  }
2374  
2375  static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid)
2376  {
2377  	struct f2fs_nm_info *nm_i = NM_I(sbi);
2378  	struct free_nid *i;
2379  	bool need_free = false;
2380  
2381  	spin_lock(&nm_i->nid_list_lock);
2382  	i = __lookup_free_nid_list(nm_i, nid);
2383  	if (i && i->state == FREE_NID) {
2384  		__remove_free_nid(sbi, i, FREE_NID);
2385  		need_free = true;
2386  	}
2387  	spin_unlock(&nm_i->nid_list_lock);
2388  
2389  	if (need_free)
2390  		kmem_cache_free(free_nid_slab, i);
2391  }
2392  
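/*
 * Scan one on-disk NAT block starting at @start_nid: entries whose block
 * address is NULL_ADDR are added as free nids, used entries clear the
 * corresponding free-nid bit.  A NEW_ADDR found here means the NAT is
 * inconsistent, so the scan bails out with -EFSCORRUPTED.
 */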
2393  static int scan_nat_page(struct f2fs_sb_info *sbi,
2394  			struct page *nat_page, nid_t start_nid)
2395  {
2396  	struct f2fs_nm_info *nm_i = NM_I(sbi);
2397  	struct f2fs_nat_block *nat_blk = page_address(nat_page);
2398  	block_t blk_addr;
2399  	unsigned int nat_ofs = NAT_BLOCK_OFFSET(start_nid);
2400  	int i;
2401  
2402  	__set_bit_le(nat_ofs, nm_i->nat_block_bitmap);
2403  
2404  	i = start_nid % NAT_ENTRY_PER_BLOCK;
2405  
2406  	for (; i < NAT_ENTRY_PER_BLOCK; i++, start_nid++) {
2407  		if (unlikely(start_nid >= nm_i->max_nid))
2408  			break;
2409  
2410  		blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr);
2411  
2412  		if (blk_addr == NEW_ADDR)
2413  			return -EFSCORRUPTED;
2414  
2415  		if (blk_addr == NULL_ADDR) {
2416  			add_free_nid(sbi, start_nid, true, true);
2417  		} else {
2418  			spin_lock(&NM_I(sbi)->nid_list_lock);
2419  			update_free_nid_bitmap(sbi, start_nid, false, true);
2420  			spin_unlock(&NM_I(sbi)->nid_list_lock);
2421  		}
2422  	}
2423  
2424  	return 0;
2425  }
2426  
2427  static void scan_curseg_cache(struct f2fs_sb_info *sbi)
2428  {
2429  	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
2430  	struct f2fs_journal *journal = curseg->journal;
2431  	int i;
2432  
2433  	down_read(&curseg->journal_rwsem);
2434  	for (i = 0; i < nats_in_cursum(journal); i++) {
2435  		block_t addr;
2436  		nid_t nid;
2437  
2438  		addr = le32_to_cpu(nat_in_journal(journal, i).block_addr);
2439  		nid = le32_to_cpu(nid_in_journal(journal, i));
2440  		if (addr == NULL_ADDR)
2441  			add_free_nid(sbi, nid, true, false);
2442  		else
2443  			remove_free_nid(sbi, nid);
2444  	}
2445  	up_read(&curseg->journal_rwsem);
2446  }
2447  
2448  static void scan_free_nid_bits(struct f2fs_sb_info *sbi)
2449  {
2450  	struct f2fs_nm_info *nm_i = NM_I(sbi);
2451  	unsigned int i, idx;
2452  	nid_t nid;
2453  
2454  	f2fs_down_read(&nm_i->nat_tree_lock);
2455  
2456  	for (i = 0; i < nm_i->nat_blocks; i++) {
2457  		if (!test_bit_le(i, nm_i->nat_block_bitmap))
2458  			continue;
2459  		if (!nm_i->free_nid_count[i])
2460  			continue;
2461  		for (idx = 0; idx < NAT_ENTRY_PER_BLOCK; idx++) {
2462  			idx = find_next_bit_le(nm_i->free_nid_bitmap[i],
2463  						NAT_ENTRY_PER_BLOCK, idx);
2464  			if (idx >= NAT_ENTRY_PER_BLOCK)
2465  				break;
2466  
2467  			nid = i * NAT_ENTRY_PER_BLOCK + idx;
2468  			add_free_nid(sbi, nid, true, false);
2469  
2470  			if (nm_i->nid_cnt[FREE_NID] >= MAX_FREE_NIDS)
2471  				goto out;
2472  		}
2473  	}
2474  out:
2475  	scan_curseg_cache(sbi);
2476  
2477  	f2fs_up_read(&nm_i->nat_tree_lock);
2478  }
2479  
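/*
 * Refill the free nid cache.  Outside of mount we first try the in-memory
 * free nid bitmaps; if that does not yield enough nids, up to
 * FREE_NID_PAGES NAT blocks are read (with readahead) and scanned, and
 * finally the NAT journal in the hot data summary is consulted.
 */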
2480  static int __f2fs_build_free_nids(struct f2fs_sb_info *sbi,
2481  						bool sync, bool mount)
2482  {
2483  	struct f2fs_nm_info *nm_i = NM_I(sbi);
2484  	int i = 0, ret;
2485  	nid_t nid = nm_i->next_scan_nid;
2486  
2487  	if (unlikely(nid >= nm_i->max_nid))
2488  		nid = 0;
2489  
2490  	if (unlikely(nid % NAT_ENTRY_PER_BLOCK))
2491  		nid = NAT_BLOCK_OFFSET(nid) * NAT_ENTRY_PER_BLOCK;
2492  
2493  	/* Enough entries */
2494  	if (nm_i->nid_cnt[FREE_NID] >= NAT_ENTRY_PER_BLOCK)
2495  		return 0;
2496  
2497  	if (!sync && !f2fs_available_free_memory(sbi, FREE_NIDS))
2498  		return 0;
2499  
2500  	if (!mount) {
2501  		/* try to find free nids in free_nid_bitmap */
2502  		scan_free_nid_bits(sbi);
2503  
2504  		if (nm_i->nid_cnt[FREE_NID] >= NAT_ENTRY_PER_BLOCK)
2505  			return 0;
2506  	}
2507  
2508  	/* readahead nat pages to be scanned */
2509  	f2fs_ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES,
2510  							META_NAT, true);
2511  
2512  	f2fs_down_read(&nm_i->nat_tree_lock);
2513  
2514  	while (1) {
2515  		if (!test_bit_le(NAT_BLOCK_OFFSET(nid),
2516  						nm_i->nat_block_bitmap)) {
2517  			struct page *page = get_current_nat_page(sbi, nid);
2518  
2519  			if (IS_ERR(page)) {
2520  				ret = PTR_ERR(page);
2521  			} else {
2522  				ret = scan_nat_page(sbi, page, nid);
2523  				f2fs_put_page(page, 1);
2524  			}
2525  
2526  			if (ret) {
2527  				f2fs_up_read(&nm_i->nat_tree_lock);
2528  
2529  				if (ret == -EFSCORRUPTED) {
2530  					f2fs_err(sbi, "NAT is corrupt, run fsck to fix it");
2531  					set_sbi_flag(sbi, SBI_NEED_FSCK);
2532  					f2fs_handle_error(sbi,
2533  						ERROR_INCONSISTENT_NAT);
2534  				}
2535  
2536  				return ret;
2537  			}
2538  		}
2539  
2540  		nid += (NAT_ENTRY_PER_BLOCK - (nid % NAT_ENTRY_PER_BLOCK));
2541  		if (unlikely(nid >= nm_i->max_nid))
2542  			nid = 0;
2543  
2544  		if (++i >= FREE_NID_PAGES)
2545  			break;
2546  	}
2547  
2548  	/* remember where to resume scanning for free nids next time */
2549  	nm_i->next_scan_nid = nid;
2550  
2551  	/* find free nids from current sum_pages */
2552  	scan_curseg_cache(sbi);
2553  
2554  	f2fs_up_read(&nm_i->nat_tree_lock);
2555  
2556  	f2fs_ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid),
2557  					nm_i->ra_nid_pages, META_NAT, false);
2558  
2559  	return 0;
2560  }
2561  
2562  int f2fs_build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
2563  {
2564  	int ret;
2565  
2566  	mutex_lock(&NM_I(sbi)->build_lock);
2567  	ret = __f2fs_build_free_nids(sbi, sync, mount);
2568  	mutex_unlock(&NM_I(sbi)->build_lock);
2569  
2570  	return ret;
2571  }
2572  
2573  /*
2574   * If this function returns success, the caller can obtain a new nid
2575   * from the second parameter of this function.
2576   * The returned nid can be used as an ino as well as a nid when the inode is created.
2577   */
2578  bool f2fs_alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
2579  {
2580  	struct f2fs_nm_info *nm_i = NM_I(sbi);
2581  	struct free_nid *i = NULL;
2582  retry:
2583  	if (time_to_inject(sbi, FAULT_ALLOC_NID))
2584  		return false;
2585  
2586  	spin_lock(&nm_i->nid_list_lock);
2587  
2588  	if (unlikely(nm_i->available_nids == 0)) {
2589  		spin_unlock(&nm_i->nid_list_lock);
2590  		return false;
2591  	}
2592  
2593  	/* We should not use stale free nids created by f2fs_build_free_nids */
2594  	if (nm_i->nid_cnt[FREE_NID] && !on_f2fs_build_free_nids(nm_i)) {
2595  		f2fs_bug_on(sbi, list_empty(&nm_i->free_nid_list));
2596  		i = list_first_entry(&nm_i->free_nid_list,
2597  					struct free_nid, list);
2598  		*nid = i->nid;
2599  
2600  		__move_free_nid(sbi, i, FREE_NID, PREALLOC_NID);
2601  		nm_i->available_nids--;
2602  
2603  		update_free_nid_bitmap(sbi, *nid, false, false);
2604  
2605  		spin_unlock(&nm_i->nid_list_lock);
2606  		return true;
2607  	}
2608  	spin_unlock(&nm_i->nid_list_lock);
2609  
2610  	/* Let's scan nat pages and their caches to get free nids */
2611  	if (!f2fs_build_free_nids(sbi, true, false))
2612  		goto retry;
2613  	return false;
2614  }
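
/*
 * Illustrative caller pattern for the nid allocation API (sketch only;
 * see f2fs_recover_xattr_data() below for a real in-file user):
 *
 *	nid_t nid;
 *
 *	if (!f2fs_alloc_nid(sbi, &nid))
 *		return -ENOSPC;
 *	... allocate and initialize the node page for nid ...
 *	if (succeeded)
 *		f2fs_alloc_nid_done(sbi, nid);
 *	else
 *		f2fs_alloc_nid_failed(sbi, nid);
 */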
2615  
2616  /*
2617   * f2fs_alloc_nid() should be called prior to this function.
2618   */
2619  void f2fs_alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid)
2620  {
2621  	struct f2fs_nm_info *nm_i = NM_I(sbi);
2622  	struct free_nid *i;
2623  
2624  	spin_lock(&nm_i->nid_list_lock);
2625  	i = __lookup_free_nid_list(nm_i, nid);
2626  	f2fs_bug_on(sbi, !i);
2627  	__remove_free_nid(sbi, i, PREALLOC_NID);
2628  	spin_unlock(&nm_i->nid_list_lock);
2629  
2630  	kmem_cache_free(free_nid_slab, i);
2631  }
2632  
2633  /*
2634   * f2fs_alloc_nid() should be called prior to this function.
2635   */
2636  void f2fs_alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
2637  {
2638  	struct f2fs_nm_info *nm_i = NM_I(sbi);
2639  	struct free_nid *i;
2640  	bool need_free = false;
2641  
2642  	if (!nid)
2643  		return;
2644  
2645  	spin_lock(&nm_i->nid_list_lock);
2646  	i = __lookup_free_nid_list(nm_i, nid);
2647  	f2fs_bug_on(sbi, !i);
2648  
2649  	if (!f2fs_available_free_memory(sbi, FREE_NIDS)) {
2650  		__remove_free_nid(sbi, i, PREALLOC_NID);
2651  		need_free = true;
2652  	} else {
2653  		__move_free_nid(sbi, i, PREALLOC_NID, FREE_NID);
2654  	}
2655  
2656  	nm_i->available_nids++;
2657  
2658  	update_free_nid_bitmap(sbi, nid, true, false);
2659  
2660  	spin_unlock(&nm_i->nid_list_lock);
2661  
2662  	if (need_free)
2663  		kmem_cache_free(free_nid_slab, i);
2664  }
2665  
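/*
 * Trim the cached free nids when there are more than MAX_FREE_NIDS,
 * releasing at most @nr_shrink of them in SHRINK_NID_BATCH_SIZE chunks
 * so that nid_list_lock is not held for too long.  Returns how many
 * nids were actually freed.
 */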
2666  int f2fs_try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink)
2667  {
2668  	struct f2fs_nm_info *nm_i = NM_I(sbi);
2669  	int nr = nr_shrink;
2670  
2671  	if (nm_i->nid_cnt[FREE_NID] <= MAX_FREE_NIDS)
2672  		return 0;
2673  
2674  	if (!mutex_trylock(&nm_i->build_lock))
2675  		return 0;
2676  
2677  	while (nr_shrink && nm_i->nid_cnt[FREE_NID] > MAX_FREE_NIDS) {
2678  		struct free_nid *i, *next;
2679  		unsigned int batch = SHRINK_NID_BATCH_SIZE;
2680  
2681  		spin_lock(&nm_i->nid_list_lock);
2682  		list_for_each_entry_safe(i, next, &nm_i->free_nid_list, list) {
2683  			if (!nr_shrink || !batch ||
2684  				nm_i->nid_cnt[FREE_NID] <= MAX_FREE_NIDS)
2685  				break;
2686  			__remove_free_nid(sbi, i, FREE_NID);
2687  			kmem_cache_free(free_nid_slab, i);
2688  			nr_shrink--;
2689  			batch--;
2690  		}
2691  		spin_unlock(&nm_i->nid_list_lock);
2692  	}
2693  
2694  	mutex_unlock(&nm_i->build_lock);
2695  
2696  	return nr - nr_shrink;
2697  }
2698  
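/*
 * During roll-forward recovery, make the inline xattr area of @inode
 * match what the fsynced node page @page recorded: adjust the
 * inline-xattr inode flag and, when inline xattrs are present, copy
 * them into the inode page.
 */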
2699  int f2fs_recover_inline_xattr(struct inode *inode, struct page *page)
2700  {
2701  	void *src_addr, *dst_addr;
2702  	size_t inline_size;
2703  	struct page *ipage;
2704  	struct f2fs_inode *ri;
2705  
2706  	ipage = f2fs_get_node_page(F2FS_I_SB(inode), inode->i_ino);
2707  	if (IS_ERR(ipage))
2708  		return PTR_ERR(ipage);
2709  
2710  	ri = F2FS_INODE(page);
2711  	if (ri->i_inline & F2FS_INLINE_XATTR) {
2712  		if (!f2fs_has_inline_xattr(inode)) {
2713  			set_inode_flag(inode, FI_INLINE_XATTR);
2714  			stat_inc_inline_xattr(inode);
2715  		}
2716  	} else {
2717  		if (f2fs_has_inline_xattr(inode)) {
2718  			stat_dec_inline_xattr(inode);
2719  			clear_inode_flag(inode, FI_INLINE_XATTR);
2720  		}
2721  		goto update_inode;
2722  	}
2723  
2724  	dst_addr = inline_xattr_addr(inode, ipage);
2725  	src_addr = inline_xattr_addr(inode, page);
2726  	inline_size = inline_xattr_size(inode);
2727  
2728  	f2fs_wait_on_page_writeback(ipage, NODE, true, true);
2729  	memcpy(dst_addr, src_addr, inline_size);
2730  update_inode:
2731  	f2fs_update_inode(inode, ipage);
2732  	f2fs_put_page(ipage, 1);
2733  	return 0;
2734  }
2735  
2736  int f2fs_recover_xattr_data(struct inode *inode, struct page *page)
2737  {
2738  	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2739  	nid_t prev_xnid = F2FS_I(inode)->i_xattr_nid;
2740  	nid_t new_xnid;
2741  	struct dnode_of_data dn;
2742  	struct node_info ni;
2743  	struct page *xpage;
2744  	int err;
2745  
2746  	if (!prev_xnid)
2747  		goto recover_xnid;
2748  
2749  	/* 1: invalidate the previous xattr nid */
2750  	err = f2fs_get_node_info(sbi, prev_xnid, &ni, false);
2751  	if (err)
2752  		return err;
2753  
2754  	f2fs_invalidate_blocks(sbi, ni.blk_addr);
2755  	dec_valid_node_count(sbi, inode, false);
2756  	set_node_addr(sbi, &ni, NULL_ADDR, false);
2757  
2758  recover_xnid:
2759  	/* 2: update xattr nid in inode */
2760  	if (!f2fs_alloc_nid(sbi, &new_xnid))
2761  		return -ENOSPC;
2762  
2763  	set_new_dnode(&dn, inode, NULL, NULL, new_xnid);
2764  	xpage = f2fs_new_node_page(&dn, XATTR_NODE_OFFSET);
2765  	if (IS_ERR(xpage)) {
2766  		f2fs_alloc_nid_failed(sbi, new_xnid);
2767  		return PTR_ERR(xpage);
2768  	}
2769  
2770  	f2fs_alloc_nid_done(sbi, new_xnid);
2771  	f2fs_update_inode_page(inode);
2772  
2773  	/* 3: update and set xattr node page dirty */
2774  	if (page) {
2775  		memcpy(F2FS_NODE(xpage), F2FS_NODE(page),
2776  				VALID_XATTR_BLOCK_SIZE);
2777  		set_page_dirty(xpage);
2778  	}
2779  	f2fs_put_page(xpage, 1);
2780  
2781  	return 0;
2782  }
2783  
2784  int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
2785  {
2786  	struct f2fs_inode *src, *dst;
2787  	nid_t ino = ino_of_node(page);
2788  	struct node_info old_ni, new_ni;
2789  	struct page *ipage;
2790  	int err;
2791  
2792  	err = f2fs_get_node_info(sbi, ino, &old_ni, false);
2793  	if (err)
2794  		return err;
2795  
2796  	if (unlikely(old_ni.blk_addr != NULL_ADDR))
2797  		return -EINVAL;
2798  retry:
2799  	ipage = f2fs_grab_cache_page(NODE_MAPPING(sbi), ino, false);
2800  	if (!ipage) {
2801  		memalloc_retry_wait(GFP_NOFS);
2802  		goto retry;
2803  	}
2804  
2805  	/* Should not use this inode from free nid list */
2806  	remove_free_nid(sbi, ino);
2807  
2808  	if (!PageUptodate(ipage))
2809  		SetPageUptodate(ipage);
2810  	fill_node_footer(ipage, ino, ino, 0, true);
2811  	set_cold_node(ipage, false);
2812  
2813  	src = F2FS_INODE(page);
2814  	dst = F2FS_INODE(ipage);
2815  
2816  	memcpy(dst, src, offsetof(struct f2fs_inode, i_ext));
2817  	dst->i_size = 0;
2818  	dst->i_blocks = cpu_to_le64(1);
2819  	dst->i_links = cpu_to_le32(1);
2820  	dst->i_xattr_nid = 0;
2821  	dst->i_inline = src->i_inline & (F2FS_INLINE_XATTR | F2FS_EXTRA_ATTR);
2822  	if (dst->i_inline & F2FS_EXTRA_ATTR) {
2823  		dst->i_extra_isize = src->i_extra_isize;
2824  
2825  		if (f2fs_sb_has_flexible_inline_xattr(sbi) &&
2826  			F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize),
2827  							i_inline_xattr_size))
2828  			dst->i_inline_xattr_size = src->i_inline_xattr_size;
2829  
2830  		if (f2fs_sb_has_project_quota(sbi) &&
2831  			F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize),
2832  								i_projid))
2833  			dst->i_projid = src->i_projid;
2834  
2835  		if (f2fs_sb_has_inode_crtime(sbi) &&
2836  			F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize),
2837  							i_crtime_nsec)) {
2838  			dst->i_crtime = src->i_crtime;
2839  			dst->i_crtime_nsec = src->i_crtime_nsec;
2840  		}
2841  	}
2842  
2843  	new_ni = old_ni;
2844  	new_ni.ino = ino;
2845  
2846  	if (unlikely(inc_valid_node_count(sbi, NULL, true)))
2847  		WARN_ON(1);
2848  	set_node_addr(sbi, &new_ni, NEW_ADDR, false);
2849  	inc_valid_inode_count(sbi);
2850  	set_page_dirty(ipage);
2851  	f2fs_put_page(ipage, 1);
2852  	return 0;
2853  }
2854  
2855  int f2fs_restore_node_summary(struct f2fs_sb_info *sbi,
2856  			unsigned int segno, struct f2fs_summary_block *sum)
2857  {
2858  	struct f2fs_node *rn;
2859  	struct f2fs_summary *sum_entry;
2860  	block_t addr;
2861  	int i, idx, last_offset, nrpages;
2862  
2863  	/* scan the node segment */
2864  	last_offset = BLKS_PER_SEG(sbi);
2865  	addr = START_BLOCK(sbi, segno);
2866  	sum_entry = &sum->entries[0];
2867  
2868  	for (i = 0; i < last_offset; i += nrpages, addr += nrpages) {
2869  		nrpages = bio_max_segs(last_offset - i);
2870  
2871  		/* readahead node pages */
2872  		f2fs_ra_meta_pages(sbi, addr, nrpages, META_POR, true);
2873  
2874  		for (idx = addr; idx < addr + nrpages; idx++) {
2875  			struct page *page = f2fs_get_tmp_page(sbi, idx);
2876  
2877  			if (IS_ERR(page))
2878  				return PTR_ERR(page);
2879  
2880  			rn = F2FS_NODE(page);
2881  			sum_entry->nid = rn->footer.nid;
2882  			sum_entry->version = 0;
2883  			sum_entry->ofs_in_node = 0;
2884  			sum_entry++;
2885  			f2fs_put_page(page, 1);
2886  		}
2887  
2888  		invalidate_mapping_pages(META_MAPPING(sbi), addr,
2889  							addr + nrpages);
2890  	}
2891  	return 0;
2892  }
2893  
2894  static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
2895  {
2896  	struct f2fs_nm_info *nm_i = NM_I(sbi);
2897  	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
2898  	struct f2fs_journal *journal = curseg->journal;
2899  	int i;
2900  
2901  	down_write(&curseg->journal_rwsem);
2902  	for (i = 0; i < nats_in_cursum(journal); i++) {
2903  		struct nat_entry *ne;
2904  		struct f2fs_nat_entry raw_ne;
2905  		nid_t nid = le32_to_cpu(nid_in_journal(journal, i));
2906  
2907  		if (f2fs_check_nid_range(sbi, nid))
2908  			continue;
2909  
2910  		raw_ne = nat_in_journal(journal, i);
2911  
2912  		ne = __lookup_nat_cache(nm_i, nid);
2913  		if (!ne) {
2914  			ne = __alloc_nat_entry(sbi, nid, true);
2915  			__init_nat_entry(nm_i, ne, &raw_ne, true);
2916  		}
2917  
2918  		/*
2919  		 * if a free nat in journal has not been used after last
2920  		 * checkpoint, we should remove it from available nids,
2921  		 * since later we will add it again.
2922  		 */
2923  		if (!get_nat_flag(ne, IS_DIRTY) &&
2924  				le32_to_cpu(raw_ne.block_addr) == NULL_ADDR) {
2925  			spin_lock(&nm_i->nid_list_lock);
2926  			nm_i->available_nids--;
2927  			spin_unlock(&nm_i->nid_list_lock);
2928  		}
2929  
2930  		__set_nat_cache_dirty(nm_i, ne);
2931  	}
2932  	update_nats_in_cursum(journal, -i);
2933  	up_write(&curseg->journal_rwsem);
2934  }
2935  
2936  static void __adjust_nat_entry_set(struct nat_entry_set *nes,
2937  						struct list_head *head, int max)
2938  {
2939  	struct nat_entry_set *cur;
2940  
2941  	if (nes->entry_cnt >= max)
2942  		goto add_out;
2943  
2944  	list_for_each_entry(cur, head, set_list) {
2945  		if (cur->entry_cnt >= nes->entry_cnt) {
2946  			list_add(&nes->set_list, cur->set_list.prev);
2947  			return;
2948  		}
2949  	}
2950  add_out:
2951  	list_add_tail(&nes->set_list, head);
2952  }
2953  
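/*
 * Maintain the nat_bits state of one NAT block: mark it empty when it
 * has no valid entry, full when every entry is valid, and clear both
 * bits otherwise.
 */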
2954  static void __update_nat_bits(struct f2fs_nm_info *nm_i, unsigned int nat_ofs,
2955  							unsigned int valid)
2956  {
2957  	if (valid == 0) {
2958  		__set_bit_le(nat_ofs, nm_i->empty_nat_bits);
2959  		__clear_bit_le(nat_ofs, nm_i->full_nat_bits);
2960  		return;
2961  	}
2962  
2963  	__clear_bit_le(nat_ofs, nm_i->empty_nat_bits);
2964  	if (valid == NAT_ENTRY_PER_BLOCK)
2965  		__set_bit_le(nat_ofs, nm_i->full_nat_bits);
2966  	else
2967  		__clear_bit_le(nat_ofs, nm_i->full_nat_bits);
2968  }
2969  
2970  static void update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
2971  						struct page *page)
2972  {
2973  	struct f2fs_nm_info *nm_i = NM_I(sbi);
2974  	unsigned int nat_index = start_nid / NAT_ENTRY_PER_BLOCK;
2975  	struct f2fs_nat_block *nat_blk = page_address(page);
2976  	int valid = 0;
2977  	int i = 0;
2978  
2979  	if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
2980  		return;
2981  
2982  	if (nat_index == 0) {
2983  		valid = 1;
2984  		i = 1;
2985  	}
2986  	for (; i < NAT_ENTRY_PER_BLOCK; i++) {
2987  		if (le32_to_cpu(nat_blk->entries[i].block_addr) != NULL_ADDR)
2988  			valid++;
2989  	}
2990  
2991  	__update_nat_bits(nm_i, nat_index, valid);
2992  }
2993  
2994  void f2fs_enable_nat_bits(struct f2fs_sb_info *sbi)
2995  {
2996  	struct f2fs_nm_info *nm_i = NM_I(sbi);
2997  	unsigned int nat_ofs;
2998  
2999  	f2fs_down_read(&nm_i->nat_tree_lock);
3000  
3001  	for (nat_ofs = 0; nat_ofs < nm_i->nat_blocks; nat_ofs++) {
3002  		unsigned int valid = 0, nid_ofs = 0;
3003  
3004  		/* handle nid zero specially, since it should never be used */
3005  		if (unlikely(nat_ofs == 0)) {
3006  			valid = 1;
3007  			nid_ofs = 1;
3008  		}
3009  
3010  		for (; nid_ofs < NAT_ENTRY_PER_BLOCK; nid_ofs++) {
3011  			if (!test_bit_le(nid_ofs,
3012  					nm_i->free_nid_bitmap[nat_ofs]))
3013  				valid++;
3014  		}
3015  
3016  		__update_nat_bits(nm_i, nat_ofs, valid);
3017  	}
3018  
3019  	f2fs_up_read(&nm_i->nat_tree_lock);
3020  }
3021  
3022  static int __flush_nat_entry_set(struct f2fs_sb_info *sbi,
3023  		struct nat_entry_set *set, struct cp_control *cpc)
3024  {
3025  	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
3026  	struct f2fs_journal *journal = curseg->journal;
3027  	nid_t start_nid = set->set * NAT_ENTRY_PER_BLOCK;
3028  	bool to_journal = true;
3029  	struct f2fs_nat_block *nat_blk;
3030  	struct nat_entry *ne, *cur;
3031  	struct page *page = NULL;
3032  
3033  	/*
3034  	 * there are two steps to flush nat entries:
3035  	 * #1, flush nat entries to journal in current hot data summary block.
3036  	 * #2, flush nat entries to nat page.
3037  	 */
3038  	if ((cpc->reason & CP_UMOUNT) ||
3039  		!__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL))
3040  		to_journal = false;
3041  
3042  	if (to_journal) {
3043  		down_write(&curseg->journal_rwsem);
3044  	} else {
3045  		page = get_next_nat_page(sbi, start_nid);
3046  		if (IS_ERR(page))
3047  			return PTR_ERR(page);
3048  
3049  		nat_blk = page_address(page);
3050  		f2fs_bug_on(sbi, !nat_blk);
3051  	}
3052  
3053  	/* flush dirty nats in nat entry set */
3054  	list_for_each_entry_safe(ne, cur, &set->entry_list, list) {
3055  		struct f2fs_nat_entry *raw_ne;
3056  		nid_t nid = nat_get_nid(ne);
3057  		int offset;
3058  
3059  		f2fs_bug_on(sbi, nat_get_blkaddr(ne) == NEW_ADDR);
3060  
3061  		if (to_journal) {
3062  			offset = f2fs_lookup_journal_in_cursum(journal,
3063  							NAT_JOURNAL, nid, 1);
3064  			f2fs_bug_on(sbi, offset < 0);
3065  			raw_ne = &nat_in_journal(journal, offset);
3066  			nid_in_journal(journal, offset) = cpu_to_le32(nid);
3067  		} else {
3068  			raw_ne = &nat_blk->entries[nid - start_nid];
3069  		}
3070  		raw_nat_from_node_info(raw_ne, &ne->ni);
3071  		nat_reset_flag(ne);
3072  		__clear_nat_cache_dirty(NM_I(sbi), set, ne);
3073  		if (nat_get_blkaddr(ne) == NULL_ADDR) {
3074  			add_free_nid(sbi, nid, false, true);
3075  		} else {
3076  			spin_lock(&NM_I(sbi)->nid_list_lock);
3077  			update_free_nid_bitmap(sbi, nid, false, false);
3078  			spin_unlock(&NM_I(sbi)->nid_list_lock);
3079  		}
3080  	}
3081  
3082  	if (to_journal) {
3083  		up_write(&curseg->journal_rwsem);
3084  	} else {
3085  		update_nat_bits(sbi, start_nid, page);
3086  		f2fs_put_page(page, 1);
3087  	}
3088  
3089  	/* Allow dirty nats by node block allocation in write_begin */
3090  	if (!set->entry_cnt) {
3091  		radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set);
3092  		kmem_cache_free(nat_entry_set_slab, set);
3093  	}
3094  	return 0;
3095  }
3096  
3097  /*
3098   * This function is called during the checkpointing process.
3099   */
3100  int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
3101  {
3102  	struct f2fs_nm_info *nm_i = NM_I(sbi);
3103  	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
3104  	struct f2fs_journal *journal = curseg->journal;
3105  	struct nat_entry_set *setvec[NAT_VEC_SIZE];
3106  	struct nat_entry_set *set, *tmp;
3107  	unsigned int found;
3108  	nid_t set_idx = 0;
3109  	LIST_HEAD(sets);
3110  	int err = 0;
3111  
3112  	/*
3113  	 * during unmount, let's flush nat_bits before checking
3114  	 * nat_cnt[DIRTY_NAT].
3115  	 */
3116  	if (cpc->reason & CP_UMOUNT) {
3117  		f2fs_down_write(&nm_i->nat_tree_lock);
3118  		remove_nats_in_journal(sbi);
3119  		f2fs_up_write(&nm_i->nat_tree_lock);
3120  	}
3121  
3122  	if (!nm_i->nat_cnt[DIRTY_NAT])
3123  		return 0;
3124  
3125  	f2fs_down_write(&nm_i->nat_tree_lock);
3126  
3127  	/*
3128  	 * if there is not enough space in the journal to store dirty nat
3129  	 * entries, remove all entries from journal and merge them
3130  	 * into nat entry set.
3131  	 */
3132  	if (cpc->reason & CP_UMOUNT ||
3133  		!__has_cursum_space(journal,
3134  			nm_i->nat_cnt[DIRTY_NAT], NAT_JOURNAL))
3135  		remove_nats_in_journal(sbi);
3136  
3137  	while ((found = __gang_lookup_nat_set(nm_i,
3138  					set_idx, NAT_VEC_SIZE, setvec))) {
3139  		unsigned idx;
3140  
3141  		set_idx = setvec[found - 1]->set + 1;
3142  		for (idx = 0; idx < found; idx++)
3143  			__adjust_nat_entry_set(setvec[idx], &sets,
3144  						MAX_NAT_JENTRIES(journal));
3145  	}
3146  
3147  	/* flush dirty nats in nat entry set */
3148  	list_for_each_entry_safe(set, tmp, &sets, set_list) {
3149  		err = __flush_nat_entry_set(sbi, set, cpc);
3150  		if (err)
3151  			break;
3152  	}
3153  
3154  	f2fs_up_write(&nm_i->nat_tree_lock);
3155  	/* Allow dirty nats by node block allocation in write_begin */
3156  
3157  	return err;
3158  }
3159  
3160  static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
3161  {
3162  	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
3163  	struct f2fs_nm_info *nm_i = NM_I(sbi);
3164  	unsigned int nat_bits_bytes = nm_i->nat_blocks / BITS_PER_BYTE;
3165  	unsigned int i;
3166  	__u64 cp_ver = cur_cp_version(ckpt);
3167  	block_t nat_bits_addr;
3168  
3169  	nm_i->nat_bits_blocks = F2FS_BLK_ALIGN((nat_bits_bytes << 1) + 8);
3170  	nm_i->nat_bits = f2fs_kvzalloc(sbi,
3171  			F2FS_BLK_TO_BYTES(nm_i->nat_bits_blocks), GFP_KERNEL);
3172  	if (!nm_i->nat_bits)
3173  		return -ENOMEM;
3174  
3175  	nm_i->full_nat_bits = nm_i->nat_bits + 8;
3176  	nm_i->empty_nat_bits = nm_i->full_nat_bits + nat_bits_bytes;
3177  
3178  	if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
3179  		return 0;
3180  
3181  	nat_bits_addr = __start_cp_addr(sbi) + BLKS_PER_SEG(sbi) -
3182  						nm_i->nat_bits_blocks;
3183  	for (i = 0; i < nm_i->nat_bits_blocks; i++) {
3184  		struct page *page;
3185  
3186  		page = f2fs_get_meta_page(sbi, nat_bits_addr++);
3187  		if (IS_ERR(page))
3188  			return PTR_ERR(page);
3189  
3190  		memcpy(nm_i->nat_bits + F2FS_BLK_TO_BYTES(i),
3191  					page_address(page), F2FS_BLKSIZE);
3192  		f2fs_put_page(page, 1);
3193  	}
3194  
3195  	cp_ver |= (cur_cp_crc(ckpt) << 32);
3196  	if (cpu_to_le64(cp_ver) != *(__le64 *)nm_i->nat_bits) {
3197  		clear_ckpt_flags(sbi, CP_NAT_BITS_FLAG);
3198  		f2fs_notice(sbi, "Disable nat_bits due to incorrect cp_ver (%llu, %llu)",
3199  			cp_ver, le64_to_cpu(*(__le64 *)nm_i->nat_bits));
3200  		return 0;
3201  	}
3202  
3203  	f2fs_notice(sbi, "Found nat_bits in checkpoint");
3204  	return 0;
3205  }
3206  
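/*
 * Prime the free nid bitmaps from the nat_bits saved in the checkpoint:
 * NAT blocks recorded as empty are marked scanned with every nid free,
 * and blocks recorded as full are marked scanned with no free nid, so
 * neither kind has to be read from disk while building free nids.
 */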
3207  static inline void load_free_nid_bitmap(struct f2fs_sb_info *sbi)
3208  {
3209  	struct f2fs_nm_info *nm_i = NM_I(sbi);
3210  	unsigned int i = 0;
3211  	nid_t nid, last_nid;
3212  
3213  	if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG))
3214  		return;
3215  
3216  	for (i = 0; i < nm_i->nat_blocks; i++) {
3217  		i = find_next_bit_le(nm_i->empty_nat_bits, nm_i->nat_blocks, i);
3218  		if (i >= nm_i->nat_blocks)
3219  			break;
3220  
3221  		__set_bit_le(i, nm_i->nat_block_bitmap);
3222  
3223  		nid = i * NAT_ENTRY_PER_BLOCK;
3224  		last_nid = nid + NAT_ENTRY_PER_BLOCK;
3225  
3226  		spin_lock(&NM_I(sbi)->nid_list_lock);
3227  		for (; nid < last_nid; nid++)
3228  			update_free_nid_bitmap(sbi, nid, true, true);
3229  		spin_unlock(&NM_I(sbi)->nid_list_lock);
3230  	}
3231  
3232  	for (i = 0; i < nm_i->nat_blocks; i++) {
3233  		i = find_next_bit_le(nm_i->full_nat_bits, nm_i->nat_blocks, i);
3234  		if (i >= nm_i->nat_blocks)
3235  			break;
3236  
3237  		__set_bit_le(i, nm_i->nat_block_bitmap);
3238  	}
3239  }
3240  
3241  static int init_node_manager(struct f2fs_sb_info *sbi)
3242  {
3243  	struct f2fs_super_block *sb_raw = F2FS_RAW_SUPER(sbi);
3244  	struct f2fs_nm_info *nm_i = NM_I(sbi);
3245  	unsigned char *version_bitmap;
3246  	unsigned int nat_segs;
3247  	int err;
3248  
3249  	nm_i->nat_blkaddr = le32_to_cpu(sb_raw->nat_blkaddr);
3250  
3251  	/* segment_count_nat includes pair segment so divide by 2. */
3252  	nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1;
3253  	nm_i->nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg);
3254  	nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nm_i->nat_blocks;
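	/*
	 * Worked example (assuming the common 4 KiB block / 2 MiB segment
	 * geometry, i.e. 512 blocks per segment and 455 NAT entries per
	 * block): one NAT segment pair then covers 512 * 455 = 232,960 nids.
	 */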
3255  
3256  	/* unusable nids: 0, node, meta (and root, which is counted as a valid node) */
3257  	nm_i->available_nids = nm_i->max_nid - sbi->total_valid_node_count -
3258  						F2FS_RESERVED_NODE_NUM;
3259  	nm_i->nid_cnt[FREE_NID] = 0;
3260  	nm_i->nid_cnt[PREALLOC_NID] = 0;
3261  	nm_i->ram_thresh = DEF_RAM_THRESHOLD;
3262  	nm_i->ra_nid_pages = DEF_RA_NID_PAGES;
3263  	nm_i->dirty_nats_ratio = DEF_DIRTY_NAT_RATIO_THRESHOLD;
3264  	nm_i->max_rf_node_blocks = DEF_RF_NODE_BLOCKS;
3265  
3266  	INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC);
3267  	INIT_LIST_HEAD(&nm_i->free_nid_list);
3268  	INIT_RADIX_TREE(&nm_i->nat_root, GFP_NOIO);
3269  	INIT_RADIX_TREE(&nm_i->nat_set_root, GFP_NOIO);
3270  	INIT_LIST_HEAD(&nm_i->nat_entries);
3271  	spin_lock_init(&nm_i->nat_list_lock);
3272  
3273  	mutex_init(&nm_i->build_lock);
3274  	spin_lock_init(&nm_i->nid_list_lock);
3275  	init_f2fs_rwsem(&nm_i->nat_tree_lock);
3276  
3277  	nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid);
3278  	nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP);
3279  	version_bitmap = __bitmap_ptr(sbi, NAT_BITMAP);
3280  	nm_i->nat_bitmap = kmemdup(version_bitmap, nm_i->bitmap_size,
3281  					GFP_KERNEL);
3282  	if (!nm_i->nat_bitmap)
3283  		return -ENOMEM;
3284  
3285  	err = __get_nat_bitmaps(sbi);
3286  	if (err)
3287  		return err;
3288  
3289  #ifdef CONFIG_F2FS_CHECK_FS
3290  	nm_i->nat_bitmap_mir = kmemdup(version_bitmap, nm_i->bitmap_size,
3291  					GFP_KERNEL);
3292  	if (!nm_i->nat_bitmap_mir)
3293  		return -ENOMEM;
3294  #endif
3295  
3296  	return 0;
3297  }
3298  
3299  static int init_free_nid_cache(struct f2fs_sb_info *sbi)
3300  {
3301  	struct f2fs_nm_info *nm_i = NM_I(sbi);
3302  	int i;
3303  
3304  	nm_i->free_nid_bitmap =
3305  		f2fs_kvzalloc(sbi, array_size(sizeof(unsigned char *),
3306  					      nm_i->nat_blocks),
3307  			      GFP_KERNEL);
3308  	if (!nm_i->free_nid_bitmap)
3309  		return -ENOMEM;
3310  
3311  	for (i = 0; i < nm_i->nat_blocks; i++) {
3312  		nm_i->free_nid_bitmap[i] = f2fs_kvzalloc(sbi,
3313  			f2fs_bitmap_size(NAT_ENTRY_PER_BLOCK), GFP_KERNEL);
3314  		if (!nm_i->free_nid_bitmap[i])
3315  			return -ENOMEM;
3316  	}
3317  
3318  	nm_i->nat_block_bitmap = f2fs_kvzalloc(sbi, nm_i->nat_blocks / 8,
3319  								GFP_KERNEL);
3320  	if (!nm_i->nat_block_bitmap)
3321  		return -ENOMEM;
3322  
3323  	nm_i->free_nid_count =
3324  		f2fs_kvzalloc(sbi, array_size(sizeof(unsigned short),
3325  					      nm_i->nat_blocks),
3326  			      GFP_KERNEL);
3327  	if (!nm_i->free_nid_count)
3328  		return -ENOMEM;
3329  	return 0;
3330  }
3331  
3332  int f2fs_build_node_manager(struct f2fs_sb_info *sbi)
3333  {
3334  	int err;
3335  
3336  	sbi->nm_info = f2fs_kzalloc(sbi, sizeof(struct f2fs_nm_info),
3337  							GFP_KERNEL);
3338  	if (!sbi->nm_info)
3339  		return -ENOMEM;
3340  
3341  	err = init_node_manager(sbi);
3342  	if (err)
3343  		return err;
3344  
3345  	err = init_free_nid_cache(sbi);
3346  	if (err)
3347  		return err;
3348  
3349  	/* load free nid status from nat_bits table */
3350  	load_free_nid_bitmap(sbi);
3351  
3352  	return f2fs_build_free_nids(sbi, true, true);
3353  }
3354  
3355  void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi)
3356  {
3357  	struct f2fs_nm_info *nm_i = NM_I(sbi);
3358  	struct free_nid *i, *next_i;
3359  	void *vec[NAT_VEC_SIZE];
3360  	struct nat_entry **natvec = (struct nat_entry **)vec;
3361  	struct nat_entry_set **setvec = (struct nat_entry_set **)vec;
3362  	nid_t nid = 0;
3363  	unsigned int found;
3364  
3365  	if (!nm_i)
3366  		return;
3367  
3368  	/* destroy free nid list */
3369  	spin_lock(&nm_i->nid_list_lock);
3370  	list_for_each_entry_safe(i, next_i, &nm_i->free_nid_list, list) {
3371  		__remove_free_nid(sbi, i, FREE_NID);
3372  		spin_unlock(&nm_i->nid_list_lock);
3373  		kmem_cache_free(free_nid_slab, i);
3374  		spin_lock(&nm_i->nid_list_lock);
3375  	}
3376  	f2fs_bug_on(sbi, nm_i->nid_cnt[FREE_NID]);
3377  	f2fs_bug_on(sbi, nm_i->nid_cnt[PREALLOC_NID]);
3378  	f2fs_bug_on(sbi, !list_empty(&nm_i->free_nid_list));
3379  	spin_unlock(&nm_i->nid_list_lock);
3380  
3381  	/* destroy nat cache */
3382  	f2fs_down_write(&nm_i->nat_tree_lock);
3383  	while ((found = __gang_lookup_nat_cache(nm_i,
3384  					nid, NAT_VEC_SIZE, natvec))) {
3385  		unsigned idx;
3386  
3387  		nid = nat_get_nid(natvec[found - 1]) + 1;
3388  		for (idx = 0; idx < found; idx++) {
3389  			spin_lock(&nm_i->nat_list_lock);
3390  			list_del(&natvec[idx]->list);
3391  			spin_unlock(&nm_i->nat_list_lock);
3392  
3393  			__del_from_nat_cache(nm_i, natvec[idx]);
3394  		}
3395  	}
3396  	f2fs_bug_on(sbi, nm_i->nat_cnt[TOTAL_NAT]);
3397  
3398  	/* destroy nat set cache */
3399  	nid = 0;
3400  	memset(vec, 0, sizeof(void *) * NAT_VEC_SIZE);
3401  	while ((found = __gang_lookup_nat_set(nm_i,
3402  					nid, NAT_VEC_SIZE, setvec))) {
3403  		unsigned idx;
3404  
3405  		nid = setvec[found - 1]->set + 1;
3406  		for (idx = 0; idx < found; idx++) {
3407  			/* entry_cnt is not zero when a cp_error has occurred */
3408  			f2fs_bug_on(sbi, !list_empty(&setvec[idx]->entry_list));
3409  			radix_tree_delete(&nm_i->nat_set_root, setvec[idx]->set);
3410  			kmem_cache_free(nat_entry_set_slab, setvec[idx]);
3411  		}
3412  	}
3413  	f2fs_up_write(&nm_i->nat_tree_lock);
3414  
3415  	kvfree(nm_i->nat_block_bitmap);
3416  	if (nm_i->free_nid_bitmap) {
3417  		int i;
3418  
3419  		for (i = 0; i < nm_i->nat_blocks; i++)
3420  			kvfree(nm_i->free_nid_bitmap[i]);
3421  		kvfree(nm_i->free_nid_bitmap);
3422  	}
3423  	kvfree(nm_i->free_nid_count);
3424  
3425  	kvfree(nm_i->nat_bitmap);
3426  	kvfree(nm_i->nat_bits);
3427  #ifdef CONFIG_F2FS_CHECK_FS
3428  	kvfree(nm_i->nat_bitmap_mir);
3429  #endif
3430  	sbi->nm_info = NULL;
3431  	kfree(nm_i);
3432  }
3433  
3434  int __init f2fs_create_node_manager_caches(void)
3435  {
3436  	nat_entry_slab = f2fs_kmem_cache_create("f2fs_nat_entry",
3437  			sizeof(struct nat_entry));
3438  	if (!nat_entry_slab)
3439  		goto fail;
3440  
3441  	free_nid_slab = f2fs_kmem_cache_create("f2fs_free_nid",
3442  			sizeof(struct free_nid));
3443  	if (!free_nid_slab)
3444  		goto destroy_nat_entry;
3445  
3446  	nat_entry_set_slab = f2fs_kmem_cache_create("f2fs_nat_entry_set",
3447  			sizeof(struct nat_entry_set));
3448  	if (!nat_entry_set_slab)
3449  		goto destroy_free_nid;
3450  
3451  	fsync_node_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_node_entry",
3452  			sizeof(struct fsync_node_entry));
3453  	if (!fsync_node_entry_slab)
3454  		goto destroy_nat_entry_set;
3455  	return 0;
3456  
3457  destroy_nat_entry_set:
3458  	kmem_cache_destroy(nat_entry_set_slab);
3459  destroy_free_nid:
3460  	kmem_cache_destroy(free_nid_slab);
3461  destroy_nat_entry:
3462  	kmem_cache_destroy(nat_entry_slab);
3463  fail:
3464  	return -ENOMEM;
3465  }
3466  
3467  void f2fs_destroy_node_manager_caches(void)
3468  {
3469  	kmem_cache_destroy(fsync_node_entry_slab);
3470  	kmem_cache_destroy(nat_entry_set_slab);
3471  	kmem_cache_destroy(free_nid_slab);
3472  	kmem_cache_destroy(nat_entry_slab);
3473  }
3474