// SPDX-License-Identifier: GPL-2.0
/*
 * f2fs extent cache support
 *
 * Copyright (c) 2015 Motorola Mobility
 * Copyright (c) 2015 Samsung Electronics
 * Authors: Jaegeuk Kim <jaegeuk@kernel.org>
 *          Chao Yu <chao2.yu@samsung.com>
 *
 * block_age-based extent cache added by:
 * Copyright (c) 2022 xiaomi Co., Ltd.
 *             http://www.xiaomi.com/
 */

#include <linux/fs.h>
#include <linux/f2fs_fs.h>

#include "f2fs.h"
#include "node.h"
#include <trace/events/f2fs.h>

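/*
 * Illustrative example (not part of the original code): for an inode whose
 * on-disk i_ext is { fofs = 0, blk = 0x2000, len = 8 }, the check below
 * passes only if both the first block (0x2000) and the last block (0x2007)
 * are valid data block addresses; otherwise the extent is reported as
 * corrupted and running fsck is suggested.
 */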
bool sanity_check_extent_cache(struct inode *inode, struct page *ipage)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct f2fs_extent *i_ext = &F2FS_INODE(ipage)->i_ext;
	struct extent_info ei;

	get_read_extent_info(&ei, i_ext);

	if (!ei.len)
		return true;

	if (!f2fs_is_valid_blkaddr(sbi, ei.blk, DATA_GENERIC_ENHANCE) ||
	    !f2fs_is_valid_blkaddr(sbi, ei.blk + ei.len - 1,
					DATA_GENERIC_ENHANCE)) {
		f2fs_warn(sbi, "%s: inode (ino=%lx) extent info [%u, %u, %u] is incorrect, run fsck to fix",
			  __func__, inode->i_ino,
			  ei.blk, ei.fofs, ei.len);
		return false;
	}
	return true;
}

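/*
 * Note on keep_clen (intent inferred from the callers, not stated in the
 * original code): when a read extent is adjusted in place, e.g. during a
 * split in __update_extent_tree_range(), keep_clen == true preserves the
 * compressed length (c_len) of the surviving part instead of clearing it.
 */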
static void __set_extent_info(struct extent_info *ei,
				unsigned int fofs, unsigned int len,
				block_t blk, bool keep_clen,
				unsigned long age, unsigned long last_blocks,
				enum extent_type type)
{
	ei->fofs = fofs;
	ei->len = len;

	if (type == EX_READ) {
		ei->blk = blk;
		if (keep_clen)
			return;
#ifdef CONFIG_F2FS_FS_COMPRESSION
		ei->c_len = 0;
#endif
	} else if (type == EX_BLOCK_AGE) {
		ei->age = age;
		ei->last_blocks = last_blocks;
	}
}

static bool __init_may_extent_tree(struct inode *inode, enum extent_type type)
{
	if (type == EX_READ)
		return test_opt(F2FS_I_SB(inode), READ_EXTENT_CACHE) &&
			S_ISREG(inode->i_mode);
	if (type == EX_BLOCK_AGE)
		return test_opt(F2FS_I_SB(inode), AGE_EXTENT_CACHE) &&
			(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode));
	return false;
}

static bool __may_extent_tree(struct inode *inode, enum extent_type type)
{
	/*
	 * Don't create extents for files recovered during mount
	 * if the shrinker is not registered.
	 */
	if (list_empty(&F2FS_I_SB(inode)->s_list))
		return false;

	if (!__init_may_extent_tree(inode, type))
		return false;

	if (type == EX_READ) {
		if (is_inode_flag_set(inode, FI_NO_EXTENT))
			return false;
		if (is_inode_flag_set(inode, FI_COMPRESSED_FILE) &&
				 !f2fs_sb_has_readonly(F2FS_I_SB(inode)))
			return false;
	} else if (type == EX_BLOCK_AGE) {
		if (is_inode_flag_set(inode, FI_COMPRESSED_FILE))
			return false;
		if (file_is_cold(inode))
			return false;
	}
	return true;
}

static void __try_update_largest_extent(struct extent_tree *et,
						struct extent_node *en)
{
	if (et->type != EX_READ)
		return;
	if (en->ei.len <= et->largest.len)
		return;

	et->largest = en->ei;
	et->largest_updated = true;
}

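/*
 * Worked example (illustrative): two read extents
 *   back  = { fofs = 0, len = 4, blk = 100 }
 *   front = { fofs = 4, len = 2, blk = 104 }
 * are mergeable because both the file offsets (0 + 4 == 4) and the block
 * addresses (100 + 4 == 104) are contiguous. For block-age extents, only
 * the file offsets must be contiguous, and the age/last_blocks values must
 * fall within SAME_AGE_REGION of each other.
 */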
static bool __is_extent_mergeable(struct extent_info *back,
		struct extent_info *front, enum extent_type type)
{
	if (type == EX_READ) {
#ifdef CONFIG_F2FS_FS_COMPRESSION
		if (back->c_len && back->len != back->c_len)
			return false;
		if (front->c_len && front->len != front->c_len)
			return false;
#endif
		return (back->fofs + back->len == front->fofs &&
				back->blk + back->len == front->blk);
	} else if (type == EX_BLOCK_AGE) {
		return (back->fofs + back->len == front->fofs &&
			abs(back->age - front->age) <= SAME_AGE_REGION &&
			abs(back->last_blocks - front->last_blocks) <=
							SAME_AGE_REGION);
	}
	return false;
}

static bool __is_back_mergeable(struct extent_info *cur,
		struct extent_info *back, enum extent_type type)
{
	return __is_extent_mergeable(back, cur, type);
}

static bool __is_front_mergeable(struct extent_info *cur,
		struct extent_info *front, enum extent_type type)
{
	return __is_extent_mergeable(cur, front, type);
}

static struct extent_node *__lookup_extent_node(struct rb_root_cached *root,
			struct extent_node *cached_en, unsigned int fofs)
{
	struct rb_node *node = root->rb_root.rb_node;
	struct extent_node *en;

	/* check the cached entry first */
	if (cached_en && cached_en->ei.fofs <= fofs &&
			cached_en->ei.fofs + cached_en->ei.len > fofs)
		return cached_en;

	/* check rb_tree */
	while (node) {
		en = rb_entry(node, struct extent_node, rb_node);

		if (fofs < en->ei.fofs)
			node = node->rb_left;
		else if (fofs >= en->ei.fofs + en->ei.len)
			node = node->rb_right;
		else
			return en;
	}
	return NULL;
}

/*
 * Look up the rb entry covering @fofs in the rb-tree; if found, return the
 * entry, otherwise return NULL.
 * @prev_entry: extent before fofs
 * @next_entry: extent after fofs
 * @insert_p: insert point for a new extent at fofs,
 * recorded to simplify the insertion afterwards.
 * The tree must stay unchanged between lookup and insertion.
 */
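/*
 * Example of the miss path (illustrative): looking up fofs = 25 in a tree
 * holding [0, 10) and [30, 40) returns NULL with *prev_entry = [0, 10),
 * *next_entry = [30, 40), and *insert_p / *insert_parent pointing at the
 * slot where a node covering 25 would be linked.
 */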
static struct extent_node *__lookup_extent_node_ret(struct rb_root_cached *root,
				struct extent_node *cached_en,
				unsigned int fofs,
				struct extent_node **prev_entry,
				struct extent_node **next_entry,
				struct rb_node ***insert_p,
				struct rb_node **insert_parent,
				bool *leftmost)
{
	struct rb_node **pnode = &root->rb_root.rb_node;
	struct rb_node *parent = NULL, *tmp_node;
	struct extent_node *en = cached_en;

	*insert_p = NULL;
	*insert_parent = NULL;
	*prev_entry = NULL;
	*next_entry = NULL;

	if (RB_EMPTY_ROOT(&root->rb_root))
		return NULL;

	if (en && en->ei.fofs <= fofs && en->ei.fofs + en->ei.len > fofs)
		goto lookup_neighbors;

	*leftmost = true;

	while (*pnode) {
		parent = *pnode;
		en = rb_entry(*pnode, struct extent_node, rb_node);

		if (fofs < en->ei.fofs) {
			pnode = &(*pnode)->rb_left;
		} else if (fofs >= en->ei.fofs + en->ei.len) {
			pnode = &(*pnode)->rb_right;
			*leftmost = false;
		} else {
			goto lookup_neighbors;
		}
	}

	*insert_p = pnode;
	*insert_parent = parent;

	en = rb_entry(parent, struct extent_node, rb_node);
	tmp_node = parent;
	if (parent && fofs > en->ei.fofs)
		tmp_node = rb_next(parent);
	*next_entry = rb_entry_safe(tmp_node, struct extent_node, rb_node);

	tmp_node = parent;
	if (parent && fofs < en->ei.fofs)
		tmp_node = rb_prev(parent);
	*prev_entry = rb_entry_safe(tmp_node, struct extent_node, rb_node);
	return NULL;

lookup_neighbors:
	if (fofs == en->ei.fofs) {
		/* lookup prev node for merging backward later */
		tmp_node = rb_prev(&en->rb_node);
		*prev_entry = rb_entry_safe(tmp_node,
					struct extent_node, rb_node);
	}
	if (fofs == en->ei.fofs + en->ei.len - 1) {
		/* lookup next node for merging frontward later */
		tmp_node = rb_next(&en->rb_node);
		*next_entry = rb_entry_safe(tmp_node,
					struct extent_node, rb_node);
	}
	return en;
}

static struct kmem_cache *extent_tree_slab;
static struct kmem_cache *extent_node_slab;

static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi,
				struct extent_tree *et, struct extent_info *ei,
				struct rb_node *parent, struct rb_node **p,
				bool leftmost)
{
	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
	struct extent_node *en;

	en = f2fs_kmem_cache_alloc(extent_node_slab, GFP_ATOMIC, false, sbi);
	if (!en)
		return NULL;

	en->ei = *ei;
	INIT_LIST_HEAD(&en->list);
	en->et = et;

	rb_link_node(&en->rb_node, parent, p);
	rb_insert_color_cached(&en->rb_node, &et->root, leftmost);
	atomic_inc(&et->node_cnt);
	atomic_inc(&eti->total_ext_node);
	return en;
}

static void __detach_extent_node(struct f2fs_sb_info *sbi,
				struct extent_tree *et, struct extent_node *en)
{
	struct extent_tree_info *eti = &sbi->extent_tree[et->type];

	rb_erase_cached(&en->rb_node, &et->root);
	atomic_dec(&et->node_cnt);
	atomic_dec(&eti->total_ext_node);

	if (et->cached_en == en)
		et->cached_en = NULL;
	kmem_cache_free(extent_node_slab, en);
}

/*
 * Flow to release an extent_node:
 * 1. list_del_init
 * 2. __detach_extent_node
 * 3. kmem_cache_free.
 */
static void __release_extent_node(struct f2fs_sb_info *sbi,
			struct extent_tree *et, struct extent_node *en)
{
	struct extent_tree_info *eti = &sbi->extent_tree[et->type];

	spin_lock(&eti->extent_lock);
	f2fs_bug_on(sbi, list_empty(&en->list));
	list_del_init(&en->list);
	spin_unlock(&eti->extent_lock);

	__detach_extent_node(sbi, et, en);
}

static struct extent_tree *__grab_extent_tree(struct inode *inode,
						enum extent_type type)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree_info *eti = &sbi->extent_tree[type];
	struct extent_tree *et;
	nid_t ino = inode->i_ino;

	mutex_lock(&eti->extent_tree_lock);
	et = radix_tree_lookup(&eti->extent_tree_root, ino);
	if (!et) {
		et = f2fs_kmem_cache_alloc(extent_tree_slab,
					GFP_NOFS, true, NULL);
		f2fs_radix_tree_insert(&eti->extent_tree_root, ino, et);
		memset(et, 0, sizeof(struct extent_tree));
		et->ino = ino;
		et->type = type;
		et->root = RB_ROOT_CACHED;
		et->cached_en = NULL;
		rwlock_init(&et->lock);
		INIT_LIST_HEAD(&et->list);
		atomic_set(&et->node_cnt, 0);
		atomic_inc(&eti->total_ext_tree);
	} else {
		atomic_dec(&eti->total_zombie_tree);
		list_del_init(&et->list);
	}
	mutex_unlock(&eti->extent_tree_lock);

	/* never dies until evict_inode */
	F2FS_I(inode)->extent_tree[type] = et;

	return et;
}

static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
					struct extent_tree *et)
{
	struct rb_node *node, *next;
	struct extent_node *en;
	unsigned int count = atomic_read(&et->node_cnt);

	node = rb_first_cached(&et->root);
	while (node) {
		next = rb_next(node);
		en = rb_entry(node, struct extent_node, rb_node);
		__release_extent_node(sbi, et, en);
		node = next;
	}

	return count - atomic_read(&et->node_cnt);
}

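/*
 * Overlap check example (illustrative): with largest = { fofs = 100,
 * len = 50 }, an update of the range [120, 130) intersects [100, 150),
 * so the cached largest extent is invalidated (len = 0) and marked for
 * writeback to the inode.
 */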
static void __drop_largest_extent(struct extent_tree *et,
					pgoff_t fofs, unsigned int len)
{
	if (fofs < (pgoff_t)et->largest.fofs + et->largest.len &&
			fofs + len > et->largest.fofs) {
		et->largest.len = 0;
		et->largest_updated = true;
	}
}

void f2fs_init_read_extent_tree(struct inode *inode, struct page *ipage)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree_info *eti = &sbi->extent_tree[EX_READ];
	struct f2fs_extent *i_ext = &F2FS_INODE(ipage)->i_ext;
	struct extent_tree *et;
	struct extent_node *en;
	struct extent_info ei;

	if (!__may_extent_tree(inode, EX_READ)) {
		/* drop largest read extent */
		if (i_ext->len) {
			f2fs_wait_on_page_writeback(ipage, NODE, true, true);
			i_ext->len = 0;
			set_page_dirty(ipage);
		}
		set_inode_flag(inode, FI_NO_EXTENT);
		return;
	}

	et = __grab_extent_tree(inode, EX_READ);

	get_read_extent_info(&ei, i_ext);

	write_lock(&et->lock);
	if (atomic_read(&et->node_cnt) || !ei.len)
		goto skip;

	en = __attach_extent_node(sbi, et, &ei, NULL,
				&et->root.rb_root.rb_node, true);
	if (en) {
		et->largest = en->ei;
		et->cached_en = en;

		spin_lock(&eti->extent_lock);
		list_add_tail(&en->list, &eti->extent_list);
		spin_unlock(&eti->extent_lock);
	}
skip:
	/* drop the largest extent if the checkpoint is corrupted */
	if (f2fs_cp_error(sbi)) {
		et->largest.len = 0;
		et->largest_updated = true;
	}
	write_unlock(&et->lock);
}

void f2fs_init_age_extent_tree(struct inode *inode)
{
	if (!__init_may_extent_tree(inode, EX_BLOCK_AGE))
		return;
	__grab_extent_tree(inode, EX_BLOCK_AGE);
}

void f2fs_init_extent_tree(struct inode *inode)
{
	/* initialize read cache */
	if (__init_may_extent_tree(inode, EX_READ))
		__grab_extent_tree(inode, EX_READ);

	/* initialize block age cache */
	if (__init_may_extent_tree(inode, EX_BLOCK_AGE))
		__grab_extent_tree(inode, EX_BLOCK_AGE);
}

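/*
 * Lookup order below: for read extents the cached largest extent is tried
 * first (no rb-tree walk), then the per-tree cached node, then a full
 * rb-tree search; the stat counters record which of the three paths
 * produced the hit.
 */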
static bool __lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
			struct extent_info *ei, enum extent_type type)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree_info *eti = &sbi->extent_tree[type];
	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
	struct extent_node *en;
	bool ret = false;

	if (!et)
		return false;

	trace_f2fs_lookup_extent_tree_start(inode, pgofs, type);

	read_lock(&et->lock);

	if (type == EX_READ &&
			et->largest.fofs <= pgofs &&
			(pgoff_t)et->largest.fofs + et->largest.len > pgofs) {
		*ei = et->largest;
		ret = true;
		stat_inc_largest_node_hit(sbi);
		goto out;
	}

	en = __lookup_extent_node(&et->root, et->cached_en, pgofs);
	if (!en)
		goto out;

	if (en == et->cached_en)
		stat_inc_cached_node_hit(sbi, type);
	else
		stat_inc_rbtree_node_hit(sbi, type);

	*ei = en->ei;
	spin_lock(&eti->extent_lock);
	if (!list_empty(&en->list)) {
		list_move_tail(&en->list, &eti->extent_list);
		et->cached_en = en;
	}
	spin_unlock(&eti->extent_lock);
	ret = true;
out:
	stat_inc_total_hit(sbi, type);
	read_unlock(&et->lock);

	if (type == EX_READ)
		trace_f2fs_lookup_read_extent_tree_end(inode, pgofs, ei);
	else if (type == EX_BLOCK_AGE)
		trace_f2fs_lookup_age_extent_tree_end(inode, pgofs, ei);
	return ret;
}

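/*
 * Merge example (illustrative): inserting ei = [4, 6) between
 * prev_ex = [0, 4) and next_ex = [6, 10) first back-merges ei into prev_ex
 * (giving [0, 6)), then front-merges that into next_ex and releases
 * prev_ex, leaving a single node covering [0, 10).
 */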
static struct extent_node *__try_merge_extent_node(struct f2fs_sb_info *sbi,
				struct extent_tree *et, struct extent_info *ei,
				struct extent_node *prev_ex,
				struct extent_node *next_ex)
{
	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
	struct extent_node *en = NULL;

	if (prev_ex && __is_back_mergeable(ei, &prev_ex->ei, et->type)) {
		prev_ex->ei.len += ei->len;
		ei = &prev_ex->ei;
		en = prev_ex;
	}

	if (next_ex && __is_front_mergeable(ei, &next_ex->ei, et->type)) {
		next_ex->ei.fofs = ei->fofs;
		next_ex->ei.len += ei->len;
		if (et->type == EX_READ)
			next_ex->ei.blk = ei->blk;
		if (en)
			__release_extent_node(sbi, et, prev_ex);

		en = next_ex;
	}

	if (!en)
		return NULL;

	__try_update_largest_extent(et, en);

	spin_lock(&eti->extent_lock);
	if (!list_empty(&en->list)) {
		list_move_tail(&en->list, &eti->extent_list);
		et->cached_en = en;
	}
	spin_unlock(&eti->extent_lock);
	return en;
}

static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi,
				struct extent_tree *et, struct extent_info *ei,
				struct rb_node **insert_p,
				struct rb_node *insert_parent,
				bool leftmost)
{
	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
	struct rb_node **p = &et->root.rb_root.rb_node;
	struct rb_node *parent = NULL;
	struct extent_node *en = NULL;

	if (insert_p && insert_parent) {
		parent = insert_parent;
		p = insert_p;
		goto do_insert;
	}

	leftmost = true;

	/* look up extent_node in the rb tree */
	while (*p) {
		parent = *p;
		en = rb_entry(parent, struct extent_node, rb_node);

		if (ei->fofs < en->ei.fofs) {
			p = &(*p)->rb_left;
		} else if (ei->fofs >= en->ei.fofs + en->ei.len) {
			p = &(*p)->rb_right;
			leftmost = false;
		} else {
			f2fs_bug_on(sbi, 1);
		}
	}

do_insert:
	en = __attach_extent_node(sbi, et, ei, parent, p, leftmost);
	if (!en)
		return NULL;

	__try_update_largest_extent(et, en);

	/* update in global extent list */
	spin_lock(&eti->extent_lock);
	list_add_tail(&en->list, &eti->extent_list);
	et->cached_en = en;
	spin_unlock(&eti->extent_lock);
	return en;
}

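/*
 * Split example (illustrative, assuming F2FS_MIN_EXTENT_LEN == 64):
 * overwriting [4096, 4106) inside an existing read extent [4000, 4200)
 * shrinks the node in place to [4000, 4096), inserts a new node for
 * [4106, 4200), and then inserts or merges the extent describing the
 * newly written blocks at [4096, 4106).
 */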
static void __update_extent_tree_range(struct inode *inode,
			struct extent_info *tei, enum extent_type type)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
	struct extent_node *en = NULL, *en1 = NULL;
	struct extent_node *prev_en = NULL, *next_en = NULL;
	struct extent_info ei, dei, prev;
	struct rb_node **insert_p = NULL, *insert_parent = NULL;
	unsigned int fofs = tei->fofs, len = tei->len;
	unsigned int end = fofs + len;
	bool updated = false;
	bool leftmost = false;

	if (!et)
		return;

	if (type == EX_READ)
		trace_f2fs_update_read_extent_tree_range(inode, fofs, len,
						tei->blk, 0);
	else if (type == EX_BLOCK_AGE)
		trace_f2fs_update_age_extent_tree_range(inode, fofs, len,
						tei->age, tei->last_blocks);

	write_lock(&et->lock);

	if (type == EX_READ) {
		if (is_inode_flag_set(inode, FI_NO_EXTENT)) {
			write_unlock(&et->lock);
			return;
		}

		prev = et->largest;
		dei.len = 0;

		/*
		 * drop the largest extent before lookup, in case it has
		 * already been shrunk from the extent tree
		 */
		__drop_largest_extent(et, fofs, len);
	}

	/* 1. lookup the first extent node in range [fofs, fofs + len - 1] */
	en = __lookup_extent_node_ret(&et->root,
					et->cached_en, fofs,
					&prev_en, &next_en,
					&insert_p, &insert_parent,
					&leftmost);
	if (!en)
		en = next_en;

	/* 2. invalidate all extent nodes in range [fofs, fofs + len - 1] */
	while (en && en->ei.fofs < end) {
		unsigned int org_end;
		int parts = 0;	/* # of parts current extent split into */

		next_en = en1 = NULL;

		dei = en->ei;
		org_end = dei.fofs + dei.len;
		f2fs_bug_on(sbi, fofs >= org_end);

		if (fofs > dei.fofs && (type != EX_READ ||
				fofs - dei.fofs >= F2FS_MIN_EXTENT_LEN)) {
			en->ei.len = fofs - en->ei.fofs;
			prev_en = en;
			parts = 1;
		}

		if (end < org_end && (type != EX_READ ||
				org_end - end >= F2FS_MIN_EXTENT_LEN)) {
			if (parts) {
				__set_extent_info(&ei,
					end, org_end - end,
					end - dei.fofs + dei.blk, false,
					dei.age, dei.last_blocks,
					type);
				en1 = __insert_extent_tree(sbi, et, &ei,
							NULL, NULL, true);
				next_en = en1;
			} else {
				__set_extent_info(&en->ei,
					end, en->ei.len - (end - dei.fofs),
					en->ei.blk + (end - dei.fofs), true,
					dei.age, dei.last_blocks,
					type);
				next_en = en;
			}
			parts++;
		}

		if (!next_en) {
			struct rb_node *node = rb_next(&en->rb_node);

			next_en = rb_entry_safe(node, struct extent_node,
						rb_node);
		}

		if (parts)
			__try_update_largest_extent(et, en);
		else
			__release_extent_node(sbi, et, en);

		/*
		 * If the original extent was split into zero or two parts,
		 * the extent tree has been altered by deletion or insertion;
		 * therefore invalidate all pointers into the tree.
		 */
		if (parts != 1) {
			insert_p = NULL;
			insert_parent = NULL;
		}
		en = next_en;
	}

	if (type == EX_BLOCK_AGE)
		goto update_age_extent_cache;

	/* 3. update extent in read extent cache */
	BUG_ON(type != EX_READ);

	if (tei->blk) {
		__set_extent_info(&ei, fofs, len, tei->blk, false,
				  0, 0, EX_READ);
		if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
			__insert_extent_tree(sbi, et, &ei,
					insert_p, insert_parent, leftmost);

		/* give up the extent cache if splits and small updates happen */
		if (dei.len >= 1 &&
				prev.len < F2FS_MIN_EXTENT_LEN &&
				et->largest.len < F2FS_MIN_EXTENT_LEN) {
			et->largest.len = 0;
			et->largest_updated = true;
			set_inode_flag(inode, FI_NO_EXTENT);
		}
	}

	if (is_inode_flag_set(inode, FI_NO_EXTENT))
		__free_extent_tree(sbi, et);

	if (et->largest_updated) {
		et->largest_updated = false;
		updated = true;
	}
	goto out_read_extent_cache;
update_age_extent_cache:
	if (!tei->last_blocks)
		goto out_read_extent_cache;

	__set_extent_info(&ei, fofs, len, 0, false,
			tei->age, tei->last_blocks, EX_BLOCK_AGE);
	if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
		__insert_extent_tree(sbi, et, &ei,
					insert_p, insert_parent, leftmost);
out_read_extent_cache:
	write_unlock(&et->lock);

	if (updated)
		f2fs_mark_inode_dirty_sync(inode, true);
}

#ifdef CONFIG_F2FS_FS_COMPRESSION
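/*
 * Background note (inferred from __is_extent_mergeable(), not stated in the
 * original code): a compressed cluster is cached as a single read extent
 * whose c_len records how many blocks the cluster occupies on disk, while
 * len covers the full logical range; extents with c_len != len are never
 * merged with their neighbors.
 */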
void f2fs_update_read_extent_tree_range_compressed(struct inode *inode,
				pgoff_t fofs, block_t blkaddr, unsigned int llen,
				unsigned int c_len)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree *et = F2FS_I(inode)->extent_tree[EX_READ];
	struct extent_node *en = NULL;
	struct extent_node *prev_en = NULL, *next_en = NULL;
	struct extent_info ei;
	struct rb_node **insert_p = NULL, *insert_parent = NULL;
	bool leftmost = false;

	trace_f2fs_update_read_extent_tree_range(inode, fofs, llen,
						blkaddr, c_len);

	/* it is safe here to check FI_NO_EXTENT w/o et->lock in ro image */
	if (is_inode_flag_set(inode, FI_NO_EXTENT))
		return;

	write_lock(&et->lock);

	en = __lookup_extent_node_ret(&et->root,
					et->cached_en, fofs,
					&prev_en, &next_en,
					&insert_p, &insert_parent,
					&leftmost);
	if (en)
		goto unlock_out;

	__set_extent_info(&ei, fofs, llen, blkaddr, true, 0, 0, EX_READ);
	ei.c_len = c_len;

	if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
		__insert_extent_tree(sbi, et, &ei,
				insert_p, insert_parent, leftmost);
unlock_out:
	write_unlock(&et->lock);
}
#endif

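/*
 * Worked example (illustrative): with last_age_weight == 30, new == 200
 * and old == 100, the result is 200 * 70% + 100 * 30% = 170. The
 * div_u64_rem() split only exists to avoid overflowing the multiplication
 * for very large 64-bit ages; quotients and remainders are weighted
 * separately and then recombined.
 */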
static unsigned long long __calculate_block_age(struct f2fs_sb_info *sbi,
						unsigned long long new,
						unsigned long long old)
{
	unsigned int rem_old, rem_new;
	unsigned long long res;
	unsigned int weight = sbi->last_age_weight;

	res = div_u64_rem(new, 100, &rem_new) * (100 - weight)
		+ div_u64_rem(old, 100, &rem_old) * weight;

	if (rem_new)
		res += rem_new * (100 - weight) / 100;
	if (rem_old)
		res += rem_old * weight / 100;

	return res;
}

/* This returns a new age and allocated blocks in ei */
static int __get_new_block_age(struct inode *inode, struct extent_info *ei,
						block_t blkaddr)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	loff_t f_size = i_size_read(inode);
	unsigned long long cur_blocks =
				atomic64_read(&sbi->allocated_data_blocks);
	struct extent_info tei = *ei;	/* only fofs and len are valid */

	/*
	 * When I/O is not aligned to PAGE_SIZE, an update hits the last
	 * file block even during sequential write. So don't record the age
	 * of a newly written last file block here.
	 */
	if ((f_size >> PAGE_SHIFT) == ei->fofs && f_size & (PAGE_SIZE - 1) &&
			blkaddr == NEW_ADDR)
		return -EINVAL;

	if (__lookup_extent_tree(inode, ei->fofs, &tei, EX_BLOCK_AGE)) {
		unsigned long long cur_age;

		if (cur_blocks >= tei.last_blocks)
			cur_age = cur_blocks - tei.last_blocks;
		else
			/* allocated_data_blocks overflowed */
			cur_age = ULLONG_MAX - tei.last_blocks + cur_blocks;

		if (tei.age)
			ei->age = __calculate_block_age(sbi, cur_age, tei.age);
		else
			ei->age = cur_age;
		ei->last_blocks = cur_blocks;
		WARN_ON(ei->age > cur_blocks);
		return 0;
	}

	f2fs_bug_on(sbi, blkaddr == NULL_ADDR);

	/* the data block was allocated for the first time */
	if (blkaddr == NEW_ADDR)
		goto out;

	if (__is_valid_data_blkaddr(blkaddr) &&
	    !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE))
		return -EINVAL;
out:
	/*
	 * Init the block age with zero; this can happen when the block-age
	 * extent was reclaimed due to memory pressure or a system reboot.
	 */
	ei->age = 0;
	ei->last_blocks = cur_blocks;
	return 0;
}

static void __update_extent_cache(struct dnode_of_data *dn, enum extent_type type)
{
	struct extent_info ei = {};

	if (!__may_extent_tree(dn->inode, type))
		return;

	ei.fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) +
								dn->ofs_in_node;
	ei.len = 1;

	if (type == EX_READ) {
		if (dn->data_blkaddr == NEW_ADDR)
			ei.blk = NULL_ADDR;
		else
			ei.blk = dn->data_blkaddr;
	} else if (type == EX_BLOCK_AGE) {
		if (__get_new_block_age(dn->inode, &ei, dn->data_blkaddr))
			return;
	}
	__update_extent_tree_range(dn->inode, &ei, type);
}

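/*
 * Shrinking proceeds in two phases: first whole zombie trees (trees whose
 * inode was evicted but which still hold nodes) are freed, then individual
 * extent nodes are reclaimed from the head of the global LRU list until
 * nr_shrink entries have been released or the list is empty.
 */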
static unsigned int __shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink,
					enum extent_type type)
{
	struct extent_tree_info *eti = &sbi->extent_tree[type];
	struct extent_tree *et, *next;
	struct extent_node *en;
	unsigned int node_cnt = 0, tree_cnt = 0;
	int remained;

	if (!atomic_read(&eti->total_zombie_tree))
		goto free_node;

	if (!mutex_trylock(&eti->extent_tree_lock))
		goto out;

	/* 1. remove unreferenced extent trees */
	list_for_each_entry_safe(et, next, &eti->zombie_list, list) {
		if (atomic_read(&et->node_cnt)) {
			write_lock(&et->lock);
			node_cnt += __free_extent_tree(sbi, et);
			write_unlock(&et->lock);
		}
		f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
		list_del_init(&et->list);
		radix_tree_delete(&eti->extent_tree_root, et->ino);
		kmem_cache_free(extent_tree_slab, et);
		atomic_dec(&eti->total_ext_tree);
		atomic_dec(&eti->total_zombie_tree);
		tree_cnt++;

		if (node_cnt + tree_cnt >= nr_shrink)
			goto unlock_out;
		cond_resched();
	}
	mutex_unlock(&eti->extent_tree_lock);

free_node:
	/* 2. remove LRU extent entries */
	if (!mutex_trylock(&eti->extent_tree_lock))
		goto out;

	remained = nr_shrink - (node_cnt + tree_cnt);

	spin_lock(&eti->extent_lock);
	for (; remained > 0; remained--) {
		if (list_empty(&eti->extent_list))
			break;
		en = list_first_entry(&eti->extent_list,
					struct extent_node, list);
		et = en->et;
		if (!write_trylock(&et->lock)) {
			/* refresh this extent node's position in the extent list */
			list_move_tail(&en->list, &eti->extent_list);
			continue;
		}

		list_del_init(&en->list);
		spin_unlock(&eti->extent_lock);

		__detach_extent_node(sbi, et, en);

		write_unlock(&et->lock);
		node_cnt++;
		spin_lock(&eti->extent_lock);
	}
	spin_unlock(&eti->extent_lock);

unlock_out:
	mutex_unlock(&eti->extent_tree_lock);
out:
	trace_f2fs_shrink_extent_tree(sbi, node_cnt, tree_cnt, type);

	return node_cnt + tree_cnt;
}

/* read extent cache operations */
bool f2fs_lookup_read_extent_cache(struct inode *inode, pgoff_t pgofs,
				struct extent_info *ei)
{
	if (!__may_extent_tree(inode, EX_READ))
		return false;

	return __lookup_extent_tree(inode, pgofs, ei, EX_READ);
}

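/*
 * Usage sketch (illustrative, not from the original code): a caller mapping
 * file page @index can skip the dnode walk entirely on a cache hit:
 *
 *	block_t blkaddr;
 *
 *	if (f2fs_lookup_read_extent_cache_block(inode, index, &blkaddr))
 *		return blkaddr;	// read directly, no node page lookup
 *
 * On a hit, the block address is interpolated from the extent start as
 * ei.blk + (index - ei.fofs).
 */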
bool f2fs_lookup_read_extent_cache_block(struct inode *inode, pgoff_t index,
				block_t *blkaddr)
{
	struct extent_info ei = {};

	if (!f2fs_lookup_read_extent_cache(inode, index, &ei))
		return false;
	*blkaddr = ei.blk + index - ei.fofs;
	return true;
}

void f2fs_update_read_extent_cache(struct dnode_of_data *dn)
{
	return __update_extent_cache(dn, EX_READ);
}

void f2fs_update_read_extent_cache_range(struct dnode_of_data *dn,
				pgoff_t fofs, block_t blkaddr, unsigned int len)
{
	struct extent_info ei = {
		.fofs = fofs,
		.len = len,
		.blk = blkaddr,
	};

	if (!__may_extent_tree(dn->inode, EX_READ))
		return;

	__update_extent_tree_range(dn->inode, &ei, EX_READ);
}

unsigned int f2fs_shrink_read_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
{
	if (!test_opt(sbi, READ_EXTENT_CACHE))
		return 0;

	return __shrink_extent_tree(sbi, nr_shrink, EX_READ);
}

/* block age extent cache operations */
bool f2fs_lookup_age_extent_cache(struct inode *inode, pgoff_t pgofs,
				struct extent_info *ei)
{
	if (!__may_extent_tree(inode, EX_BLOCK_AGE))
		return false;

	return __lookup_extent_tree(inode, pgofs, ei, EX_BLOCK_AGE);
}

void f2fs_update_age_extent_cache(struct dnode_of_data *dn)
{
	return __update_extent_cache(dn, EX_BLOCK_AGE);
}

void f2fs_update_age_extent_cache_range(struct dnode_of_data *dn,
				pgoff_t fofs, unsigned int len)
{
	struct extent_info ei = {
		.fofs = fofs,
		.len = len,
	};

	if (!__may_extent_tree(dn->inode, EX_BLOCK_AGE))
		return;

	__update_extent_tree_range(dn->inode, &ei, EX_BLOCK_AGE);
}

unsigned int f2fs_shrink_age_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
{
	if (!test_opt(sbi, AGE_EXTENT_CACHE))
		return 0;

	return __shrink_extent_tree(sbi, nr_shrink, EX_BLOCK_AGE);
}

static unsigned int __destroy_extent_node(struct inode *inode,
					enum extent_type type)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
	unsigned int node_cnt = 0;

	if (!et || !atomic_read(&et->node_cnt))
		return 0;

	write_lock(&et->lock);
	node_cnt = __free_extent_tree(sbi, et);
	write_unlock(&et->lock);

	return node_cnt;
}

void f2fs_destroy_extent_node(struct inode *inode)
{
	__destroy_extent_node(inode, EX_READ);
	__destroy_extent_node(inode, EX_BLOCK_AGE);
}

static void __drop_extent_tree(struct inode *inode, enum extent_type type)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
	bool updated = false;

	if (!__may_extent_tree(inode, type))
		return;

	write_lock(&et->lock);
	__free_extent_tree(sbi, et);
	if (type == EX_READ) {
		set_inode_flag(inode, FI_NO_EXTENT);
		if (et->largest.len) {
			et->largest.len = 0;
			updated = true;
		}
	}
	write_unlock(&et->lock);
	if (updated)
		f2fs_mark_inode_dirty_sync(inode, true);
}

void f2fs_drop_extent_tree(struct inode *inode)
{
	__drop_extent_tree(inode, EX_READ);
	__drop_extent_tree(inode, EX_BLOCK_AGE);
}

static void __destroy_extent_tree(struct inode *inode, enum extent_type type)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree_info *eti = &sbi->extent_tree[type];
	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
	unsigned int node_cnt = 0;

	if (!et)
		return;

	if (inode->i_nlink && !is_bad_inode(inode) &&
					atomic_read(&et->node_cnt)) {
		mutex_lock(&eti->extent_tree_lock);
		list_add_tail(&et->list, &eti->zombie_list);
		atomic_inc(&eti->total_zombie_tree);
		mutex_unlock(&eti->extent_tree_lock);
		return;
	}

	/* free all extent info belonging to this extent tree */
	node_cnt = __destroy_extent_node(inode, type);

	/* delete the extent tree entry in the radix tree */
	mutex_lock(&eti->extent_tree_lock);
	f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
	radix_tree_delete(&eti->extent_tree_root, inode->i_ino);
	kmem_cache_free(extent_tree_slab, et);
	atomic_dec(&eti->total_ext_tree);
	mutex_unlock(&eti->extent_tree_lock);

	F2FS_I(inode)->extent_tree[type] = NULL;

	trace_f2fs_destroy_extent_tree(inode, node_cnt, type);
}

void f2fs_destroy_extent_tree(struct inode *inode)
{
	__destroy_extent_tree(inode, EX_READ);
	__destroy_extent_tree(inode, EX_BLOCK_AGE);
}

static void __init_extent_tree_info(struct extent_tree_info *eti)
{
	INIT_RADIX_TREE(&eti->extent_tree_root, GFP_NOIO);
	mutex_init(&eti->extent_tree_lock);
	INIT_LIST_HEAD(&eti->extent_list);
	spin_lock_init(&eti->extent_lock);
	atomic_set(&eti->total_ext_tree, 0);
	INIT_LIST_HEAD(&eti->zombie_list);
	atomic_set(&eti->total_zombie_tree, 0);
	atomic_set(&eti->total_ext_node, 0);
}

void f2fs_init_extent_cache_info(struct f2fs_sb_info *sbi)
{
	__init_extent_tree_info(&sbi->extent_tree[EX_READ]);
	__init_extent_tree_info(&sbi->extent_tree[EX_BLOCK_AGE]);

	/* initialize for block age extents */
	atomic64_set(&sbi->allocated_data_blocks, 0);
	sbi->hot_data_age_threshold = DEF_HOT_DATA_AGE_THRESHOLD;
	sbi->warm_data_age_threshold = DEF_WARM_DATA_AGE_THRESHOLD;
	sbi->last_age_weight = LAST_AGE_WEIGHT;
}

int __init f2fs_create_extent_cache(void)
{
	extent_tree_slab = f2fs_kmem_cache_create("f2fs_extent_tree",
			sizeof(struct extent_tree));
	if (!extent_tree_slab)
		return -ENOMEM;
	extent_node_slab = f2fs_kmem_cache_create("f2fs_extent_node",
			sizeof(struct extent_node));
	if (!extent_node_slab) {
		kmem_cache_destroy(extent_tree_slab);
		return -ENOMEM;
	}
	return 0;
}

void f2fs_destroy_extent_cache(void)
{
	kmem_cache_destroy(extent_node_slab);
	kmem_cache_destroy(extent_tree_slab);
}