// SPDX-License-Identifier: GPL-2.0
/*
 * fs/f2fs/segment.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 */
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/sched/mm.h>
#include <linux/prefetch.h>
#include <linux/kthread.h>
#include <linux/swap.h>
#include <linux/timer.h>
#include <linux/freezer.h>
#include <linux/sched/signal.h>
#include <linux/random.h>

#include "f2fs.h"
#include "segment.h"
#include "node.h"
#include "gc.h"
#include "iostat.h"
#include <trace/events/f2fs.h>

#define __reverse_ffz(x) __reverse_ffs(~(x))

static struct kmem_cache *discard_entry_slab;
static struct kmem_cache *discard_cmd_slab;
static struct kmem_cache *sit_entry_set_slab;
static struct kmem_cache *revoke_entry_slab;

static unsigned long __reverse_ulong(unsigned char *str)
{
	unsigned long tmp = 0;
	int shift = 24, idx = 0;

#if BITS_PER_LONG == 64
	shift = 56;
#endif
	while (shift >= 0) {
		tmp |= (unsigned long)str[idx++] << shift;
		shift -= BITS_PER_BYTE;
	}
	return tmp;
}

/*
 * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since
 * MSB and LSB are reversed in a byte by f2fs_set_bit.
 */
static inline unsigned long __reverse_ffs(unsigned long word)
{
	int num = 0;

#if BITS_PER_LONG == 64
	if ((word & 0xffffffff00000000UL) == 0)
		num += 32;
	else
		word >>= 32;
#endif
	if ((word & 0xffff0000) == 0)
		num += 16;
	else
		word >>= 16;

	if ((word & 0xff00) == 0)
		num += 8;
	else
		word >>= 8;

	if ((word & 0xf0) == 0)
		num += 4;
	else
		word >>= 4;

	if ((word & 0xc) == 0)
		num += 2;
	else
		word >>= 2;

	if ((word & 0x2) == 0)
		num += 1;
	return num;
}

/*
 * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c because
 * f2fs_set_bit makes MSB and LSB reversed in a byte.
 * @size must be an integral multiple of unsigned long.
 * Example:
 *                             MSB <--> LSB
 *   f2fs_set_bit(0, bitmap) => 1000 0000
 *   f2fs_set_bit(7, bitmap) => 0000 0001
 */
static unsigned long __find_rev_next_bit(const unsigned long *addr,
			unsigned long size, unsigned long offset)
{
	const unsigned long *p = addr + BIT_WORD(offset);
	unsigned long result = size;
	unsigned long tmp;

	if (offset >= size)
		return size;

	size -= (offset & ~(BITS_PER_LONG - 1));
	offset %= BITS_PER_LONG;

	while (1) {
		if (*p == 0)
			goto pass;

		tmp = __reverse_ulong((unsigned char *)p);

		tmp &= ~0UL >> offset;
		if (size < BITS_PER_LONG)
			tmp &= (~0UL << (BITS_PER_LONG - size));
		if (tmp)
			goto found;
pass:
		if (size <= BITS_PER_LONG)
			break;
		size -= BITS_PER_LONG;
		offset = 0;
		p++;
	}
	return result;
found:
	return result - size + __reverse_ffs(tmp);
}

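/* Counterpart of __find_rev_next_bit(): find the next zero bit in the byte-reversed bitmap. */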
static unsigned long __find_rev_next_zero_bit(const unsigned long *addr,
			unsigned long size, unsigned long offset)
{
	const unsigned long *p = addr + BIT_WORD(offset);
	unsigned long result = size;
	unsigned long tmp;

	if (offset >= size)
		return size;

	size -= (offset & ~(BITS_PER_LONG - 1));
	offset %= BITS_PER_LONG;

	while (1) {
		if (*p == ~0UL)
			goto pass;

		tmp = __reverse_ulong((unsigned char *)p);

		if (offset)
			tmp |= ~0UL << (BITS_PER_LONG - offset);
		if (size < BITS_PER_LONG)
			tmp |= ~0UL >> size;
		if (tmp != ~0UL)
			goto found;
pass:
		if (size <= BITS_PER_LONG)
			break;
		size -= BITS_PER_LONG;
		offset = 0;
		p++;
	}
	return result;
found:
	return result - size + __reverse_ffz(tmp);
}

bool f2fs_need_SSR(struct f2fs_sb_info *sbi)
{
	int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES);
	int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS);
	int imeta_secs = get_blocktype_secs(sbi, F2FS_DIRTY_IMETA);

	if (f2fs_lfs_mode(sbi))
		return false;
	if (sbi->gc_mode == GC_URGENT_HIGH)
		return true;
	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
		return true;

	return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs +
			SM_I(sbi)->min_ssr_sections + reserved_sections(sbi));
}

void f2fs_abort_atomic_write(struct inode *inode, bool clean)
{
	struct f2fs_inode_info *fi = F2FS_I(inode);

	if (!f2fs_is_atomic_file(inode))
		return;

	if (clean)
		truncate_inode_pages_final(inode->i_mapping);

	release_atomic_write_cnt(inode);
	clear_inode_flag(inode, FI_ATOMIC_COMMITTED);
	clear_inode_flag(inode, FI_ATOMIC_REPLACE);
	clear_inode_flag(inode, FI_ATOMIC_FILE);
	if (is_inode_flag_set(inode, FI_ATOMIC_DIRTIED)) {
		clear_inode_flag(inode, FI_ATOMIC_DIRTIED);
		f2fs_mark_inode_dirty_sync(inode, true);
	}
	stat_dec_atomic_inode(inode);

	F2FS_I(inode)->atomic_write_task = NULL;

	if (clean) {
		f2fs_i_size_write(inode, fi->original_i_size);
		fi->original_i_size = 0;
	}
	/* avoid stale dirty inode during eviction */
	sync_inode_metadata(inode, 0);
}

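/*
 * Swap the data block address at @index of @inode: in the commit path
 * (@recover == false) the current address is saved through @old_addr so the
 * change can be revoked later; in the revoke path (@recover == true) the
 * previously saved address passed in @new_addr is written back.
 */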
static int __replace_atomic_write_block(struct inode *inode, pgoff_t index,
			block_t new_addr, block_t *old_addr, bool recover)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct dnode_of_data dn;
	struct node_info ni;
	int err;

retry:
	set_new_dnode(&dn, inode, NULL, NULL, 0);
	err = f2fs_get_dnode_of_data(&dn, index, ALLOC_NODE);
	if (err) {
		if (err == -ENOMEM) {
			f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT);
			goto retry;
		}
		return err;
	}

	err = f2fs_get_node_info(sbi, dn.nid, &ni, false);
	if (err) {
		f2fs_put_dnode(&dn);
		return err;
	}

	if (recover) {
		/* dn.data_blkaddr is always valid */
		if (!__is_valid_data_blkaddr(new_addr)) {
			if (new_addr == NULL_ADDR)
				dec_valid_block_count(sbi, inode, 1);
			f2fs_invalidate_blocks(sbi, dn.data_blkaddr);
			f2fs_update_data_blkaddr(&dn, new_addr);
		} else {
			f2fs_replace_block(sbi, &dn, dn.data_blkaddr,
				new_addr, ni.version, true, true);
		}
	} else {
		blkcnt_t count = 1;

		err = inc_valid_block_count(sbi, inode, &count, true);
		if (err) {
			f2fs_put_dnode(&dn);
			return err;
		}

		*old_addr = dn.data_blkaddr;
		f2fs_truncate_data_blocks_range(&dn, 1);
		dec_valid_block_count(sbi, F2FS_I(inode)->cow_inode, count);

		f2fs_replace_block(sbi, &dn, dn.data_blkaddr, new_addr,
					ni.version, true, false);
	}

	f2fs_put_dnode(&dn);

	trace_f2fs_replace_atomic_write_block(inode, F2FS_I(inode)->cow_inode,
			index, old_addr ? *old_addr : 0, new_addr, recover);
	return 0;
}

static void __complete_revoke_list(struct inode *inode, struct list_head *head,
					bool revoke)
{
	struct revoke_entry *cur, *tmp;
	pgoff_t start_index = 0;
	bool truncate = is_inode_flag_set(inode, FI_ATOMIC_REPLACE);

	list_for_each_entry_safe(cur, tmp, head, list) {
		if (revoke) {
			__replace_atomic_write_block(inode, cur->index,
						cur->old_addr, NULL, true);
		} else if (truncate) {
			f2fs_truncate_hole(inode, start_index, cur->index);
			start_index = cur->index + 1;
		}

		list_del(&cur->list);
		kmem_cache_free(revoke_entry_slab, cur);
	}

	if (!revoke && truncate)
		f2fs_do_truncate_blocks(inode, start_index * PAGE_SIZE, false);
}

static int __f2fs_commit_atomic_write(struct inode *inode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct f2fs_inode_info *fi = F2FS_I(inode);
	struct inode *cow_inode = fi->cow_inode;
	struct revoke_entry *new;
	struct list_head revoke_list;
	block_t blkaddr;
	struct dnode_of_data dn;
	pgoff_t len = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
	pgoff_t off = 0, blen, index;
	int ret = 0, i;

	INIT_LIST_HEAD(&revoke_list);

	while (len) {
		blen = min_t(pgoff_t, ADDRS_PER_BLOCK(cow_inode), len);

		set_new_dnode(&dn, cow_inode, NULL, NULL, 0);
		ret = f2fs_get_dnode_of_data(&dn, off, LOOKUP_NODE_RA);
		if (ret && ret != -ENOENT) {
			goto out;
		} else if (ret == -ENOENT) {
			ret = 0;
			if (dn.max_level == 0)
				goto out;
			goto next;
		}

		blen = min((pgoff_t)ADDRS_PER_PAGE(dn.node_page, cow_inode),
				len);
		index = off;
		for (i = 0; i < blen; i++, dn.ofs_in_node++, index++) {
			blkaddr = f2fs_data_blkaddr(&dn);

			if (!__is_valid_data_blkaddr(blkaddr)) {
				continue;
			} else if (!f2fs_is_valid_blkaddr(sbi, blkaddr,
					DATA_GENERIC_ENHANCE)) {
				f2fs_put_dnode(&dn);
				ret = -EFSCORRUPTED;
				goto out;
			}

			new = f2fs_kmem_cache_alloc(revoke_entry_slab, GFP_NOFS,
							true, NULL);

			ret = __replace_atomic_write_block(inode, index, blkaddr,
							&new->old_addr, false);
			if (ret) {
				f2fs_put_dnode(&dn);
				kmem_cache_free(revoke_entry_slab, new);
				goto out;
			}

			f2fs_update_data_blkaddr(&dn, NULL_ADDR);
			new->index = index;
			list_add_tail(&new->list, &revoke_list);
		}
		f2fs_put_dnode(&dn);
next:
		off += blen;
		len -= blen;
	}

out:
	if (ret) {
		sbi->revoked_atomic_block += fi->atomic_write_cnt;
	} else {
		sbi->committed_atomic_block += fi->atomic_write_cnt;
		set_inode_flag(inode, FI_ATOMIC_COMMITTED);
		if (is_inode_flag_set(inode, FI_ATOMIC_DIRTIED)) {
			clear_inode_flag(inode, FI_ATOMIC_DIRTIED);
			f2fs_mark_inode_dirty_sync(inode, true);
		}
	}

	__complete_revoke_list(inode, &revoke_list, ret ? true : false);

	return ret;
}

int f2fs_commit_atomic_write(struct inode *inode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct f2fs_inode_info *fi = F2FS_I(inode);
	int err;

	err = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
	if (err)
		return err;

	f2fs_down_write(&fi->i_gc_rwsem[WRITE]);
	f2fs_lock_op(sbi);

	err = __f2fs_commit_atomic_write(inode);

	f2fs_unlock_op(sbi);
	f2fs_up_write(&fi->i_gc_rwsem[WRITE]);

	return err;
}

/*
 * This function balances dirty node and dentry pages.
 * In addition, it controls garbage collection.
 */
void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
{
	if (f2fs_cp_error(sbi))
		return;

	if (time_to_inject(sbi, FAULT_CHECKPOINT))
		f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_FAULT_INJECT);

	/* balance_fs_bg is able to be pending */
	if (need && excess_cached_nats(sbi))
		f2fs_balance_fs_bg(sbi, false);

	if (!f2fs_is_checkpoint_ready(sbi))
		return;

	/*
	 * We should do GC or end up with checkpoint, if there are so many dirty
	 * dir/node pages without enough free segments.
	 */
	if (has_enough_free_secs(sbi, 0, 0))
		return;

	if (test_opt(sbi, GC_MERGE) && sbi->gc_thread &&
				sbi->gc_thread->f2fs_gc_task) {
		DEFINE_WAIT(wait);

		prepare_to_wait(&sbi->gc_thread->fggc_wq, &wait,
					TASK_UNINTERRUPTIBLE);
		wake_up(&sbi->gc_thread->gc_wait_queue_head);
		io_schedule();
		finish_wait(&sbi->gc_thread->fggc_wq, &wait);
	} else {
		struct f2fs_gc_control gc_control = {
			.victim_segno = NULL_SEGNO,
			.init_gc_type = BG_GC,
			.no_bg_gc = true,
			.should_migrate_blocks = false,
			.err_gc_skipped = false,
			.nr_free_secs = 1 };
		f2fs_down_write(&sbi->gc_lock);
		stat_inc_gc_call_count(sbi, FOREGROUND);
		f2fs_gc(sbi, &gc_control);
	}
}

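/*
 * Return true if the number of dirty pages of any single type, or of all
 * types combined, is high enough to warrant flushing via checkpoint.
 */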
static inline bool excess_dirty_threshold(struct f2fs_sb_info *sbi)
{
	int factor = f2fs_rwsem_is_locked(&sbi->cp_rwsem) ? 3 : 2;
	unsigned int dents = get_pages(sbi, F2FS_DIRTY_DENTS);
	unsigned int qdata = get_pages(sbi, F2FS_DIRTY_QDATA);
	unsigned int nodes = get_pages(sbi, F2FS_DIRTY_NODES);
	unsigned int meta = get_pages(sbi, F2FS_DIRTY_META);
	unsigned int imeta = get_pages(sbi, F2FS_DIRTY_IMETA);
	unsigned int threshold =
		SEGS_TO_BLKS(sbi, (factor * DEFAULT_DIRTY_THRESHOLD));
	unsigned int global_threshold = threshold * 3 / 2;

	if (dents >= threshold || qdata >= threshold ||
		nodes >= threshold || meta >= threshold ||
		imeta >= threshold)
		return true;
	return dents + qdata + nodes + meta + imeta > global_threshold;
}

void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi, bool from_bg)
{
	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
		return;

	/* try to shrink read extent cache when there is not enough memory */
	if (!f2fs_available_free_memory(sbi, READ_EXTENT_CACHE))
		f2fs_shrink_read_extent_tree(sbi,
				READ_EXTENT_CACHE_SHRINK_NUMBER);

	/* try to shrink age extent cache when there is not enough memory */
	if (!f2fs_available_free_memory(sbi, AGE_EXTENT_CACHE))
		f2fs_shrink_age_extent_tree(sbi,
				AGE_EXTENT_CACHE_SHRINK_NUMBER);

	/* check the # of cached NAT entries */
	if (!f2fs_available_free_memory(sbi, NAT_ENTRIES))
		f2fs_try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK);

	if (!f2fs_available_free_memory(sbi, FREE_NIDS))
		f2fs_try_to_free_nids(sbi, MAX_FREE_NIDS);
	else
		f2fs_build_free_nids(sbi, false, false);

	if (excess_dirty_nats(sbi) || excess_dirty_threshold(sbi) ||
		excess_prefree_segs(sbi) || !f2fs_space_for_roll_forward(sbi))
		goto do_sync;

	/* there is background inflight IO or a recent foreground operation */
	if (is_inflight_io(sbi, REQ_TIME) ||
		(!f2fs_time_over(sbi, REQ_TIME) && f2fs_rwsem_is_locked(&sbi->cp_rwsem)))
		return;

	/* exceeded the periodic checkpoint timeout threshold */
	if (f2fs_time_over(sbi, CP_TIME))
		goto do_sync;

	/* checkpoint is the only way to shrink partial cached entries */
	if (f2fs_available_free_memory(sbi, NAT_ENTRIES) &&
		f2fs_available_free_memory(sbi, INO_ENTRIES))
		return;

do_sync:
	if (test_opt(sbi, DATA_FLUSH) && from_bg) {
		struct blk_plug plug;

		mutex_lock(&sbi->flush_lock);

		blk_start_plug(&plug);
		f2fs_sync_dirty_inodes(sbi, FILE_INODE, false);
		blk_finish_plug(&plug);

		mutex_unlock(&sbi->flush_lock);
	}
	stat_inc_cp_call_count(sbi, BACKGROUND);
	f2fs_sync_fs(sbi->sb, 1);
}


static int __submit_flush_wait(struct f2fs_sb_info *sbi,
				struct block_device *bdev)
{
	int ret = blkdev_issue_flush(bdev);

	trace_f2fs_issue_flush(bdev, test_opt(sbi, NOBARRIER),
				test_opt(sbi, FLUSH_MERGE), ret);
	if (!ret)
		f2fs_update_iostat(sbi, NULL, FS_FLUSH_IO, 0);
	return ret;
}

static int submit_flush_wait(struct f2fs_sb_info *sbi, nid_t ino)
{
	int ret = 0;
	int i;

	if (!f2fs_is_multi_device(sbi))
		return __submit_flush_wait(sbi, sbi->sb->s_bdev);

	for (i = 0; i < sbi->s_ndevs; i++) {
		if (!f2fs_is_dirty_device(sbi, ino, i, FLUSH_INO))
			continue;
		ret = __submit_flush_wait(sbi, FDEV(i).bdev);
		if (ret)
			break;
	}
	return ret;
}

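/*
 * Flush-merge worker: drains fcc->issue_list, issues a single flush on behalf
 * of all queued commands and completes each waiter with the shared result.
 */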
static int issue_flush_thread(void *data)
{
	struct f2fs_sb_info *sbi = data;
	struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
	wait_queue_head_t *q = &fcc->flush_wait_queue;
repeat:
	if (kthread_should_stop())
		return 0;

	if (!llist_empty(&fcc->issue_list)) {
		struct flush_cmd *cmd, *next;
		int ret;

		fcc->dispatch_list = llist_del_all(&fcc->issue_list);
		fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list);

		cmd = llist_entry(fcc->dispatch_list, struct flush_cmd, llnode);

		ret = submit_flush_wait(sbi, cmd->ino);
		atomic_inc(&fcc->issued_flush);

		llist_for_each_entry_safe(cmd, next,
					  fcc->dispatch_list, llnode) {
			cmd->ret = ret;
			complete(&cmd->wait);
		}
		fcc->dispatch_list = NULL;
	}

	wait_event_interruptible(*q,
		kthread_should_stop() || !llist_empty(&fcc->issue_list));
	goto repeat;
}

int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino)
{
	struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
	struct flush_cmd cmd;
	int ret;

	if (test_opt(sbi, NOBARRIER))
		return 0;

	if (!test_opt(sbi, FLUSH_MERGE)) {
		atomic_inc(&fcc->queued_flush);
		ret = submit_flush_wait(sbi, ino);
		atomic_dec(&fcc->queued_flush);
		atomic_inc(&fcc->issued_flush);
		return ret;
	}

	if (atomic_inc_return(&fcc->queued_flush) == 1 ||
	    f2fs_is_multi_device(sbi)) {
		ret = submit_flush_wait(sbi, ino);
		atomic_dec(&fcc->queued_flush);

		atomic_inc(&fcc->issued_flush);
		return ret;
	}

	cmd.ino = ino;
	init_completion(&cmd.wait);

	llist_add(&cmd.llnode, &fcc->issue_list);

	/*
	 * update issue_list before we wake up issue_flush thread, this
	 * smp_mb() pairs with another barrier in ___wait_event(), see
	 * more details in comments of waitqueue_active().
	 */
	smp_mb();

	if (waitqueue_active(&fcc->flush_wait_queue))
		wake_up(&fcc->flush_wait_queue);

	if (fcc->f2fs_issue_flush) {
		wait_for_completion(&cmd.wait);
		atomic_dec(&fcc->queued_flush);
	} else {
		struct llist_node *list;

		list = llist_del_all(&fcc->issue_list);
		if (!list) {
			wait_for_completion(&cmd.wait);
			atomic_dec(&fcc->queued_flush);
		} else {
			struct flush_cmd *tmp, *next;

			ret = submit_flush_wait(sbi, ino);

			llist_for_each_entry_safe(tmp, next, list, llnode) {
				if (tmp == &cmd) {
					cmd.ret = ret;
					atomic_dec(&fcc->queued_flush);
					continue;
				}
				tmp->ret = ret;
				complete(&tmp->wait);
			}
		}
	}

	return cmd.ret;
}

int f2fs_create_flush_cmd_control(struct f2fs_sb_info *sbi)
{
	dev_t dev = sbi->sb->s_bdev->bd_dev;
	struct flush_cmd_control *fcc;

	if (SM_I(sbi)->fcc_info) {
		fcc = SM_I(sbi)->fcc_info;
		if (fcc->f2fs_issue_flush)
			return 0;
		goto init_thread;
	}

	fcc = f2fs_kzalloc(sbi, sizeof(struct flush_cmd_control), GFP_KERNEL);
	if (!fcc)
		return -ENOMEM;
	atomic_set(&fcc->issued_flush, 0);
	atomic_set(&fcc->queued_flush, 0);
	init_waitqueue_head(&fcc->flush_wait_queue);
	init_llist_head(&fcc->issue_list);
	SM_I(sbi)->fcc_info = fcc;
	if (!test_opt(sbi, FLUSH_MERGE))
		return 0;

init_thread:
	fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
				"f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
	if (IS_ERR(fcc->f2fs_issue_flush)) {
		int err = PTR_ERR(fcc->f2fs_issue_flush);

		fcc->f2fs_issue_flush = NULL;
		return err;
	}

	return 0;
}

void f2fs_destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free)
{
	struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;

	if (fcc && fcc->f2fs_issue_flush) {
		struct task_struct *flush_thread = fcc->f2fs_issue_flush;

		fcc->f2fs_issue_flush = NULL;
		kthread_stop(flush_thread);
	}
	if (free) {
		kfree(fcc);
		SM_I(sbi)->fcc_info = NULL;
	}
}

int f2fs_flush_device_cache(struct f2fs_sb_info *sbi)
{
	int ret = 0, i;

	if (!f2fs_is_multi_device(sbi))
		return 0;

	if (test_opt(sbi, NOBARRIER))
		return 0;

	for (i = 1; i < sbi->s_ndevs; i++) {
		int count = DEFAULT_RETRY_IO_COUNT;

		if (!f2fs_test_bit(i, (char *)&sbi->dirty_device))
			continue;

		do {
			ret = __submit_flush_wait(sbi, FDEV(i).bdev);
			if (ret)
				f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT);
		} while (ret && --count);

		if (ret) {
			f2fs_stop_checkpoint(sbi, false,
					STOP_CP_REASON_FLUSH_FAIL);
			break;
		}

		spin_lock(&sbi->dev_lock);
		f2fs_clear_bit(i, (char *)&sbi->dirty_device);
		spin_unlock(&sbi->dev_lock);
	}

	return ret;
}

static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
		enum dirty_type dirty_type)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);

	/* need not be added */
	if (IS_CURSEG(sbi, segno))
		return;

	if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type]))
		dirty_i->nr_dirty[dirty_type]++;

	if (dirty_type == DIRTY) {
		struct seg_entry *sentry = get_seg_entry(sbi, segno);
		enum dirty_type t = sentry->type;

		if (unlikely(t >= DIRTY)) {
			f2fs_bug_on(sbi, 1);
			return;
		}
		if (!test_and_set_bit(segno, dirty_i->dirty_segmap[t]))
			dirty_i->nr_dirty[t]++;

		if (__is_large_section(sbi)) {
			unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
			block_t valid_blocks =
				get_valid_blocks(sbi, segno, true);

			f2fs_bug_on(sbi,
				(!is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
				!valid_blocks) ||
				valid_blocks == CAP_BLKS_PER_SEC(sbi));

			if (!IS_CURSEC(sbi, secno))
				set_bit(secno, dirty_i->dirty_secmap);
		}
	}
}

static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
		enum dirty_type dirty_type)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	block_t valid_blocks;

	if (test_and_clear_bit(segno, dirty_i->dirty_segmap[dirty_type]))
		dirty_i->nr_dirty[dirty_type]--;

	if (dirty_type == DIRTY) {
		struct seg_entry *sentry = get_seg_entry(sbi, segno);
		enum dirty_type t = sentry->type;

		if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
			dirty_i->nr_dirty[t]--;

		valid_blocks = get_valid_blocks(sbi, segno, true);
		if (valid_blocks == 0) {
			clear_bit(GET_SEC_FROM_SEG(sbi, segno),
						dirty_i->victim_secmap);
#ifdef CONFIG_F2FS_CHECK_FS
			clear_bit(segno, SIT_I(sbi)->invalid_segmap);
#endif
		}
		if (__is_large_section(sbi)) {
			unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);

			if (!valid_blocks ||
					valid_blocks == CAP_BLKS_PER_SEC(sbi)) {
				clear_bit(secno, dirty_i->dirty_secmap);
				return;
			}

			if (!IS_CURSEC(sbi, secno))
				set_bit(secno, dirty_i->dirty_secmap);
		}
	}
}

/*
 * Errors such as -ENOMEM should not occur here.
 * Adding a dirty entry into the seglist is not a critical operation.
 * If a given segment is one of the current working segments, it won't be added.
 */
static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	unsigned short valid_blocks, ckpt_valid_blocks;
	unsigned int usable_blocks;

	if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno))
		return;

	usable_blocks = f2fs_usable_blks_in_seg(sbi, segno);
	mutex_lock(&dirty_i->seglist_lock);

	valid_blocks = get_valid_blocks(sbi, segno, false);
	ckpt_valid_blocks = get_ckpt_valid_blocks(sbi, segno, false);

	if (valid_blocks == 0 && (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) ||
		ckpt_valid_blocks == usable_blocks)) {
		__locate_dirty_segment(sbi, segno, PRE);
		__remove_dirty_segment(sbi, segno, DIRTY);
	} else if (valid_blocks < usable_blocks) {
		__locate_dirty_segment(sbi, segno, DIRTY);
	} else {
		/* Recovery routine with SSR needs this */
		__remove_dirty_segment(sbi, segno, DIRTY);
	}

	mutex_unlock(&dirty_i->seglist_lock);
}

/* Move currently empty dirty segments to prefree. Takes seglist_lock. */
void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	unsigned int segno;

	mutex_lock(&dirty_i->seglist_lock);
	for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
		if (get_valid_blocks(sbi, segno, false))
			continue;
		if (IS_CURSEG(sbi, segno))
			continue;
		__locate_dirty_segment(sbi, segno, PRE);
		__remove_dirty_segment(sbi, segno, DIRTY);
	}
	mutex_unlock(&dirty_i->seglist_lock);
}

block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi)
{
	int ovp_hole_segs =
		(overprovision_segments(sbi) - reserved_segments(sbi));
	block_t ovp_holes = SEGS_TO_BLKS(sbi, ovp_hole_segs);
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	block_t holes[2] = {0, 0};	/* DATA and NODE */
	block_t unusable;
	struct seg_entry *se;
	unsigned int segno;

	mutex_lock(&dirty_i->seglist_lock);
	for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
		se = get_seg_entry(sbi, segno);
		if (IS_NODESEG(se->type))
			holes[NODE] += f2fs_usable_blks_in_seg(sbi, segno) -
							se->valid_blocks;
		else
			holes[DATA] += f2fs_usable_blks_in_seg(sbi, segno) -
							se->valid_blocks;
	}
	mutex_unlock(&dirty_i->seglist_lock);

	unusable = max(holes[DATA], holes[NODE]);
	if (unusable > ovp_holes)
		return unusable - ovp_holes;
	return 0;
}

int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable)
{
	int ovp_hole_segs =
		(overprovision_segments(sbi) - reserved_segments(sbi));

	if (F2FS_OPTION(sbi).unusable_cap_perc == 100)
		return 0;
	if (unusable > F2FS_OPTION(sbi).unusable_cap)
		return -EAGAIN;
	if (is_sbi_flag_set(sbi, SBI_CP_DISABLED_QUICK) &&
		dirty_segments(sbi) > ovp_hole_segs)
		return -EAGAIN;
	if (has_not_enough_free_secs(sbi, 0, 0))
		return -EAGAIN;
	return 0;
}

/* This is only used by SBI_CP_DISABLED */
static unsigned int get_free_segment(struct f2fs_sb_info *sbi)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	unsigned int segno = 0;

	mutex_lock(&dirty_i->seglist_lock);
	for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
		if (get_valid_blocks(sbi, segno, false))
			continue;
		if (get_ckpt_valid_blocks(sbi, segno, false))
			continue;
		mutex_unlock(&dirty_i->seglist_lock);
		return segno;
	}
	mutex_unlock(&dirty_i->seglist_lock);
	return NULL_SEGNO;
}

static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi,
		struct block_device *bdev, block_t lstart,
		block_t start, block_t len)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct list_head *pend_list;
	struct discard_cmd *dc;

	f2fs_bug_on(sbi, !len);

	pend_list = &dcc->pend_list[plist_idx(len)];

	dc = f2fs_kmem_cache_alloc(discard_cmd_slab, GFP_NOFS, true, NULL);
	INIT_LIST_HEAD(&dc->list);
	dc->bdev = bdev;
	dc->di.lstart = lstart;
	dc->di.start = start;
	dc->di.len = len;
	dc->ref = 0;
	dc->state = D_PREP;
	dc->queued = 0;
	dc->error = 0;
	init_completion(&dc->wait);
	list_add_tail(&dc->list, pend_list);
	spin_lock_init(&dc->lock);
	dc->bio_ref = 0;
	atomic_inc(&dcc->discard_cmd_cnt);
	dcc->undiscard_blks += len;

	return dc;
}

static bool f2fs_check_discard_tree(struct f2fs_sb_info *sbi)
{
#ifdef CONFIG_F2FS_CHECK_FS
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct rb_node *cur = rb_first_cached(&dcc->root), *next;
	struct discard_cmd *cur_dc, *next_dc;

	while (cur) {
		next = rb_next(cur);
		if (!next)
			return true;

		cur_dc = rb_entry(cur, struct discard_cmd, rb_node);
		next_dc = rb_entry(next, struct discard_cmd, rb_node);

		if (cur_dc->di.lstart + cur_dc->di.len > next_dc->di.lstart) {
			f2fs_info(sbi, "broken discard_rbtree, "
				"cur(%u, %u) next(%u, %u)",
				cur_dc->di.lstart, cur_dc->di.len,
				next_dc->di.lstart, next_dc->di.len);
			return false;
		}
		cur = next;
	}
#endif
	return true;
}

static struct discard_cmd *__lookup_discard_cmd(struct f2fs_sb_info *sbi,
						block_t blkaddr)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct rb_node *node = dcc->root.rb_root.rb_node;
	struct discard_cmd *dc;

	while (node) {
		dc = rb_entry(node, struct discard_cmd, rb_node);

		if (blkaddr < dc->di.lstart)
			node = node->rb_left;
		else if (blkaddr >= dc->di.lstart + dc->di.len)
			node = node->rb_right;
		else
			return dc;
	}
	return NULL;
}

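/*
 * Look up the discard command covering @blkaddr. Also return the closest
 * previous/next commands and, on a miss, the rbtree insertion point.
 */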
static struct discard_cmd *__lookup_discard_cmd_ret(struct rb_root_cached *root,
				block_t blkaddr,
				struct discard_cmd **prev_entry,
				struct discard_cmd **next_entry,
				struct rb_node ***insert_p,
				struct rb_node **insert_parent)
{
	struct rb_node **pnode = &root->rb_root.rb_node;
	struct rb_node *parent = NULL, *tmp_node;
	struct discard_cmd *dc;

	*insert_p = NULL;
	*insert_parent = NULL;
	*prev_entry = NULL;
	*next_entry = NULL;

	if (RB_EMPTY_ROOT(&root->rb_root))
		return NULL;

	while (*pnode) {
		parent = *pnode;
		dc = rb_entry(*pnode, struct discard_cmd, rb_node);

		if (blkaddr < dc->di.lstart)
			pnode = &(*pnode)->rb_left;
		else if (blkaddr >= dc->di.lstart + dc->di.len)
			pnode = &(*pnode)->rb_right;
		else
			goto lookup_neighbors;
	}

	*insert_p = pnode;
	*insert_parent = parent;

	dc = rb_entry(parent, struct discard_cmd, rb_node);
	tmp_node = parent;
	if (parent && blkaddr > dc->di.lstart)
		tmp_node = rb_next(parent);
	*next_entry = rb_entry_safe(tmp_node, struct discard_cmd, rb_node);

	tmp_node = parent;
	if (parent && blkaddr < dc->di.lstart)
		tmp_node = rb_prev(parent);
	*prev_entry = rb_entry_safe(tmp_node, struct discard_cmd, rb_node);
	return NULL;

lookup_neighbors:
	/* lookup prev node for merging backward later */
	tmp_node = rb_prev(&dc->rb_node);
	*prev_entry = rb_entry_safe(tmp_node, struct discard_cmd, rb_node);

	/* lookup next node for merging frontward later */
	tmp_node = rb_next(&dc->rb_node);
	*next_entry = rb_entry_safe(tmp_node, struct discard_cmd, rb_node);
	return dc;
}

static void __detach_discard_cmd(struct discard_cmd_control *dcc,
							struct discard_cmd *dc)
{
	if (dc->state == D_DONE)
		atomic_sub(dc->queued, &dcc->queued_discard);

	list_del(&dc->list);
	rb_erase_cached(&dc->rb_node, &dcc->root);
	dcc->undiscard_blks -= dc->di.len;

	kmem_cache_free(discard_cmd_slab, dc);

	atomic_dec(&dcc->discard_cmd_cnt);
}

static void __remove_discard_cmd(struct f2fs_sb_info *sbi,
							struct discard_cmd *dc)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	unsigned long flags;

	trace_f2fs_remove_discard(dc->bdev, dc->di.start, dc->di.len);

	spin_lock_irqsave(&dc->lock, flags);
	if (dc->bio_ref) {
		spin_unlock_irqrestore(&dc->lock, flags);
		return;
	}
	spin_unlock_irqrestore(&dc->lock, flags);

	f2fs_bug_on(sbi, dc->ref);

	if (dc->error == -EOPNOTSUPP)
		dc->error = 0;

	if (dc->error)
		f2fs_info_ratelimited(sbi,
			"Issue discard(%u, %u, %u) failed, ret: %d",
			dc->di.lstart, dc->di.start, dc->di.len, dc->error);
	__detach_discard_cmd(dcc, dc);
}

static void f2fs_submit_discard_endio(struct bio *bio)
{
	struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private;
	unsigned long flags;

	spin_lock_irqsave(&dc->lock, flags);
	if (!dc->error)
		dc->error = blk_status_to_errno(bio->bi_status);
	dc->bio_ref--;
	if (!dc->bio_ref && dc->state == D_SUBMIT) {
		dc->state = D_DONE;
		complete_all(&dc->wait);
	}
	spin_unlock_irqrestore(&dc->lock, flags);
	bio_put(bio);
}

static void __check_sit_bitmap(struct f2fs_sb_info *sbi,
				block_t start, block_t end)
{
#ifdef CONFIG_F2FS_CHECK_FS
	struct seg_entry *sentry;
	unsigned int segno;
	block_t blk = start;
	unsigned long offset, size, *map;

	while (blk < end) {
		segno = GET_SEGNO(sbi, blk);
		sentry = get_seg_entry(sbi, segno);
		offset = GET_BLKOFF_FROM_SEG0(sbi, blk);

		if (end < START_BLOCK(sbi, segno + 1))
			size = GET_BLKOFF_FROM_SEG0(sbi, end);
		else
			size = BLKS_PER_SEG(sbi);
		map = (unsigned long *)(sentry->cur_valid_map);
		offset = __find_rev_next_bit(map, size, offset);
		f2fs_bug_on(sbi, offset != size);
		blk = START_BLOCK(sbi, segno + 1);
	}
#endif
}

static void __init_discard_policy(struct f2fs_sb_info *sbi,
				struct discard_policy *dpolicy,
				int discard_type, unsigned int granularity)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;

	/* common policy */
	dpolicy->type = discard_type;
	dpolicy->sync = true;
	dpolicy->ordered = false;
	dpolicy->granularity = granularity;

	dpolicy->max_requests = dcc->max_discard_request;
	dpolicy->io_aware_gran = dcc->discard_io_aware_gran;
	dpolicy->timeout = false;

	if (discard_type == DPOLICY_BG) {
		dpolicy->min_interval = dcc->min_discard_issue_time;
		dpolicy->mid_interval = dcc->mid_discard_issue_time;
		dpolicy->max_interval = dcc->max_discard_issue_time;
		if (dcc->discard_io_aware == DPOLICY_IO_AWARE_ENABLE)
			dpolicy->io_aware = true;
		else if (dcc->discard_io_aware == DPOLICY_IO_AWARE_DISABLE)
			dpolicy->io_aware = false;
		dpolicy->sync = false;
		dpolicy->ordered = true;
		if (utilization(sbi) > dcc->discard_urgent_util) {
			dpolicy->granularity = MIN_DISCARD_GRANULARITY;
			if (atomic_read(&dcc->discard_cmd_cnt))
				dpolicy->max_interval =
					dcc->min_discard_issue_time;
		}
	} else if (discard_type == DPOLICY_FORCE) {
		dpolicy->min_interval = dcc->min_discard_issue_time;
		dpolicy->mid_interval = dcc->mid_discard_issue_time;
		dpolicy->max_interval = dcc->max_discard_issue_time;
		dpolicy->io_aware = false;
	} else if (discard_type == DPOLICY_FSTRIM) {
		dpolicy->io_aware = false;
	} else if (discard_type == DPOLICY_UMOUNT) {
		dpolicy->io_aware = false;
		/* we need to issue all to keep CP_TRIMMED_FLAG */
		dpolicy->granularity = MIN_DISCARD_GRANULARITY;
		dpolicy->timeout = true;
	}
}

static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
				struct block_device *bdev, block_t lstart,
				block_t start, block_t len);

#ifdef CONFIG_BLK_DEV_ZONED
static void __submit_zone_reset_cmd(struct f2fs_sb_info *sbi,
				   struct discard_cmd *dc, blk_opf_t flag,
				   struct list_head *wait_list,
				   unsigned int *issued)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct block_device *bdev = dc->bdev;
	struct bio *bio = bio_alloc(bdev, 0, REQ_OP_ZONE_RESET | flag, GFP_NOFS);
	unsigned long flags;

	trace_f2fs_issue_reset_zone(bdev, dc->di.start);

	spin_lock_irqsave(&dc->lock, flags);
	dc->state = D_SUBMIT;
	dc->bio_ref++;
	spin_unlock_irqrestore(&dc->lock, flags);

	if (issued)
		(*issued)++;

	atomic_inc(&dcc->queued_discard);
	dc->queued++;
	list_move_tail(&dc->list, wait_list);

	/* sanity check on discard range */
	__check_sit_bitmap(sbi, dc->di.lstart, dc->di.lstart + dc->di.len);

	bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(dc->di.start);
	bio->bi_private = dc;
	bio->bi_end_io = f2fs_submit_discard_endio;
	submit_bio(bio);

	atomic_inc(&dcc->issued_discard);
	f2fs_update_iostat(sbi, NULL, FS_ZONE_RESET_IO, dc->di.len * F2FS_BLKSIZE);
}
#endif

/* this function is copied from blkdev_issue_discard from block/blk-lib.c */
static int __submit_discard_cmd(struct f2fs_sb_info *sbi,
				struct discard_policy *dpolicy,
				struct discard_cmd *dc, int *issued)
{
	struct block_device *bdev = dc->bdev;
	unsigned int max_discard_blocks =
			SECTOR_TO_BLOCK(bdev_max_discard_sectors(bdev));
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
					&(dcc->fstrim_list) : &(dcc->wait_list);
	blk_opf_t flag = dpolicy->sync ? REQ_SYNC : 0;
	block_t lstart, start, len, total_len;
	int err = 0;

	if (dc->state != D_PREP)
		return 0;

	if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
		return 0;

#ifdef CONFIG_BLK_DEV_ZONED
	if (f2fs_sb_has_blkzoned(sbi) && bdev_is_zoned(bdev)) {
		int devi = f2fs_bdev_index(sbi, bdev);

		if (devi < 0)
			return -EINVAL;

		if (f2fs_blkz_is_seq(sbi, devi, dc->di.start)) {
			__submit_zone_reset_cmd(sbi, dc, flag,
						wait_list, issued);
			return 0;
		}

		/*
		 * Issue discard for conventional zones only if the device
		 * supports discard.
		 */
		if (!bdev_max_discard_sectors(bdev))
			return -EOPNOTSUPP;
	}
#endif

	trace_f2fs_issue_discard(bdev, dc->di.start, dc->di.len);

	lstart = dc->di.lstart;
	start = dc->di.start;
	len = dc->di.len;
	total_len = len;

	dc->di.len = 0;

	while (total_len && *issued < dpolicy->max_requests && !err) {
		struct bio *bio = NULL;
		unsigned long flags;
		bool last = true;

		if (len > max_discard_blocks) {
			len = max_discard_blocks;
			last = false;
		}

		(*issued)++;
		if (*issued == dpolicy->max_requests)
			last = true;

		dc->di.len += len;

		if (time_to_inject(sbi, FAULT_DISCARD)) {
			err = -EIO;
		} else {
			err = __blkdev_issue_discard(bdev,
					SECTOR_FROM_BLOCK(start),
					SECTOR_FROM_BLOCK(len),
					GFP_NOFS, &bio);
		}
		if (err) {
			spin_lock_irqsave(&dc->lock, flags);
			if (dc->state == D_PARTIAL)
				dc->state = D_SUBMIT;
			spin_unlock_irqrestore(&dc->lock, flags);

			break;
		}

		f2fs_bug_on(sbi, !bio);

		/*
		 * should keep before submission to avoid D_DONE
		 * right away
		 */
		spin_lock_irqsave(&dc->lock, flags);
		if (last)
			dc->state = D_SUBMIT;
		else
			dc->state = D_PARTIAL;
		dc->bio_ref++;
		spin_unlock_irqrestore(&dc->lock, flags);

		atomic_inc(&dcc->queued_discard);
		dc->queued++;
		list_move_tail(&dc->list, wait_list);

		/* sanity check on discard range */
		__check_sit_bitmap(sbi, lstart, lstart + len);

		bio->bi_private = dc;
		bio->bi_end_io = f2fs_submit_discard_endio;
		bio->bi_opf |= flag;
		submit_bio(bio);

		atomic_inc(&dcc->issued_discard);

		f2fs_update_iostat(sbi, NULL, FS_DISCARD_IO, len * F2FS_BLKSIZE);

		lstart += len;
		start += len;
		total_len -= len;
		len = total_len;
	}

	if (!err && len) {
		dcc->undiscard_blks -= len;
		__update_discard_tree_range(sbi, bdev, lstart, start, len);
	}
	return err;
}

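/* Insert a new discard command into the rbtree; a range that already exists is skipped. */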
static void __insert_discard_cmd(struct f2fs_sb_info *sbi,
				struct block_device *bdev, block_t lstart,
				block_t start, block_t len)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct rb_node **p = &dcc->root.rb_root.rb_node;
	struct rb_node *parent = NULL;
	struct discard_cmd *dc;
	bool leftmost = true;

	/* look up rb tree to find parent node */
	while (*p) {
		parent = *p;
		dc = rb_entry(parent, struct discard_cmd, rb_node);

		if (lstart < dc->di.lstart) {
			p = &(*p)->rb_left;
		} else if (lstart >= dc->di.lstart + dc->di.len) {
			p = &(*p)->rb_right;
			leftmost = false;
		} else {
			/* Let's skip to add, if exists */
			return;
		}
	}

	dc = __create_discard_cmd(sbi, bdev, lstart, start, len);

	rb_link_node(&dc->rb_node, parent, p);
	rb_insert_color_cached(&dc->rb_node, &dcc->root, leftmost);
}

static void __relocate_discard_cmd(struct discard_cmd_control *dcc,
						struct discard_cmd *dc)
{
	list_move_tail(&dc->list, &dcc->pend_list[plist_idx(dc->di.len)]);
}

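/*
 * Remove @blkaddr from a pending discard command, trimming the command or
 * splitting it into two when the block falls in the middle of its range.
 */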
static void __punch_discard_cmd(struct f2fs_sb_info *sbi,
				struct discard_cmd *dc, block_t blkaddr)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct discard_info di = dc->di;
	bool modified = false;

	if (dc->state == D_DONE || dc->di.len == 1) {
		__remove_discard_cmd(sbi, dc);
		return;
	}

	dcc->undiscard_blks -= di.len;

	if (blkaddr > di.lstart) {
		dc->di.len = blkaddr - dc->di.lstart;
		dcc->undiscard_blks += dc->di.len;
		__relocate_discard_cmd(dcc, dc);
		modified = true;
	}

	if (blkaddr < di.lstart + di.len - 1) {
		if (modified) {
			__insert_discard_cmd(sbi, dc->bdev, blkaddr + 1,
					di.start + blkaddr + 1 - di.lstart,
					di.lstart + di.len - 1 - blkaddr);
		} else {
			dc->di.lstart++;
			dc->di.len--;
			dc->di.start++;
			dcc->undiscard_blks += dc->di.len;
			__relocate_discard_cmd(dcc, dc);
		}
	}
}

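/*
 * Add the range [@lstart, @lstart + @len) to the discard rbtree, merging it
 * with neighbouring pending commands on the same device where possible.
 */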
static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
				struct block_device *bdev, block_t lstart,
				block_t start, block_t len)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
	struct discard_cmd *dc;
	struct discard_info di = {0};
	struct rb_node **insert_p = NULL, *insert_parent = NULL;
	unsigned int max_discard_blocks =
			SECTOR_TO_BLOCK(bdev_max_discard_sectors(bdev));
	block_t end = lstart + len;

	dc = __lookup_discard_cmd_ret(&dcc->root, lstart,
				&prev_dc, &next_dc, &insert_p, &insert_parent);
	if (dc)
		prev_dc = dc;

	if (!prev_dc) {
		di.lstart = lstart;
		di.len = next_dc ? next_dc->di.lstart - lstart : len;
		di.len = min(di.len, len);
		di.start = start;
	}

	while (1) {
		struct rb_node *node;
		bool merged = false;
		struct discard_cmd *tdc = NULL;

		if (prev_dc) {
			di.lstart = prev_dc->di.lstart + prev_dc->di.len;
			if (di.lstart < lstart)
				di.lstart = lstart;
			if (di.lstart >= end)
				break;

			if (!next_dc || next_dc->di.lstart > end)
				di.len = end - di.lstart;
			else
				di.len = next_dc->di.lstart - di.lstart;
			di.start = start + di.lstart - lstart;
		}

		if (!di.len)
			goto next;

		if (prev_dc && prev_dc->state == D_PREP &&
			prev_dc->bdev == bdev &&
			__is_discard_back_mergeable(&di, &prev_dc->di,
							max_discard_blocks)) {
			prev_dc->di.len += di.len;
			dcc->undiscard_blks += di.len;
			__relocate_discard_cmd(dcc, prev_dc);
			di = prev_dc->di;
			tdc = prev_dc;
			merged = true;
		}

		if (next_dc && next_dc->state == D_PREP &&
			next_dc->bdev == bdev &&
			__is_discard_front_mergeable(&di, &next_dc->di,
							max_discard_blocks)) {
			next_dc->di.lstart = di.lstart;
			next_dc->di.len += di.len;
			next_dc->di.start = di.start;
			dcc->undiscard_blks += di.len;
			__relocate_discard_cmd(dcc, next_dc);
			if (tdc)
				__remove_discard_cmd(sbi, tdc);
			merged = true;
		}

		if (!merged)
			__insert_discard_cmd(sbi, bdev,
						di.lstart, di.start, di.len);
 next:
		prev_dc = next_dc;
		if (!prev_dc)
			break;

		node = rb_next(&prev_dc->rb_node);
		next_dc = rb_entry_safe(node, struct discard_cmd, rb_node);
	}
}

#ifdef CONFIG_BLK_DEV_ZONED
static void __queue_zone_reset_cmd(struct f2fs_sb_info *sbi,
		struct block_device *bdev, block_t blkstart, block_t lblkstart,
		block_t blklen)
{
	trace_f2fs_queue_reset_zone(bdev, blkstart);

	mutex_lock(&SM_I(sbi)->dcc_info->cmd_lock);
	__insert_discard_cmd(sbi, bdev, lblkstart, blkstart, blklen);
	mutex_unlock(&SM_I(sbi)->dcc_info->cmd_lock);
}
#endif

static void __queue_discard_cmd(struct f2fs_sb_info *sbi,
		struct block_device *bdev, block_t blkstart, block_t blklen)
{
	block_t lblkstart = blkstart;

	if (!f2fs_bdev_support_discard(bdev))
		return;

	trace_f2fs_queue_discard(bdev, blkstart, blklen);

	if (f2fs_is_multi_device(sbi)) {
		int devi = f2fs_target_device_index(sbi, blkstart);

		blkstart -= FDEV(devi).start_blk;
	}
	mutex_lock(&SM_I(sbi)->dcc_info->cmd_lock);
	__update_discard_tree_range(sbi, bdev, lblkstart, blkstart, blklen);
	mutex_unlock(&SM_I(sbi)->dcc_info->cmd_lock);
}

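/*
 * Issue pending discard commands in LBA order starting from dcc->next_pos,
 * honouring the policy's I/O-awareness and per-round request limit.
 */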
__issue_discard_cmd_orderly(struct f2fs_sb_info * sbi,struct discard_policy * dpolicy,int * issued)1581  static void __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi,
1582  		struct discard_policy *dpolicy, int *issued)
1583  {
1584  	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1585  	struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
1586  	struct rb_node **insert_p = NULL, *insert_parent = NULL;
1587  	struct discard_cmd *dc;
1588  	struct blk_plug plug;
1589  	bool io_interrupted = false;
1590  
1591  	mutex_lock(&dcc->cmd_lock);
1592  	dc = __lookup_discard_cmd_ret(&dcc->root, dcc->next_pos,
1593  				&prev_dc, &next_dc, &insert_p, &insert_parent);
1594  	if (!dc)
1595  		dc = next_dc;
1596  
1597  	blk_start_plug(&plug);
1598  
1599  	while (dc) {
1600  		struct rb_node *node;
1601  		int err = 0;
1602  
1603  		if (dc->state != D_PREP)
1604  			goto next;
1605  
1606  		if (dpolicy->io_aware && !is_idle(sbi, DISCARD_TIME)) {
1607  			io_interrupted = true;
1608  			break;
1609  		}
1610  
1611  		dcc->next_pos = dc->di.lstart + dc->di.len;
1612  		err = __submit_discard_cmd(sbi, dpolicy, dc, issued);
1613  
1614  		if (*issued >= dpolicy->max_requests)
1615  			break;
1616  next:
1617  		node = rb_next(&dc->rb_node);
1618  		if (err)
1619  			__remove_discard_cmd(sbi, dc);
1620  		dc = rb_entry_safe(node, struct discard_cmd, rb_node);
1621  	}
1622  
1623  	blk_finish_plug(&plug);
1624  
1625  	if (!dc)
1626  		dcc->next_pos = 0;
1627  
1628  	mutex_unlock(&dcc->cmd_lock);
1629  
1630  	if (!(*issued) && io_interrupted)
1631  		*issued = -1;
1632  }
1633  static unsigned int __wait_all_discard_cmd(struct f2fs_sb_info *sbi,
1634  					struct discard_policy *dpolicy);
1635  
__issue_discard_cmd(struct f2fs_sb_info * sbi,struct discard_policy * dpolicy)1636  static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
1637  					struct discard_policy *dpolicy)
1638  {
1639  	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1640  	struct list_head *pend_list;
1641  	struct discard_cmd *dc, *tmp;
1642  	struct blk_plug plug;
1643  	int i, issued;
1644  	bool io_interrupted = false;
1645  
1646  	if (dpolicy->timeout)
1647  		f2fs_update_time(sbi, UMOUNT_DISCARD_TIMEOUT);
1648  
1649  retry:
1650  	issued = 0;
1651  	for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
1652  		if (dpolicy->timeout &&
1653  				f2fs_time_over(sbi, UMOUNT_DISCARD_TIMEOUT))
1654  			break;
1655  
1656  		if (i + 1 < dpolicy->granularity)
1657  			break;
1658  
1659  		if (i + 1 < dcc->max_ordered_discard && dpolicy->ordered) {
1660  			__issue_discard_cmd_orderly(sbi, dpolicy, &issued);
1661  			return issued;
1662  		}
1663  
1664  		pend_list = &dcc->pend_list[i];
1665  
1666  		mutex_lock(&dcc->cmd_lock);
1667  		if (list_empty(pend_list))
1668  			goto next;
1669  		if (unlikely(dcc->rbtree_check))
1670  			f2fs_bug_on(sbi, !f2fs_check_discard_tree(sbi));
1671  		blk_start_plug(&plug);
1672  		list_for_each_entry_safe(dc, tmp, pend_list, list) {
1673  			f2fs_bug_on(sbi, dc->state != D_PREP);
1674  
1675  			if (dpolicy->timeout &&
1676  				f2fs_time_over(sbi, UMOUNT_DISCARD_TIMEOUT))
1677  				break;
1678  
1679  			if (dpolicy->io_aware && i < dpolicy->io_aware_gran &&
1680  						!is_idle(sbi, DISCARD_TIME)) {
1681  				io_interrupted = true;
1682  				break;
1683  			}
1684  
1685  			__submit_discard_cmd(sbi, dpolicy, dc, &issued);
1686  
1687  			if (issued >= dpolicy->max_requests)
1688  				break;
1689  		}
1690  		blk_finish_plug(&plug);
1691  next:
1692  		mutex_unlock(&dcc->cmd_lock);
1693  
1694  		if (issued >= dpolicy->max_requests || io_interrupted)
1695  			break;
1696  	}
1697  
1698  	if (dpolicy->type == DPOLICY_UMOUNT && issued) {
1699  		__wait_all_discard_cmd(sbi, dpolicy);
1700  		goto retry;
1701  	}
1702  
1703  	if (!issued && io_interrupted)
1704  		issued = -1;
1705  
1706  	return issued;
1707  }
1708  
__drop_discard_cmd(struct f2fs_sb_info * sbi)1709  static bool __drop_discard_cmd(struct f2fs_sb_info *sbi)
1710  {
1711  	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1712  	struct list_head *pend_list;
1713  	struct discard_cmd *dc, *tmp;
1714  	int i;
1715  	bool dropped = false;
1716  
1717  	mutex_lock(&dcc->cmd_lock);
1718  	for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
1719  		pend_list = &dcc->pend_list[i];
1720  		list_for_each_entry_safe(dc, tmp, pend_list, list) {
1721  			f2fs_bug_on(sbi, dc->state != D_PREP);
1722  			__remove_discard_cmd(sbi, dc);
1723  			dropped = true;
1724  		}
1725  	}
1726  	mutex_unlock(&dcc->cmd_lock);
1727  
1728  	return dropped;
1729  }
1730  
f2fs_drop_discard_cmd(struct f2fs_sb_info * sbi)1731  void f2fs_drop_discard_cmd(struct f2fs_sb_info *sbi)
1732  {
1733  	__drop_discard_cmd(sbi);
1734  }
1735  
__wait_one_discard_bio(struct f2fs_sb_info * sbi,struct discard_cmd * dc)1736  static unsigned int __wait_one_discard_bio(struct f2fs_sb_info *sbi,
1737  							struct discard_cmd *dc)
1738  {
1739  	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1740  	unsigned int len = 0;
1741  
1742  	wait_for_completion_io(&dc->wait);
1743  	mutex_lock(&dcc->cmd_lock);
1744  	f2fs_bug_on(sbi, dc->state != D_DONE);
1745  	dc->ref--;
1746  	if (!dc->ref) {
1747  		if (!dc->error)
1748  			len = dc->di.len;
1749  		__remove_discard_cmd(sbi, dc);
1750  	}
1751  	mutex_unlock(&dcc->cmd_lock);
1752  
1753  	return len;
1754  }
1755  
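/*
 * Wait for completed discard commands whose logical range overlaps
 * [@start, @end) and reclaim them.  Returns the number of blocks that were
 * trimmed without error.
 */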
1756  static unsigned int __wait_discard_cmd_range(struct f2fs_sb_info *sbi,
1757  						struct discard_policy *dpolicy,
1758  						block_t start, block_t end)
1759  {
1760  	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1761  	struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
1762  					&(dcc->fstrim_list) : &(dcc->wait_list);
1763  	struct discard_cmd *dc = NULL, *iter, *tmp;
1764  	unsigned int trimmed = 0;
1765  
1766  next:
1767  	dc = NULL;
1768  
1769  	mutex_lock(&dcc->cmd_lock);
1770  	list_for_each_entry_safe(iter, tmp, wait_list, list) {
1771  		if (iter->di.lstart + iter->di.len <= start ||
1772  					end <= iter->di.lstart)
1773  			continue;
1774  		if (iter->di.len < dpolicy->granularity)
1775  			continue;
1776  		if (iter->state == D_DONE && !iter->ref) {
1777  			wait_for_completion_io(&iter->wait);
1778  			if (!iter->error)
1779  				trimmed += iter->di.len;
1780  			__remove_discard_cmd(sbi, iter);
1781  		} else {
1782  			iter->ref++;
1783  			dc = iter;
1784  			break;
1785  		}
1786  	}
1787  	mutex_unlock(&dcc->cmd_lock);
1788  
1789  	if (dc) {
1790  		trimmed += __wait_one_discard_bio(sbi, dc);
1791  		goto next;
1792  	}
1793  
1794  	return trimmed;
1795  }
1796  
1797  static unsigned int __wait_all_discard_cmd(struct f2fs_sb_info *sbi,
1798  						struct discard_policy *dpolicy)
1799  {
1800  	struct discard_policy dp;
1801  	unsigned int discard_blks;
1802  
1803  	if (dpolicy)
1804  		return __wait_discard_cmd_range(sbi, dpolicy, 0, UINT_MAX);
1805  
1806  	/* wait all */
1807  	__init_discard_policy(sbi, &dp, DPOLICY_FSTRIM, MIN_DISCARD_GRANULARITY);
1808  	discard_blks = __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
1809  	__init_discard_policy(sbi, &dp, DPOLICY_UMOUNT, MIN_DISCARD_GRANULARITY);
1810  	discard_blks += __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
1811  
1812  	return discard_blks;
1813  }
1814  
1815  /* This should be covered by global mutex, &sit_i->sentry_lock */
1816  static void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr)
1817  {
1818  	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1819  	struct discard_cmd *dc;
1820  	bool need_wait = false;
1821  
1822  	mutex_lock(&dcc->cmd_lock);
1823  	dc = __lookup_discard_cmd(sbi, blkaddr);
1824  #ifdef CONFIG_BLK_DEV_ZONED
1825  	if (dc && f2fs_sb_has_blkzoned(sbi) && bdev_is_zoned(dc->bdev)) {
1826  		int devi = f2fs_bdev_index(sbi, dc->bdev);
1827  
1828  		if (devi < 0) {
1829  			mutex_unlock(&dcc->cmd_lock);
1830  			return;
1831  		}
1832  
1833  		if (f2fs_blkz_is_seq(sbi, devi, dc->di.start)) {
1834  			/* force submit zone reset */
1835  			if (dc->state == D_PREP)
1836  				__submit_zone_reset_cmd(sbi, dc, REQ_SYNC,
1837  							&dcc->wait_list, NULL);
1838  			dc->ref++;
1839  			mutex_unlock(&dcc->cmd_lock);
1840  			/* wait zone reset */
1841  			__wait_one_discard_bio(sbi, dc);
1842  			return;
1843  		}
1844  	}
1845  #endif
1846  	if (dc) {
1847  		if (dc->state == D_PREP) {
1848  			__punch_discard_cmd(sbi, dc, blkaddr);
1849  		} else {
1850  			dc->ref++;
1851  			need_wait = true;
1852  		}
1853  	}
1854  	mutex_unlock(&dcc->cmd_lock);
1855  
1856  	if (need_wait)
1857  		__wait_one_discard_bio(sbi, dc);
1858  }
1859  
1860  void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi)
1861  {
1862  	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1863  
1864  	if (dcc && dcc->f2fs_issue_discard) {
1865  		struct task_struct *discard_thread = dcc->f2fs_issue_discard;
1866  
1867  		dcc->f2fs_issue_discard = NULL;
1868  		kthread_stop(discard_thread);
1869  	}
1870  }
1871  
1872  /**
1873   * f2fs_issue_discard_timeout() - Issue all discard cmd within UMOUNT_DISCARD_TIMEOUT
1874   * @sbi: the f2fs_sb_info data for discard cmd to issue
1875   *
1876   * When UMOUNT_DISCARD_TIMEOUT is exceeded, all remaining discard commands will be dropped.
1877   *
1878   * Return: true if all discard commands were issued or there were none to issue, otherwise false.
1879   */
1880  bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi)
1881  {
1882  	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1883  	struct discard_policy dpolicy;
1884  	bool dropped;
1885  
1886  	if (!atomic_read(&dcc->discard_cmd_cnt))
1887  		return true;
1888  
1889  	__init_discard_policy(sbi, &dpolicy, DPOLICY_UMOUNT,
1890  					dcc->discard_granularity);
1891  	__issue_discard_cmd(sbi, &dpolicy);
1892  	dropped = __drop_discard_cmd(sbi);
1893  
1894  	/* just to make sure there are no pending discard commands */
1895  	__wait_all_discard_cmd(sbi, NULL);
1896  
1897  	f2fs_bug_on(sbi, atomic_read(&dcc->discard_cmd_cnt));
1898  	return !dropped;
1899  }
1900  
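/*
 * Background thread that periodically issues queued discard commands and
 * adjusts its sleep interval according to how much work was done and to the
 * currently selected discard policy.
 */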
1901  static int issue_discard_thread(void *data)
1902  {
1903  	struct f2fs_sb_info *sbi = data;
1904  	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1905  	wait_queue_head_t *q = &dcc->discard_wait_queue;
1906  	struct discard_policy dpolicy;
1907  	unsigned int wait_ms = dcc->min_discard_issue_time;
1908  	int issued;
1909  
1910  	set_freezable();
1911  
1912  	do {
1913  		wait_event_freezable_timeout(*q,
1914  				kthread_should_stop() || dcc->discard_wake,
1915  				msecs_to_jiffies(wait_ms));
1916  
1917  		if (sbi->gc_mode == GC_URGENT_HIGH ||
1918  			!f2fs_available_free_memory(sbi, DISCARD_CACHE))
1919  			__init_discard_policy(sbi, &dpolicy, DPOLICY_FORCE,
1920  						MIN_DISCARD_GRANULARITY);
1921  		else
1922  			__init_discard_policy(sbi, &dpolicy, DPOLICY_BG,
1923  						dcc->discard_granularity);
1924  
1925  		if (dcc->discard_wake)
1926  			dcc->discard_wake = false;
1927  
1928  		/* clean up pending candidates before going to sleep */
1929  		if (atomic_read(&dcc->queued_discard))
1930  			__wait_all_discard_cmd(sbi, NULL);
1931  
1932  		if (f2fs_readonly(sbi->sb))
1933  			continue;
1934  		if (kthread_should_stop())
1935  			return 0;
1936  		if (is_sbi_flag_set(sbi, SBI_NEED_FSCK) ||
1937  			!atomic_read(&dcc->discard_cmd_cnt)) {
1938  			wait_ms = dpolicy.max_interval;
1939  			continue;
1940  		}
1941  
1942  		sb_start_intwrite(sbi->sb);
1943  
1944  		issued = __issue_discard_cmd(sbi, &dpolicy);
1945  		if (issued > 0) {
1946  			__wait_all_discard_cmd(sbi, &dpolicy);
1947  			wait_ms = dpolicy.min_interval;
1948  		} else if (issued == -1) {
1949  			wait_ms = f2fs_time_to_wait(sbi, DISCARD_TIME);
1950  			if (!wait_ms)
1951  				wait_ms = dpolicy.mid_interval;
1952  		} else {
1953  			wait_ms = dpolicy.max_interval;
1954  		}
1955  		if (!atomic_read(&dcc->discard_cmd_cnt))
1956  			wait_ms = dpolicy.max_interval;
1957  
1958  		sb_end_intwrite(sbi->sb);
1959  
1960  	} while (!kthread_should_stop());
1961  	return 0;
1962  }
1963  
1964  #ifdef CONFIG_BLK_DEV_ZONED
1965  static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
1966  		struct block_device *bdev, block_t blkstart, block_t blklen)
1967  {
1968  	sector_t sector, nr_sects;
1969  	block_t lblkstart = blkstart;
1970  	int devi = 0;
1971  	u64 remainder = 0;
1972  
1973  	if (f2fs_is_multi_device(sbi)) {
1974  		devi = f2fs_target_device_index(sbi, blkstart);
1975  		if (blkstart < FDEV(devi).start_blk ||
1976  		    blkstart > FDEV(devi).end_blk) {
1977  			f2fs_err(sbi, "Invalid block %x", blkstart);
1978  			return -EIO;
1979  		}
1980  		blkstart -= FDEV(devi).start_blk;
1981  	}
1982  
1983  	/* For sequential zones, reset the zone write pointer */
1984  	if (f2fs_blkz_is_seq(sbi, devi, blkstart)) {
1985  		sector = SECTOR_FROM_BLOCK(blkstart);
1986  		nr_sects = SECTOR_FROM_BLOCK(blklen);
1987  		div64_u64_rem(sector, bdev_zone_sectors(bdev), &remainder);
1988  
1989  		if (remainder || nr_sects != bdev_zone_sectors(bdev)) {
1990  			f2fs_err(sbi, "(%d) %s: Unaligned zone reset attempted (block %x + %x)",
1991  				 devi, sbi->s_ndevs ? FDEV(devi).path : "",
1992  				 blkstart, blklen);
1993  			return -EIO;
1994  		}
1995  
1996  		if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) {
1997  			unsigned int nofs_flags;
1998  			int ret;
1999  
2000  			trace_f2fs_issue_reset_zone(bdev, blkstart);
2001  			nofs_flags = memalloc_nofs_save();
2002  			ret = blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET,
2003  						sector, nr_sects);
2004  			memalloc_nofs_restore(nofs_flags);
2005  			return ret;
2006  		}
2007  
2008  		__queue_zone_reset_cmd(sbi, bdev, blkstart, lblkstart, blklen);
2009  		return 0;
2010  	}
2011  
2012  	/* For conventional zones, use regular discard if supported */
2013  	__queue_discard_cmd(sbi, bdev, lblkstart, blklen);
2014  	return 0;
2015  }
2016  #endif
2017  
2018  static int __issue_discard_async(struct f2fs_sb_info *sbi,
2019  		struct block_device *bdev, block_t blkstart, block_t blklen)
2020  {
2021  #ifdef CONFIG_BLK_DEV_ZONED
2022  	if (f2fs_sb_has_blkzoned(sbi) && bdev_is_zoned(bdev))
2023  		return __f2fs_issue_discard_zone(sbi, bdev, blkstart, blklen);
2024  #endif
2025  	__queue_discard_cmd(sbi, bdev, blkstart, blklen);
2026  	return 0;
2027  }
2028  
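/*
 * Split [@blkstart, @blkstart + @blklen) at device boundaries and queue an
 * asynchronous discard (or zone reset) for each contiguous chunk, updating
 * the per-segment discard bitmaps and the discard block count along the way.
 */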
2029  static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
2030  				block_t blkstart, block_t blklen)
2031  {
2032  	sector_t start = blkstart, len = 0;
2033  	struct block_device *bdev;
2034  	struct seg_entry *se;
2035  	unsigned int offset;
2036  	block_t i;
2037  	int err = 0;
2038  
2039  	bdev = f2fs_target_device(sbi, blkstart, NULL);
2040  
2041  	for (i = blkstart; i < blkstart + blklen; i++, len++) {
2042  		if (i != start) {
2043  			struct block_device *bdev2 =
2044  				f2fs_target_device(sbi, i, NULL);
2045  
2046  			if (bdev2 != bdev) {
2047  				err = __issue_discard_async(sbi, bdev,
2048  						start, len);
2049  				if (err)
2050  					return err;
2051  				bdev = bdev2;
2052  				start = i;
2053  				len = 0;
2054  			}
2055  		}
2056  
2057  		se = get_seg_entry(sbi, GET_SEGNO(sbi, i));
2058  		offset = GET_BLKOFF_FROM_SEG0(sbi, i);
2059  
2060  		if (f2fs_block_unit_discard(sbi) &&
2061  				!f2fs_test_and_set_bit(offset, se->discard_map))
2062  			sbi->discard_blks--;
2063  	}
2064  
2065  	if (len)
2066  		err = __issue_discard_async(sbi, bdev, start, len);
2067  	return err;
2068  }
2069  
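/*
 * Scan the segment selected by @cpc->trim_start for ranges of blocks that can
 * be discarded and record them as small discard entries.  When @check_only is
 * true, return true as soon as one candidate is found without recording it.
 */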
2070  static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc,
2071  							bool check_only)
2072  {
2073  	int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
2074  	struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start);
2075  	unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
2076  	unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
2077  	unsigned long *discard_map = (unsigned long *)se->discard_map;
2078  	unsigned long *dmap = SIT_I(sbi)->tmp_map;
2079  	unsigned int start = 0, end = -1;
2080  	bool force = (cpc->reason & CP_DISCARD);
2081  	struct discard_entry *de = NULL;
2082  	struct list_head *head = &SM_I(sbi)->dcc_info->entry_list;
2083  	int i;
2084  
2085  	if (se->valid_blocks == BLKS_PER_SEG(sbi) ||
2086  	    !f2fs_hw_support_discard(sbi) ||
2087  	    !f2fs_block_unit_discard(sbi))
2088  		return false;
2089  
2090  	if (!force) {
2091  		if (!f2fs_realtime_discard_enable(sbi) || !se->valid_blocks ||
2092  			SM_I(sbi)->dcc_info->nr_discards >=
2093  				SM_I(sbi)->dcc_info->max_discards)
2094  			return false;
2095  	}
2096  
2097  	/* SIT_VBLOCK_MAP_SIZE should be multiple of sizeof(unsigned long) */
2098  	for (i = 0; i < entries; i++)
2099  		dmap[i] = force ? ~ckpt_map[i] & ~discard_map[i] :
2100  				(cur_map[i] ^ ckpt_map[i]) & ckpt_map[i];
2101  
2102  	while (force || SM_I(sbi)->dcc_info->nr_discards <=
2103  				SM_I(sbi)->dcc_info->max_discards) {
2104  		start = __find_rev_next_bit(dmap, BLKS_PER_SEG(sbi), end + 1);
2105  		if (start >= BLKS_PER_SEG(sbi))
2106  			break;
2107  
2108  		end = __find_rev_next_zero_bit(dmap,
2109  						BLKS_PER_SEG(sbi), start + 1);
2110  		if (force && start && end != BLKS_PER_SEG(sbi) &&
2111  		    (end - start) < cpc->trim_minlen)
2112  			continue;
2113  
2114  		if (check_only)
2115  			return true;
2116  
2117  		if (!de) {
2118  			de = f2fs_kmem_cache_alloc(discard_entry_slab,
2119  						GFP_F2FS_ZERO, true, NULL);
2120  			de->start_blkaddr = START_BLOCK(sbi, cpc->trim_start);
2121  			list_add_tail(&de->list, head);
2122  		}
2123  
2124  		for (i = start; i < end; i++)
2125  			__set_bit_le(i, (void *)de->discard_map);
2126  
2127  		SM_I(sbi)->dcc_info->nr_discards += end - start;
2128  	}
2129  	return false;
2130  }
2131  
2132  static void release_discard_addr(struct discard_entry *entry)
2133  {
2134  	list_del(&entry->list);
2135  	kmem_cache_free(discard_entry_slab, entry);
2136  }
2137  
2138  void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi)
2139  {
2140  	struct list_head *head = &(SM_I(sbi)->dcc_info->entry_list);
2141  	struct discard_entry *entry, *this;
2142  
2143  	/* drop caches */
2144  	list_for_each_entry_safe(entry, this, head, list)
2145  		release_discard_addr(entry);
2146  }
2147  
2148  /*
2149   * Should call f2fs_clear_prefree_segments after checkpoint is done.
2150   */
2151  static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
2152  {
2153  	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2154  	unsigned int segno;
2155  
2156  	mutex_lock(&dirty_i->seglist_lock);
2157  	for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], MAIN_SEGS(sbi))
2158  		__set_test_and_free(sbi, segno, false);
2159  	mutex_unlock(&dirty_i->seglist_lock);
2160  }
2161  
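/*
 * Free the prefree segments recorded at checkpoint time and issue discards
 * for them (section-granular on zoned or large-section layouts, otherwise per
 * run of segments), then flush the small discard entries collected by
 * add_discard_addrs().
 */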
2162  void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
2163  						struct cp_control *cpc)
2164  {
2165  	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
2166  	struct list_head *head = &dcc->entry_list;
2167  	struct discard_entry *entry, *this;
2168  	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2169  	unsigned long *prefree_map = dirty_i->dirty_segmap[PRE];
2170  	unsigned int start = 0, end = -1;
2171  	unsigned int secno, start_segno;
2172  	bool force = (cpc->reason & CP_DISCARD);
2173  	bool section_alignment = F2FS_OPTION(sbi).discard_unit ==
2174  						DISCARD_UNIT_SECTION;
2175  
2176  	if (f2fs_lfs_mode(sbi) && __is_large_section(sbi))
2177  		section_alignment = true;
2178  
2179  	mutex_lock(&dirty_i->seglist_lock);
2180  
2181  	while (1) {
2182  		int i;
2183  
2184  		if (section_alignment && end != -1)
2185  			end--;
2186  		start = find_next_bit(prefree_map, MAIN_SEGS(sbi), end + 1);
2187  		if (start >= MAIN_SEGS(sbi))
2188  			break;
2189  		end = find_next_zero_bit(prefree_map, MAIN_SEGS(sbi),
2190  								start + 1);
2191  
2192  		if (section_alignment) {
2193  			start = rounddown(start, SEGS_PER_SEC(sbi));
2194  			end = roundup(end, SEGS_PER_SEC(sbi));
2195  		}
2196  
2197  		for (i = start; i < end; i++) {
2198  			if (test_and_clear_bit(i, prefree_map))
2199  				dirty_i->nr_dirty[PRE]--;
2200  		}
2201  
2202  		if (!f2fs_realtime_discard_enable(sbi))
2203  			continue;
2204  
2205  		if (force && start >= cpc->trim_start &&
2206  					(end - 1) <= cpc->trim_end)
2207  			continue;
2208  
2209  		/* Should cover 2MB zoned device for zone-based reset */
2210  		if (!f2fs_sb_has_blkzoned(sbi) &&
2211  		    (!f2fs_lfs_mode(sbi) || !__is_large_section(sbi))) {
2212  			f2fs_issue_discard(sbi, START_BLOCK(sbi, start),
2213  				SEGS_TO_BLKS(sbi, end - start));
2214  			continue;
2215  		}
2216  next:
2217  		secno = GET_SEC_FROM_SEG(sbi, start);
2218  		start_segno = GET_SEG_FROM_SEC(sbi, secno);
2219  		if (!IS_CURSEC(sbi, secno) &&
2220  			!get_valid_blocks(sbi, start, true))
2221  			f2fs_issue_discard(sbi, START_BLOCK(sbi, start_segno),
2222  						BLKS_PER_SEC(sbi));
2223  
2224  		start = start_segno + SEGS_PER_SEC(sbi);
2225  		if (start < end)
2226  			goto next;
2227  		else
2228  			end = start - 1;
2229  	}
2230  	mutex_unlock(&dirty_i->seglist_lock);
2231  
2232  	if (!f2fs_block_unit_discard(sbi))
2233  		goto wakeup;
2234  
2235  	/* send small discards */
2236  	list_for_each_entry_safe(entry, this, head, list) {
2237  		unsigned int cur_pos = 0, next_pos, len, total_len = 0;
2238  		bool is_valid = test_bit_le(0, entry->discard_map);
2239  
2240  find_next:
2241  		if (is_valid) {
2242  			next_pos = find_next_zero_bit_le(entry->discard_map,
2243  						BLKS_PER_SEG(sbi), cur_pos);
2244  			len = next_pos - cur_pos;
2245  
2246  			if (f2fs_sb_has_blkzoned(sbi) ||
2247  			    (force && len < cpc->trim_minlen))
2248  				goto skip;
2249  
2250  			f2fs_issue_discard(sbi, entry->start_blkaddr + cur_pos,
2251  									len);
2252  			total_len += len;
2253  		} else {
2254  			next_pos = find_next_bit_le(entry->discard_map,
2255  						BLKS_PER_SEG(sbi), cur_pos);
2256  		}
2257  skip:
2258  		cur_pos = next_pos;
2259  		is_valid = !is_valid;
2260  
2261  		if (cur_pos < BLKS_PER_SEG(sbi))
2262  			goto find_next;
2263  
2264  		release_discard_addr(entry);
2265  		dcc->nr_discards -= total_len;
2266  	}
2267  
2268  wakeup:
2269  	wake_up_discard_thread(sbi, false);
2270  }
2271  
2272  int f2fs_start_discard_thread(struct f2fs_sb_info *sbi)
2273  {
2274  	dev_t dev = sbi->sb->s_bdev->bd_dev;
2275  	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
2276  	int err = 0;
2277  
2278  	if (f2fs_sb_has_readonly(sbi)) {
2279  		f2fs_info(sbi,
2280  			"Skip to start discard thread for readonly image");
2281  		return 0;
2282  	}
2283  
2284  	if (!f2fs_realtime_discard_enable(sbi))
2285  		return 0;
2286  
2287  	dcc->f2fs_issue_discard = kthread_run(issue_discard_thread, sbi,
2288  				"f2fs_discard-%u:%u", MAJOR(dev), MINOR(dev));
2289  	if (IS_ERR(dcc->f2fs_issue_discard)) {
2290  		err = PTR_ERR(dcc->f2fs_issue_discard);
2291  		dcc->f2fs_issue_discard = NULL;
2292  	}
2293  
2294  	return err;
2295  }
2296  
2297  static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
2298  {
2299  	struct discard_cmd_control *dcc;
2300  	int err = 0, i;
2301  
2302  	if (SM_I(sbi)->dcc_info) {
2303  		dcc = SM_I(sbi)->dcc_info;
2304  		goto init_thread;
2305  	}
2306  
2307  	dcc = f2fs_kzalloc(sbi, sizeof(struct discard_cmd_control), GFP_KERNEL);
2308  	if (!dcc)
2309  		return -ENOMEM;
2310  
2311  	dcc->discard_io_aware_gran = MAX_PLIST_NUM;
2312  	dcc->discard_granularity = DEFAULT_DISCARD_GRANULARITY;
2313  	dcc->max_ordered_discard = DEFAULT_MAX_ORDERED_DISCARD_GRANULARITY;
2314  	dcc->discard_io_aware = DPOLICY_IO_AWARE_ENABLE;
2315  	if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SEGMENT)
2316  		dcc->discard_granularity = BLKS_PER_SEG(sbi);
2317  	else if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SECTION)
2318  		dcc->discard_granularity = BLKS_PER_SEC(sbi);
2319  
2320  	INIT_LIST_HEAD(&dcc->entry_list);
2321  	for (i = 0; i < MAX_PLIST_NUM; i++)
2322  		INIT_LIST_HEAD(&dcc->pend_list[i]);
2323  	INIT_LIST_HEAD(&dcc->wait_list);
2324  	INIT_LIST_HEAD(&dcc->fstrim_list);
2325  	mutex_init(&dcc->cmd_lock);
2326  	atomic_set(&dcc->issued_discard, 0);
2327  	atomic_set(&dcc->queued_discard, 0);
2328  	atomic_set(&dcc->discard_cmd_cnt, 0);
2329  	dcc->nr_discards = 0;
2330  	dcc->max_discards = SEGS_TO_BLKS(sbi, MAIN_SEGS(sbi));
2331  	dcc->max_discard_request = DEF_MAX_DISCARD_REQUEST;
2332  	dcc->min_discard_issue_time = DEF_MIN_DISCARD_ISSUE_TIME;
2333  	dcc->mid_discard_issue_time = DEF_MID_DISCARD_ISSUE_TIME;
2334  	dcc->max_discard_issue_time = DEF_MAX_DISCARD_ISSUE_TIME;
2335  	dcc->discard_urgent_util = DEF_DISCARD_URGENT_UTIL;
2336  	dcc->undiscard_blks = 0;
2337  	dcc->next_pos = 0;
2338  	dcc->root = RB_ROOT_CACHED;
2339  	dcc->rbtree_check = false;
2340  
2341  	init_waitqueue_head(&dcc->discard_wait_queue);
2342  	SM_I(sbi)->dcc_info = dcc;
2343  init_thread:
2344  	err = f2fs_start_discard_thread(sbi);
2345  	if (err) {
2346  		kfree(dcc);
2347  		SM_I(sbi)->dcc_info = NULL;
2348  	}
2349  
2350  	return err;
2351  }
2352  
2353  static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi)
2354  {
2355  	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
2356  
2357  	if (!dcc)
2358  		return;
2359  
2360  	f2fs_stop_discard_thread(sbi);
2361  
2362  	/*
2363  	 * Recovery can cache discard commands, so in the error path of
2364  	 * fill_super(), give them a chance to be issued.
2365  	 */
2366  	f2fs_issue_discard_timeout(sbi);
2367  
2368  	kfree(dcc);
2369  	SM_I(sbi)->dcc_info = NULL;
2370  }
2371  
2372  static bool __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno)
2373  {
2374  	struct sit_info *sit_i = SIT_I(sbi);
2375  
2376  	if (!__test_and_set_bit(segno, sit_i->dirty_sentries_bitmap)) {
2377  		sit_i->dirty_sentries++;
2378  		return false;
2379  	}
2380  
2381  	return true;
2382  }
2383  
2384  static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type,
2385  					unsigned int segno, int modified)
2386  {
2387  	struct seg_entry *se = get_seg_entry(sbi, segno);
2388  
2389  	se->type = type;
2390  	if (modified)
2391  		__mark_sit_entry_dirty(sbi, segno);
2392  }
2393  
2394  static inline unsigned long long get_segment_mtime(struct f2fs_sb_info *sbi,
2395  								block_t blkaddr)
2396  {
2397  	unsigned int segno = GET_SEGNO(sbi, blkaddr);
2398  
2399  	if (segno == NULL_SEGNO)
2400  		return 0;
2401  	return get_seg_entry(sbi, segno)->mtime;
2402  }
2403  
2404  static void update_segment_mtime(struct f2fs_sb_info *sbi, block_t blkaddr,
2405  						unsigned long long old_mtime)
2406  {
2407  	struct seg_entry *se;
2408  	unsigned int segno = GET_SEGNO(sbi, blkaddr);
2409  	unsigned long long ctime = get_mtime(sbi, false);
2410  	unsigned long long mtime = old_mtime ? old_mtime : ctime;
2411  
2412  	if (segno == NULL_SEGNO)
2413  		return;
2414  
2415  	se = get_seg_entry(sbi, segno);
2416  
2417  	if (!se->mtime)
2418  		se->mtime = mtime;
2419  	else
2420  		se->mtime = div_u64(se->mtime * se->valid_blocks + mtime,
2421  						se->valid_blocks + 1);
2422  
2423  	if (ctime > SIT_I(sbi)->max_mtime)
2424  		SIT_I(sbi)->max_mtime = ctime;
2425  }
2426  
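/*
 * Apply a valid-block delta (@del) to the SIT entry covering @blkaddr: update
 * the valid block count, the current/checkpoint valid bitmaps and the discard
 * bitmap, then mark the SIT entry dirty.
 */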
2427  static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
2428  {
2429  	struct seg_entry *se;
2430  	unsigned int segno, offset;
2431  	long int new_vblocks;
2432  	bool exist;
2433  #ifdef CONFIG_F2FS_CHECK_FS
2434  	bool mir_exist;
2435  #endif
2436  
2437  	segno = GET_SEGNO(sbi, blkaddr);
2438  	if (segno == NULL_SEGNO)
2439  		return;
2440  
2441  	se = get_seg_entry(sbi, segno);
2442  	new_vblocks = se->valid_blocks + del;
2443  	offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
2444  
2445  	f2fs_bug_on(sbi, (new_vblocks < 0 ||
2446  			(new_vblocks > f2fs_usable_blks_in_seg(sbi, segno))));
2447  
2448  	se->valid_blocks = new_vblocks;
2449  
2450  	/* Update valid block bitmap */
2451  	if (del > 0) {
2452  		exist = f2fs_test_and_set_bit(offset, se->cur_valid_map);
2453  #ifdef CONFIG_F2FS_CHECK_FS
2454  		mir_exist = f2fs_test_and_set_bit(offset,
2455  						se->cur_valid_map_mir);
2456  		if (unlikely(exist != mir_exist)) {
2457  			f2fs_err(sbi, "Inconsistent error when setting bitmap, blk:%u, old bit:%d",
2458  				 blkaddr, exist);
2459  			f2fs_bug_on(sbi, 1);
2460  		}
2461  #endif
2462  		if (unlikely(exist)) {
2463  			f2fs_err(sbi, "Bitmap was wrongly set, blk:%u",
2464  				 blkaddr);
2465  			f2fs_bug_on(sbi, 1);
2466  			se->valid_blocks--;
2467  			del = 0;
2468  		}
2469  
2470  		if (f2fs_block_unit_discard(sbi) &&
2471  				!f2fs_test_and_set_bit(offset, se->discard_map))
2472  			sbi->discard_blks--;
2473  
2474  		/*
2475  		 * SSR should never reuse a block which is checkpointed
2476  		 * or newly invalidated.
2477  		 */
2478  		if (!is_sbi_flag_set(sbi, SBI_CP_DISABLED)) {
2479  			if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map))
2480  				se->ckpt_valid_blocks++;
2481  		}
2482  	} else {
2483  		exist = f2fs_test_and_clear_bit(offset, se->cur_valid_map);
2484  #ifdef CONFIG_F2FS_CHECK_FS
2485  		mir_exist = f2fs_test_and_clear_bit(offset,
2486  						se->cur_valid_map_mir);
2487  		if (unlikely(exist != mir_exist)) {
2488  			f2fs_err(sbi, "Inconsistent error when clearing bitmap, blk:%u, old bit:%d",
2489  				 blkaddr, exist);
2490  			f2fs_bug_on(sbi, 1);
2491  		}
2492  #endif
2493  		if (unlikely(!exist)) {
2494  			f2fs_err(sbi, "Bitmap was wrongly cleared, blk:%u",
2495  				 blkaddr);
2496  			f2fs_bug_on(sbi, 1);
2497  			se->valid_blocks++;
2498  			del = 0;
2499  		} else if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
2500  			/*
2501  			 * If checkpoints are off, we must not reuse data that
2502  			 * was used in the previous checkpoint. If it was used
2503  			 * before, we must track that to know how much space we
2504  			 * really have.
2505  			 */
2506  			if (f2fs_test_bit(offset, se->ckpt_valid_map)) {
2507  				spin_lock(&sbi->stat_lock);
2508  				sbi->unusable_block_count++;
2509  				spin_unlock(&sbi->stat_lock);
2510  			}
2511  		}
2512  
2513  		if (f2fs_block_unit_discard(sbi) &&
2514  			f2fs_test_and_clear_bit(offset, se->discard_map))
2515  			sbi->discard_blks++;
2516  	}
2517  	if (!f2fs_test_bit(offset, se->ckpt_valid_map))
2518  		se->ckpt_valid_blocks += del;
2519  
2520  	__mark_sit_entry_dirty(sbi, segno);
2521  
2522  	/* update total number of valid blocks to be written in ckpt area */
2523  	SIT_I(sbi)->written_valid_blocks += del;
2524  
2525  	if (__is_large_section(sbi))
2526  		get_sec_entry(sbi, segno)->valid_blocks += del;
2527  }
2528  
2529  void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
2530  {
2531  	unsigned int segno = GET_SEGNO(sbi, addr);
2532  	struct sit_info *sit_i = SIT_I(sbi);
2533  
2534  	f2fs_bug_on(sbi, addr == NULL_ADDR);
2535  	if (addr == NEW_ADDR || addr == COMPRESS_ADDR)
2536  		return;
2537  
2538  	f2fs_invalidate_internal_cache(sbi, addr);
2539  
2540  	/* add it into sit main buffer */
2541  	down_write(&sit_i->sentry_lock);
2542  
2543  	update_segment_mtime(sbi, addr, 0);
2544  	update_sit_entry(sbi, addr, -1);
2545  
2546  	/* add it into dirty seglist */
2547  	locate_dirty_segment(sbi, segno);
2548  
2549  	up_write(&sit_i->sentry_lock);
2550  }
2551  
2552  bool f2fs_is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr)
2553  {
2554  	struct sit_info *sit_i = SIT_I(sbi);
2555  	unsigned int segno, offset;
2556  	struct seg_entry *se;
2557  	bool is_cp = false;
2558  
2559  	if (!__is_valid_data_blkaddr(blkaddr))
2560  		return true;
2561  
2562  	down_read(&sit_i->sentry_lock);
2563  
2564  	segno = GET_SEGNO(sbi, blkaddr);
2565  	se = get_seg_entry(sbi, segno);
2566  	offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
2567  
2568  	if (f2fs_test_bit(offset, se->ckpt_valid_map))
2569  		is_cp = true;
2570  
2571  	up_read(&sit_i->sentry_lock);
2572  
2573  	return is_cp;
2574  }
2575  
2576  static unsigned short f2fs_curseg_valid_blocks(struct f2fs_sb_info *sbi, int type)
2577  {
2578  	struct curseg_info *curseg = CURSEG_I(sbi, type);
2579  
2580  	if (sbi->ckpt->alloc_type[type] == SSR)
2581  		return BLKS_PER_SEG(sbi);
2582  	return curseg->next_blkoff;
2583  }
2584  
2585  /*
2586   * Calculate the number of current summary pages for writing
2587   */
2588  int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra)
2589  {
2590  	int valid_sum_count = 0;
2591  	int i, sum_in_page;
2592  
2593  	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
2594  		if (sbi->ckpt->alloc_type[i] != SSR && for_ra)
2595  			valid_sum_count +=
2596  				le16_to_cpu(F2FS_CKPT(sbi)->cur_data_blkoff[i]);
2597  		else
2598  			valid_sum_count += f2fs_curseg_valid_blocks(sbi, i);
2599  	}
2600  
2601  	sum_in_page = (PAGE_SIZE - 2 * SUM_JOURNAL_SIZE -
2602  			SUM_FOOTER_SIZE) / SUMMARY_SIZE;
2603  	if (valid_sum_count <= sum_in_page)
2604  		return 1;
2605  	else if ((valid_sum_count - sum_in_page) <=
2606  		(PAGE_SIZE - SUM_FOOTER_SIZE) / SUMMARY_SIZE)
2607  		return 2;
2608  	return 3;
2609  }
2610  
2611  /*
2612   * Caller should put this summary page
2613   */
2614  struct page *f2fs_get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno)
2615  {
2616  	if (unlikely(f2fs_cp_error(sbi)))
2617  		return ERR_PTR(-EIO);
2618  	return f2fs_get_meta_page_retry(sbi, GET_SUM_BLOCK(sbi, segno));
2619  }
2620  
2621  void f2fs_update_meta_page(struct f2fs_sb_info *sbi,
2622  					void *src, block_t blk_addr)
2623  {
2624  	struct page *page = f2fs_grab_meta_page(sbi, blk_addr);
2625  
2626  	memcpy(page_address(page), src, PAGE_SIZE);
2627  	set_page_dirty(page);
2628  	f2fs_put_page(page, 1);
2629  }
2630  
2631  static void write_sum_page(struct f2fs_sb_info *sbi,
2632  			struct f2fs_summary_block *sum_blk, block_t blk_addr)
2633  {
2634  	f2fs_update_meta_page(sbi, (void *)sum_blk, blk_addr);
2635  }
2636  
2637  static void write_current_sum_page(struct f2fs_sb_info *sbi,
2638  						int type, block_t blk_addr)
2639  {
2640  	struct curseg_info *curseg = CURSEG_I(sbi, type);
2641  	struct page *page = f2fs_grab_meta_page(sbi, blk_addr);
2642  	struct f2fs_summary_block *src = curseg->sum_blk;
2643  	struct f2fs_summary_block *dst;
2644  
2645  	dst = (struct f2fs_summary_block *)page_address(page);
2646  	memset(dst, 0, PAGE_SIZE);
2647  
2648  	mutex_lock(&curseg->curseg_mutex);
2649  
2650  	down_read(&curseg->journal_rwsem);
2651  	memcpy(&dst->journal, curseg->journal, SUM_JOURNAL_SIZE);
2652  	up_read(&curseg->journal_rwsem);
2653  
2654  	memcpy(dst->entries, src->entries, SUM_ENTRY_SIZE);
2655  	memcpy(&dst->footer, &src->footer, SUM_FOOTER_SIZE);
2656  
2657  	mutex_unlock(&curseg->curseg_mutex);
2658  
2659  	set_page_dirty(page);
2660  	f2fs_put_page(page, 1);
2661  }
2662  
2663  static int is_next_segment_free(struct f2fs_sb_info *sbi,
2664  				struct curseg_info *curseg)
2665  {
2666  	unsigned int segno = curseg->segno + 1;
2667  	struct free_segmap_info *free_i = FREE_I(sbi);
2668  
2669  	if (segno < MAIN_SEGS(sbi) && segno % SEGS_PER_SEC(sbi))
2670  		return !test_bit(segno, free_i->free_segmap);
2671  	return 0;
2672  }
2673  
2674  /*
2675   * Find a new segment from the free segment bitmap in the right order.
2676   * This function should always succeed; otherwise it is a BUG.
2677   */
2678  static int get_new_segment(struct f2fs_sb_info *sbi,
2679  			unsigned int *newseg, bool new_sec, bool pinning)
2680  {
2681  	struct free_segmap_info *free_i = FREE_I(sbi);
2682  	unsigned int segno, secno, zoneno;
2683  	unsigned int total_zones = MAIN_SECS(sbi) / sbi->secs_per_zone;
2684  	unsigned int hint = GET_SEC_FROM_SEG(sbi, *newseg);
2685  	unsigned int old_zoneno = GET_ZONE_FROM_SEG(sbi, *newseg);
2686  	bool init = true;
2687  	int i;
2688  	int ret = 0;
2689  
2690  	spin_lock(&free_i->segmap_lock);
2691  
2692  	if (time_to_inject(sbi, FAULT_NO_SEGMENT)) {
2693  		ret = -ENOSPC;
2694  		goto out_unlock;
2695  	}
2696  
2697  	if (!new_sec && ((*newseg + 1) % SEGS_PER_SEC(sbi))) {
2698  		segno = find_next_zero_bit(free_i->free_segmap,
2699  			GET_SEG_FROM_SEC(sbi, hint + 1), *newseg + 1);
2700  		if (segno < GET_SEG_FROM_SEC(sbi, hint + 1))
2701  			goto got_it;
2702  	}
2703  
2704  #ifdef CONFIG_BLK_DEV_ZONED
2705  	/*
2706  	 * If f2fs is formatted on zoned storage, try to get pinned sections
2707  	 * from the beginning of the storage, which should be a conventional zone.
2708  	 */
2709  	if (f2fs_sb_has_blkzoned(sbi)) {
2710  		/* Prioritize writing to conventional zones */
2711  		if (sbi->blkzone_alloc_policy == BLKZONE_ALLOC_PRIOR_CONV || pinning)
2712  			segno = 0;
2713  		else
2714  			segno = max(first_zoned_segno(sbi), *newseg);
2715  		hint = GET_SEC_FROM_SEG(sbi, segno);
2716  	}
2717  #endif
2718  
2719  find_other_zone:
2720  	secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint);
2721  
2722  #ifdef CONFIG_BLK_DEV_ZONED
2723  	if (secno >= MAIN_SECS(sbi) && f2fs_sb_has_blkzoned(sbi)) {
2724  		/* Write only to sequential zones */
2725  		if (sbi->blkzone_alloc_policy == BLKZONE_ALLOC_ONLY_SEQ) {
2726  			hint = GET_SEC_FROM_SEG(sbi, first_zoned_segno(sbi));
2727  			secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint);
2728  		} else
2729  			secno = find_first_zero_bit(free_i->free_secmap,
2730  								MAIN_SECS(sbi));
2731  		if (secno >= MAIN_SECS(sbi)) {
2732  			ret = -ENOSPC;
2733  			f2fs_bug_on(sbi, 1);
2734  			goto out_unlock;
2735  		}
2736  	}
2737  #endif
2738  
2739  	if (secno >= MAIN_SECS(sbi)) {
2740  		secno = find_first_zero_bit(free_i->free_secmap,
2741  							MAIN_SECS(sbi));
2742  		if (secno >= MAIN_SECS(sbi)) {
2743  			ret = -ENOSPC;
2744  			f2fs_bug_on(sbi, 1);
2745  			goto out_unlock;
2746  		}
2747  	}
2748  	segno = GET_SEG_FROM_SEC(sbi, secno);
2749  	zoneno = GET_ZONE_FROM_SEC(sbi, secno);
2750  
2751  	/* give up on finding another zone */
2752  	if (!init)
2753  		goto got_it;
2754  	if (sbi->secs_per_zone == 1)
2755  		goto got_it;
2756  	if (zoneno == old_zoneno)
2757  		goto got_it;
2758  	for (i = 0; i < NR_CURSEG_TYPE; i++)
2759  		if (CURSEG_I(sbi, i)->zone == zoneno)
2760  			break;
2761  
2762  	if (i < NR_CURSEG_TYPE) {
2763  		/* zone is in use, try another */
2764  		if (zoneno + 1 >= total_zones)
2765  			hint = 0;
2766  		else
2767  			hint = (zoneno + 1) * sbi->secs_per_zone;
2768  		init = false;
2769  		goto find_other_zone;
2770  	}
2771  got_it:
2772  	/* set it as dirty segment in free segmap */
2773  	f2fs_bug_on(sbi, test_bit(segno, free_i->free_segmap));
2774  
2775  	/* no free section in conventional zone */
2776  	if (new_sec && pinning &&
2777  		!f2fs_valid_pinned_area(sbi, START_BLOCK(sbi, segno))) {
2778  		ret = -EAGAIN;
2779  		goto out_unlock;
2780  	}
2781  	__set_inuse(sbi, segno);
2782  	*newseg = segno;
2783  out_unlock:
2784  	spin_unlock(&free_i->segmap_lock);
2785  
2786  	if (ret == -ENOSPC)
2787  		f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_NO_SEGMENT);
2788  	return ret;
2789  }
2790  
2791  static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
2792  {
2793  	struct curseg_info *curseg = CURSEG_I(sbi, type);
2794  	struct summary_footer *sum_footer;
2795  	unsigned short seg_type = curseg->seg_type;
2796  
2797  	/* only happens when get_new_segment() fails */
2798  	if (curseg->next_segno == NULL_SEGNO)
2799  		return;
2800  
2801  	curseg->inited = true;
2802  	curseg->segno = curseg->next_segno;
2803  	curseg->zone = GET_ZONE_FROM_SEG(sbi, curseg->segno);
2804  	curseg->next_blkoff = 0;
2805  	curseg->next_segno = NULL_SEGNO;
2806  
2807  	sum_footer = &(curseg->sum_blk->footer);
2808  	memset(sum_footer, 0, sizeof(struct summary_footer));
2809  
2810  	sanity_check_seg_type(sbi, seg_type);
2811  
2812  	if (IS_DATASEG(seg_type))
2813  		SET_SUM_TYPE(sum_footer, SUM_TYPE_DATA);
2814  	if (IS_NODESEG(seg_type))
2815  		SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE);
2816  	__set_sit_entry_type(sbi, seg_type, curseg->segno, modified);
2817  }
2818  
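/*
 * Pick the segment number used as the allocation hint for the next segment of
 * @type, taking large sections, fragmented-allocation testing and the
 * configured allocation mode into account.
 */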
2819  static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
2820  {
2821  	struct curseg_info *curseg = CURSEG_I(sbi, type);
2822  	unsigned short seg_type = curseg->seg_type;
2823  
2824  	sanity_check_seg_type(sbi, seg_type);
2825  	if (__is_large_section(sbi)) {
2826  		if (f2fs_need_rand_seg(sbi)) {
2827  			unsigned int hint = GET_SEC_FROM_SEG(sbi, curseg->segno);
2828  
2829  			if (GET_SEC_FROM_SEG(sbi, curseg->segno + 1) != hint)
2830  				return curseg->segno;
2831  			return get_random_u32_inclusive(curseg->segno + 1,
2832  					GET_SEG_FROM_SEC(sbi, hint + 1) - 1);
2833  		}
2834  		return curseg->segno;
2835  	} else if (f2fs_need_rand_seg(sbi)) {
2836  		return get_random_u32_below(MAIN_SECS(sbi) * SEGS_PER_SEC(sbi));
2837  	}
2838  
2839  	/* the in-memory log may not be located on any segment after mount */
2840  	if (!curseg->inited)
2841  		return 0;
2842  
2843  	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
2844  		return 0;
2845  
2846  	if (seg_type == CURSEG_HOT_DATA || IS_NODESEG(seg_type))
2847  		return 0;
2848  
2849  	if (SIT_I(sbi)->last_victim[ALLOC_NEXT])
2850  		return SIT_I(sbi)->last_victim[ALLOC_NEXT];
2851  
2852  	/* find segments from 0 to reuse freed segments */
2853  	if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE)
2854  		return 0;
2855  
2856  	return curseg->segno;
2857  }
2858  
2859  /*
2860   * Allocate a current working segment.
2861   * This function always allocates a free segment in LFS manner.
2862   */
2863  static int new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
2864  {
2865  	struct curseg_info *curseg = CURSEG_I(sbi, type);
2866  	unsigned int segno = curseg->segno;
2867  	bool pinning = type == CURSEG_COLD_DATA_PINNED;
2868  	int ret;
2869  
2870  	if (curseg->inited)
2871  		write_sum_page(sbi, curseg->sum_blk, GET_SUM_BLOCK(sbi, segno));
2872  
2873  	segno = __get_next_segno(sbi, type);
2874  	ret = get_new_segment(sbi, &segno, new_sec, pinning);
2875  	if (ret) {
2876  		if (ret == -ENOSPC)
2877  			curseg->segno = NULL_SEGNO;
2878  		return ret;
2879  	}
2880  
2881  	curseg->next_segno = segno;
2882  	reset_curseg(sbi, type, 1);
2883  	curseg->alloc_type = LFS;
2884  	if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK)
2885  		curseg->fragment_remained_chunk =
2886  				get_random_u32_inclusive(1, sbi->max_fragment_chunk);
2887  	return 0;
2888  }
2889  
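/*
 * Return the first block offset at or after @start in segment @segno that is
 * free in both the current and checkpoint valid bitmaps, i.e. usable for SSR.
 */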
2890  static int __next_free_blkoff(struct f2fs_sb_info *sbi,
2891  					int segno, block_t start)
2892  {
2893  	struct seg_entry *se = get_seg_entry(sbi, segno);
2894  	int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
2895  	unsigned long *target_map = SIT_I(sbi)->tmp_map;
2896  	unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
2897  	unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
2898  	int i;
2899  
2900  	for (i = 0; i < entries; i++)
2901  		target_map[i] = ckpt_map[i] | cur_map[i];
2902  
2903  	return __find_rev_next_zero_bit(target_map, BLKS_PER_SEG(sbi), start);
2904  }
2905  
2906  static int f2fs_find_next_ssr_block(struct f2fs_sb_info *sbi,
2907  		struct curseg_info *seg)
2908  {
2909  	return __next_free_blkoff(sbi, seg->segno, seg->next_blkoff + 1);
2910  }
2911  
2912  bool f2fs_segment_has_free_slot(struct f2fs_sb_info *sbi, int segno)
2913  {
2914  	return __next_free_blkoff(sbi, segno, 0) < BLKS_PER_SEG(sbi);
2915  }
2916  
2917  /*
2918   * This function always allocates a used segment (from the dirty seglist) in SSR
2919   * manner, so it needs to recover the existing summary information of valid blocks.
2920   */
2921  static int change_curseg(struct f2fs_sb_info *sbi, int type)
2922  {
2923  	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2924  	struct curseg_info *curseg = CURSEG_I(sbi, type);
2925  	unsigned int new_segno = curseg->next_segno;
2926  	struct f2fs_summary_block *sum_node;
2927  	struct page *sum_page;
2928  
2929  	write_sum_page(sbi, curseg->sum_blk, GET_SUM_BLOCK(sbi, curseg->segno));
2930  
2931  	__set_test_and_inuse(sbi, new_segno);
2932  
2933  	mutex_lock(&dirty_i->seglist_lock);
2934  	__remove_dirty_segment(sbi, new_segno, PRE);
2935  	__remove_dirty_segment(sbi, new_segno, DIRTY);
2936  	mutex_unlock(&dirty_i->seglist_lock);
2937  
2938  	reset_curseg(sbi, type, 1);
2939  	curseg->alloc_type = SSR;
2940  	curseg->next_blkoff = __next_free_blkoff(sbi, curseg->segno, 0);
2941  
2942  	sum_page = f2fs_get_sum_page(sbi, new_segno);
2943  	if (IS_ERR(sum_page)) {
2944  		/* GC won't be able to use stale summary pages due to cp_error */
2945  		memset(curseg->sum_blk, 0, SUM_ENTRY_SIZE);
2946  		return PTR_ERR(sum_page);
2947  	}
2948  	sum_node = (struct f2fs_summary_block *)page_address(sum_page);
2949  	memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE);
2950  	f2fs_put_page(sum_page, 1);
2951  	return 0;
2952  }
2953  
2954  static int get_ssr_segment(struct f2fs_sb_info *sbi, int type,
2955  				int alloc_mode, unsigned long long age);
2956  
2957  static int get_atssr_segment(struct f2fs_sb_info *sbi, int type,
2958  					int target_type, int alloc_mode,
2959  					unsigned long long age)
2960  {
2961  	struct curseg_info *curseg = CURSEG_I(sbi, type);
2962  	int ret = 0;
2963  
2964  	curseg->seg_type = target_type;
2965  
2966  	if (get_ssr_segment(sbi, type, alloc_mode, age)) {
2967  		struct seg_entry *se = get_seg_entry(sbi, curseg->next_segno);
2968  
2969  		curseg->seg_type = se->type;
2970  		ret = change_curseg(sbi, type);
2971  	} else {
2972  		/* allocate cold segment by default */
2973  		curseg->seg_type = CURSEG_COLD_DATA;
2974  		ret = new_curseg(sbi, type, true);
2975  	}
2976  	stat_inc_seg_type(sbi, curseg);
2977  	return ret;
2978  }
2979  
2980  static int __f2fs_init_atgc_curseg(struct f2fs_sb_info *sbi, bool force)
2981  {
2982  	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_ALL_DATA_ATGC);
2983  	int ret = 0;
2984  
2985  	if (!sbi->am.atgc_enabled && !force)
2986  		return 0;
2987  
2988  	f2fs_down_read(&SM_I(sbi)->curseg_lock);
2989  
2990  	mutex_lock(&curseg->curseg_mutex);
2991  	down_write(&SIT_I(sbi)->sentry_lock);
2992  
2993  	ret = get_atssr_segment(sbi, CURSEG_ALL_DATA_ATGC,
2994  					CURSEG_COLD_DATA, SSR, 0);
2995  
2996  	up_write(&SIT_I(sbi)->sentry_lock);
2997  	mutex_unlock(&curseg->curseg_mutex);
2998  
2999  	f2fs_up_read(&SM_I(sbi)->curseg_lock);
3000  	return ret;
3001  }
3002  
3003  int f2fs_init_inmem_curseg(struct f2fs_sb_info *sbi)
3004  {
3005  	return __f2fs_init_atgc_curseg(sbi, false);
3006  }
3007  
3008  int f2fs_reinit_atgc_curseg(struct f2fs_sb_info *sbi)
3009  {
3010  	int ret;
3011  
3012  	if (!test_opt(sbi, ATGC))
3013  		return 0;
3014  	if (sbi->am.atgc_enabled)
3015  		return 0;
3016  	if (le64_to_cpu(F2FS_CKPT(sbi)->elapsed_time) <
3017  			sbi->am.age_threshold)
3018  		return 0;
3019  
3020  	ret = __f2fs_init_atgc_curseg(sbi, true);
3021  	if (!ret) {
3022  		sbi->am.atgc_enabled = true;
3023  		f2fs_info(sbi, "reenabled age threshold GC");
3024  	}
3025  	return ret;
3026  }
3027  
3028  static void __f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi, int type)
3029  {
3030  	struct curseg_info *curseg = CURSEG_I(sbi, type);
3031  
3032  	mutex_lock(&curseg->curseg_mutex);
3033  	if (!curseg->inited)
3034  		goto out;
3035  
3036  	if (get_valid_blocks(sbi, curseg->segno, false)) {
3037  		write_sum_page(sbi, curseg->sum_blk,
3038  				GET_SUM_BLOCK(sbi, curseg->segno));
3039  	} else {
3040  		mutex_lock(&DIRTY_I(sbi)->seglist_lock);
3041  		__set_test_and_free(sbi, curseg->segno, true);
3042  		mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
3043  	}
3044  out:
3045  	mutex_unlock(&curseg->curseg_mutex);
3046  }
3047  
3048  void f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi)
3049  {
3050  	__f2fs_save_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED);
3051  
3052  	if (sbi->am.atgc_enabled)
3053  		__f2fs_save_inmem_curseg(sbi, CURSEG_ALL_DATA_ATGC);
3054  }
3055  
3056  static void __f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi, int type)
3057  {
3058  	struct curseg_info *curseg = CURSEG_I(sbi, type);
3059  
3060  	mutex_lock(&curseg->curseg_mutex);
3061  	if (!curseg->inited)
3062  		goto out;
3063  	if (get_valid_blocks(sbi, curseg->segno, false))
3064  		goto out;
3065  
3066  	mutex_lock(&DIRTY_I(sbi)->seglist_lock);
3067  	__set_test_and_inuse(sbi, curseg->segno);
3068  	mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
3069  out:
3070  	mutex_unlock(&curseg->curseg_mutex);
3071  }
3072  
3073  void f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi)
3074  {
3075  	__f2fs_restore_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED);
3076  
3077  	if (sbi->am.atgc_enabled)
3078  		__f2fs_restore_inmem_curseg(sbi, CURSEG_ALL_DATA_ATGC);
3079  }
3080  
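/*
 * Find a victim segment with free slots for SSR allocation: try the requested
 * log type first, then the other logs of the same kind (data or node), and
 * finally any free segment when checkpoints are disabled.  Returns 1 and sets
 * curseg->next_segno on success, 0 otherwise.
 */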
3081  static int get_ssr_segment(struct f2fs_sb_info *sbi, int type,
3082  				int alloc_mode, unsigned long long age)
3083  {
3084  	struct curseg_info *curseg = CURSEG_I(sbi, type);
3085  	unsigned segno = NULL_SEGNO;
3086  	unsigned short seg_type = curseg->seg_type;
3087  	int i, cnt;
3088  	bool reversed = false;
3089  
3090  	sanity_check_seg_type(sbi, seg_type);
3091  
3092  	/* f2fs_need_SSR() has already forced us to do this */
3093  	if (!f2fs_get_victim(sbi, &segno, BG_GC, seg_type,
3094  				alloc_mode, age, false)) {
3095  		curseg->next_segno = segno;
3096  		return 1;
3097  	}
3098  
3099  	/* For node segments, let's do SSR more intensively */
3100  	if (IS_NODESEG(seg_type)) {
3101  		if (seg_type >= CURSEG_WARM_NODE) {
3102  			reversed = true;
3103  			i = CURSEG_COLD_NODE;
3104  		} else {
3105  			i = CURSEG_HOT_NODE;
3106  		}
3107  		cnt = NR_CURSEG_NODE_TYPE;
3108  	} else {
3109  		if (seg_type >= CURSEG_WARM_DATA) {
3110  			reversed = true;
3111  			i = CURSEG_COLD_DATA;
3112  		} else {
3113  			i = CURSEG_HOT_DATA;
3114  		}
3115  		cnt = NR_CURSEG_DATA_TYPE;
3116  	}
3117  
3118  	for (; cnt-- > 0; reversed ? i-- : i++) {
3119  		if (i == seg_type)
3120  			continue;
3121  		if (!f2fs_get_victim(sbi, &segno, BG_GC, i,
3122  					alloc_mode, age, false)) {
3123  			curseg->next_segno = segno;
3124  			return 1;
3125  		}
3126  	}
3127  
3128  	/* find valid_blocks=0 in dirty list */
3129  	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
3130  		segno = get_free_segment(sbi);
3131  		if (segno != NULL_SEGNO) {
3132  			curseg->next_segno = segno;
3133  			return 1;
3134  		}
3135  	}
3136  	return 0;
3137  }
3138  
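/*
 * Decide whether the current segment of @type must be replaced by a freshly
 * allocated one (LFS) instead of being refilled in SSR manner.
 */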
3139  static bool need_new_seg(struct f2fs_sb_info *sbi, int type)
3140  {
3141  	struct curseg_info *curseg = CURSEG_I(sbi, type);
3142  
3143  	if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) &&
3144  	    curseg->seg_type == CURSEG_WARM_NODE)
3145  		return true;
3146  	if (curseg->alloc_type == LFS && is_next_segment_free(sbi, curseg) &&
3147  	    likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
3148  		return true;
3149  	if (!f2fs_need_SSR(sbi) || !get_ssr_segment(sbi, type, SSR, 0))
3150  		return true;
3151  	return false;
3152  }
3153  
3154  int f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
3155  					unsigned int start, unsigned int end)
3156  {
3157  	struct curseg_info *curseg = CURSEG_I(sbi, type);
3158  	unsigned int segno;
3159  	int ret = 0;
3160  
3161  	f2fs_down_read(&SM_I(sbi)->curseg_lock);
3162  	mutex_lock(&curseg->curseg_mutex);
3163  	down_write(&SIT_I(sbi)->sentry_lock);
3164  
3165  	segno = CURSEG_I(sbi, type)->segno;
3166  	if (segno < start || segno > end)
3167  		goto unlock;
3168  
3169  	if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type, SSR, 0))
3170  		ret = change_curseg(sbi, type);
3171  	else
3172  		ret = new_curseg(sbi, type, true);
3173  
3174  	stat_inc_seg_type(sbi, curseg);
3175  
3176  	locate_dirty_segment(sbi, segno);
3177  unlock:
3178  	up_write(&SIT_I(sbi)->sentry_lock);
3179  
3180  	if (segno != curseg->segno)
3181  		f2fs_notice(sbi, "For resize: curseg of type %d: %u ==> %u",
3182  			    type, segno, curseg->segno);
3183  
3184  	mutex_unlock(&curseg->curseg_mutex);
3185  	f2fs_up_read(&SM_I(sbi)->curseg_lock);
3186  	return ret;
3187  }
3188  
3189  static int __allocate_new_segment(struct f2fs_sb_info *sbi, int type,
3190  						bool new_sec, bool force)
3191  {
3192  	struct curseg_info *curseg = CURSEG_I(sbi, type);
3193  	unsigned int old_segno;
3194  	int err = 0;
3195  
3196  	if (type == CURSEG_COLD_DATA_PINNED && !curseg->inited)
3197  		goto allocate;
3198  
3199  	if (!force && curseg->inited &&
3200  	    !curseg->next_blkoff &&
3201  	    !get_valid_blocks(sbi, curseg->segno, new_sec) &&
3202  	    !get_ckpt_valid_blocks(sbi, curseg->segno, new_sec))
3203  		return 0;
3204  
3205  allocate:
3206  	old_segno = curseg->segno;
3207  	err = new_curseg(sbi, type, true);
3208  	if (err)
3209  		return err;
3210  	stat_inc_seg_type(sbi, curseg);
3211  	locate_dirty_segment(sbi, old_segno);
3212  	return 0;
3213  }
3214  
3215  int f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force)
3216  {
3217  	int ret;
3218  
3219  	f2fs_down_read(&SM_I(sbi)->curseg_lock);
3220  	down_write(&SIT_I(sbi)->sentry_lock);
3221  	ret = __allocate_new_segment(sbi, type, true, force);
3222  	up_write(&SIT_I(sbi)->sentry_lock);
3223  	f2fs_up_read(&SM_I(sbi)->curseg_lock);
3224  
3225  	return ret;
3226  }
3227  
3228  int f2fs_allocate_pinning_section(struct f2fs_sb_info *sbi)
3229  {
3230  	int err;
3231  	bool gc_required = true;
3232  
3233  retry:
3234  	f2fs_lock_op(sbi);
3235  	err = f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false);
3236  	f2fs_unlock_op(sbi);
3237  
3238  	if (f2fs_sb_has_blkzoned(sbi) && err == -EAGAIN && gc_required) {
3239  		f2fs_down_write(&sbi->gc_lock);
3240  		err = f2fs_gc_range(sbi, 0, GET_SEGNO(sbi, FDEV(0).end_blk), true, 1);
3241  		f2fs_up_write(&sbi->gc_lock);
3242  
3243  		gc_required = false;
3244  		if (!err)
3245  			goto retry;
3246  	}
3247  
3248  	return err;
3249  }
3250  
3251  int f2fs_allocate_new_segments(struct f2fs_sb_info *sbi)
3252  {
3253  	int i;
3254  	int err = 0;
3255  
3256  	f2fs_down_read(&SM_I(sbi)->curseg_lock);
3257  	down_write(&SIT_I(sbi)->sentry_lock);
3258  	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++)
3259  		err += __allocate_new_segment(sbi, i, false, false);
3260  	up_write(&SIT_I(sbi)->sentry_lock);
3261  	f2fs_up_read(&SM_I(sbi)->curseg_lock);
3262  
3263  	return err;
3264  }
3265  
3266  bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi,
3267  						struct cp_control *cpc)
3268  {
3269  	__u64 trim_start = cpc->trim_start;
3270  	bool has_candidate = false;
3271  
3272  	down_write(&SIT_I(sbi)->sentry_lock);
3273  	for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++) {
3274  		if (add_discard_addrs(sbi, cpc, true)) {
3275  			has_candidate = true;
3276  			break;
3277  		}
3278  	}
3279  	up_write(&SIT_I(sbi)->sentry_lock);
3280  
3281  	cpc->trim_start = trim_start;
3282  	return has_candidate;
3283  }
3284  
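/*
 * Issue the pending discard commands whose logical range falls within
 * [@start, @end] for FITRIM, throttled by @dpolicy->max_requests and waiting
 * for completions between batches.  Returns the number of blocks reclaimed
 * during those intermediate waits.
 */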
3285  static unsigned int __issue_discard_cmd_range(struct f2fs_sb_info *sbi,
3286  					struct discard_policy *dpolicy,
3287  					unsigned int start, unsigned int end)
3288  {
3289  	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
3290  	struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
3291  	struct rb_node **insert_p = NULL, *insert_parent = NULL;
3292  	struct discard_cmd *dc;
3293  	struct blk_plug plug;
3294  	int issued;
3295  	unsigned int trimmed = 0;
3296  
3297  next:
3298  	issued = 0;
3299  
3300  	mutex_lock(&dcc->cmd_lock);
3301  	if (unlikely(dcc->rbtree_check))
3302  		f2fs_bug_on(sbi, !f2fs_check_discard_tree(sbi));
3303  
3304  	dc = __lookup_discard_cmd_ret(&dcc->root, start,
3305  				&prev_dc, &next_dc, &insert_p, &insert_parent);
3306  	if (!dc)
3307  		dc = next_dc;
3308  
3309  	blk_start_plug(&plug);
3310  
3311  	while (dc && dc->di.lstart <= end) {
3312  		struct rb_node *node;
3313  		int err = 0;
3314  
3315  		if (dc->di.len < dpolicy->granularity)
3316  			goto skip;
3317  
3318  		if (dc->state != D_PREP) {
3319  			list_move_tail(&dc->list, &dcc->fstrim_list);
3320  			goto skip;
3321  		}
3322  
3323  		err = __submit_discard_cmd(sbi, dpolicy, dc, &issued);
3324  
3325  		if (issued >= dpolicy->max_requests) {
3326  			start = dc->di.lstart + dc->di.len;
3327  
3328  			if (err)
3329  				__remove_discard_cmd(sbi, dc);
3330  
3331  			blk_finish_plug(&plug);
3332  			mutex_unlock(&dcc->cmd_lock);
3333  			trimmed += __wait_all_discard_cmd(sbi, NULL);
3334  			f2fs_io_schedule_timeout(DEFAULT_IO_TIMEOUT);
3335  			goto next;
3336  		}
3337  skip:
3338  		node = rb_next(&dc->rb_node);
3339  		if (err)
3340  			__remove_discard_cmd(sbi, dc);
3341  		dc = rb_entry_safe(node, struct discard_cmd, rb_node);
3342  
3343  		if (fatal_signal_pending(current))
3344  			break;
3345  	}
3346  
3347  	blk_finish_plug(&plug);
3348  	mutex_unlock(&dcc->cmd_lock);
3349  
3350  	return trimmed;
3351  }
3352  
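/*
 * FITRIM entry point: write a checkpoint to turn prefree segments into
 * discard candidates, then, if runtime discard is disabled, synchronously
 * issue and wait for discards in the requested range.
 */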
3353  int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
3354  {
3355  	__u64 start = F2FS_BYTES_TO_BLK(range->start);
3356  	__u64 end = start + F2FS_BYTES_TO_BLK(range->len) - 1;
3357  	unsigned int start_segno, end_segno;
3358  	block_t start_block, end_block;
3359  	struct cp_control cpc;
3360  	struct discard_policy dpolicy;
3361  	unsigned long long trimmed = 0;
3362  	int err = 0;
3363  	bool need_align = f2fs_lfs_mode(sbi) && __is_large_section(sbi);
3364  
3365  	if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize)
3366  		return -EINVAL;
3367  
3368  	if (end < MAIN_BLKADDR(sbi))
3369  		goto out;
3370  
3371  	if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) {
3372  		f2fs_warn(sbi, "Found FS corruption, run fsck to fix.");
3373  		return -EFSCORRUPTED;
3374  	}
3375  
3376  	/* start/end segment number in main_area */
3377  	start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start);
3378  	end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 :
3379  						GET_SEGNO(sbi, end);
3380  	if (need_align) {
3381  		start_segno = rounddown(start_segno, SEGS_PER_SEC(sbi));
3382  		end_segno = roundup(end_segno + 1, SEGS_PER_SEC(sbi)) - 1;
3383  	}
3384  
3385  	cpc.reason = CP_DISCARD;
3386  	cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen));
3387  	cpc.trim_start = start_segno;
3388  	cpc.trim_end = end_segno;
3389  
3390  	if (sbi->discard_blks == 0)
3391  		goto out;
3392  
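	/*
	 * Issue a checkpoint with CP_DISCARD so that segments in the trim
	 * range are cleaned up and filed as discard candidates before being
	 * issued below.
	 */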
3393  	f2fs_down_write(&sbi->gc_lock);
3394  	stat_inc_cp_call_count(sbi, TOTAL_CALL);
3395  	err = f2fs_write_checkpoint(sbi, &cpc);
3396  	f2fs_up_write(&sbi->gc_lock);
3397  	if (err)
3398  		goto out;
3399  
3400  	/*
3401  	 * We filed discard candidates, but we don't need to wait for all of
3402  	 * them, since they will be issued during idle time by the runtime
3403  	 * discard option. The user configuration indicates runtime discard or
3404  	 * periodic fstrim is being used instead of waiting here.
3405  	 */
3406  	if (f2fs_realtime_discard_enable(sbi))
3407  		goto out;
3408  
3409  	start_block = START_BLOCK(sbi, start_segno);
3410  	end_block = START_BLOCK(sbi, end_segno + 1);
3411  
3412  	__init_discard_policy(sbi, &dpolicy, DPOLICY_FSTRIM, cpc.trim_minlen);
3413  	trimmed = __issue_discard_cmd_range(sbi, &dpolicy,
3414  					start_block, end_block);
3415  
3416  	trimmed += __wait_discard_cmd_range(sbi, &dpolicy,
3417  					start_block, end_block);
3418  out:
3419  	if (!err)
3420  		range->len = F2FS_BLK_TO_BYTES(trimmed);
3421  	return err;
3422  }
3423  
3424  int f2fs_rw_hint_to_seg_type(struct f2fs_sb_info *sbi, enum rw_hint hint)
3425  {
3426  	if (F2FS_OPTION(sbi).active_logs == 2)
3427  		return CURSEG_HOT_DATA;
3428  	else if (F2FS_OPTION(sbi).active_logs == 4)
3429  		return CURSEG_COLD_DATA;
3430  
3431  	/* active_logs == 6 */
3432  	switch (hint) {
3433  	case WRITE_LIFE_SHORT:
3434  		return CURSEG_HOT_DATA;
3435  	case WRITE_LIFE_EXTREME:
3436  		return CURSEG_COLD_DATA;
3437  	default:
3438  		return CURSEG_WARM_DATA;
3439  	}
3440  }
3441  
3442  /*
3443   * This returns the write hint for each segment type. These hints will be
3444   * passed down to the block layer as below by default.
3445   *
3446   * User                  F2FS                     Block
3447   * ----                  ----                     -----
3448   *                       META                     WRITE_LIFE_NONE|REQ_META
3449   *                       HOT_NODE                 WRITE_LIFE_NONE
3450   *                       WARM_NODE                WRITE_LIFE_MEDIUM
3451   *                       COLD_NODE                WRITE_LIFE_LONG
3452   * ioctl(COLD)           COLD_DATA                WRITE_LIFE_EXTREME
3453   * extension list        "                        "
3454   *
3455   * -- buffered io
3456   *                       COLD_DATA                WRITE_LIFE_EXTREME
3457   *                       HOT_DATA                 WRITE_LIFE_SHORT
3458   *                       WARM_DATA                WRITE_LIFE_NOT_SET
3459   *
3460   * -- direct io
3461   * WRITE_LIFE_EXTREME    COLD_DATA                WRITE_LIFE_EXTREME
3462   * WRITE_LIFE_SHORT      HOT_DATA                 WRITE_LIFE_SHORT
3463   * WRITE_LIFE_NOT_SET    WARM_DATA                WRITE_LIFE_NOT_SET
3464   * WRITE_LIFE_NONE       "                        WRITE_LIFE_NONE
3465   * WRITE_LIFE_MEDIUM     "                        WRITE_LIFE_MEDIUM
3466   * WRITE_LIFE_LONG       "                        WRITE_LIFE_LONG
3467   */
3468  enum rw_hint f2fs_io_type_to_rw_hint(struct f2fs_sb_info *sbi,
3469  				enum page_type type, enum temp_type temp)
3470  {
3471  	switch (type) {
3472  	case DATA:
3473  		switch (temp) {
3474  		case WARM:
3475  			return WRITE_LIFE_NOT_SET;
3476  		case HOT:
3477  			return WRITE_LIFE_SHORT;
3478  		case COLD:
3479  			return WRITE_LIFE_EXTREME;
3480  		default:
3481  			return WRITE_LIFE_NONE;
3482  		}
3483  	case NODE:
3484  		switch (temp) {
3485  		case WARM:
3486  			return WRITE_LIFE_MEDIUM;
3487  		case HOT:
3488  			return WRITE_LIFE_NONE;
3489  		case COLD:
3490  			return WRITE_LIFE_LONG;
3491  		default:
3492  			return WRITE_LIFE_NONE;
3493  		}
3494  	case META:
3495  		return WRITE_LIFE_NONE;
3496  	default:
3497  		return WRITE_LIFE_NONE;
3498  	}
3499  }
3500  
3501  static int __get_segment_type_2(struct f2fs_io_info *fio)
3502  {
3503  	if (fio->type == DATA)
3504  		return CURSEG_HOT_DATA;
3505  	else
3506  		return CURSEG_HOT_NODE;
3507  }
3508  
3509  static int __get_segment_type_4(struct f2fs_io_info *fio)
3510  {
3511  	if (fio->type == DATA) {
3512  		struct inode *inode = fio->page->mapping->host;
3513  
3514  		if (S_ISDIR(inode->i_mode))
3515  			return CURSEG_HOT_DATA;
3516  		else
3517  			return CURSEG_COLD_DATA;
3518  	} else {
3519  		if (IS_DNODE(fio->page) && is_cold_node(fio->page))
3520  			return CURSEG_WARM_NODE;
3521  		else
3522  			return CURSEG_COLD_NODE;
3523  	}
3524  }
3525  
3526  static int __get_age_segment_type(struct inode *inode, pgoff_t pgofs)
3527  {
3528  	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3529  	struct extent_info ei = {};
3530  
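	/*
	 * Classify data temperature by block age from the age extent cache:
	 * blocks at or below the hot threshold are hot, those at or below the
	 * warm threshold are warm, and older blocks are cold.
	 */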
3531  	if (f2fs_lookup_age_extent_cache(inode, pgofs, &ei)) {
3532  		if (!ei.age)
3533  			return NO_CHECK_TYPE;
3534  		if (ei.age <= sbi->hot_data_age_threshold)
3535  			return CURSEG_HOT_DATA;
3536  		if (ei.age <= sbi->warm_data_age_threshold)
3537  			return CURSEG_WARM_DATA;
3538  		return CURSEG_COLD_DATA;
3539  	}
3540  	return NO_CHECK_TYPE;
3541  }
3542  
3543  static int __get_segment_type_6(struct f2fs_io_info *fio)
3544  {
3545  	if (fio->type == DATA) {
3546  		struct inode *inode = fio->page->mapping->host;
3547  		int type;
3548  
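		/*
		 * Priority order for data: pinned (aligned-write) files, pages
		 * being moved by GC (ATGC or cold), cold/compressed files, the
		 * block-age hint, hot-file flags, and finally the userspace
		 * write hint.
		 */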
3549  		if (is_inode_flag_set(inode, FI_ALIGNED_WRITE))
3550  			return CURSEG_COLD_DATA_PINNED;
3551  
3552  		if (page_private_gcing(fio->page)) {
3553  			if (fio->sbi->am.atgc_enabled &&
3554  				(fio->io_type == FS_DATA_IO) &&
3555  				(fio->sbi->gc_mode != GC_URGENT_HIGH) &&
3556  				__is_valid_data_blkaddr(fio->old_blkaddr) &&
3557  				!is_inode_flag_set(inode, FI_OPU_WRITE))
3558  				return CURSEG_ALL_DATA_ATGC;
3559  			else
3560  				return CURSEG_COLD_DATA;
3561  		}
3562  		if (file_is_cold(inode) || f2fs_need_compress_data(inode))
3563  			return CURSEG_COLD_DATA;
3564  
3565  		type = __get_age_segment_type(inode,
3566  				page_folio(fio->page)->index);
3567  		if (type != NO_CHECK_TYPE)
3568  			return type;
3569  
3570  		if (file_is_hot(inode) ||
3571  				is_inode_flag_set(inode, FI_HOT_DATA) ||
3572  				f2fs_is_cow_file(inode))
3573  			return CURSEG_HOT_DATA;
3574  		return f2fs_rw_hint_to_seg_type(F2FS_I_SB(inode),
3575  						inode->i_write_hint);
3576  	} else {
3577  		if (IS_DNODE(fio->page))
3578  			return is_cold_node(fio->page) ? CURSEG_WARM_NODE :
3579  						CURSEG_HOT_NODE;
3580  		return CURSEG_COLD_NODE;
3581  	}
3582  }
3583  
3584  int f2fs_get_segment_temp(int seg_type)
3585  {
3586  	if (IS_HOT(seg_type))
3587  		return HOT;
3588  	else if (IS_WARM(seg_type))
3589  		return WARM;
3590  	return COLD;
3591  }
3592  
3593  static int __get_segment_type(struct f2fs_io_info *fio)
3594  {
3595  	int type = 0;
3596  
3597  	switch (F2FS_OPTION(fio->sbi).active_logs) {
3598  	case 2:
3599  		type = __get_segment_type_2(fio);
3600  		break;
3601  	case 4:
3602  		type = __get_segment_type_4(fio);
3603  		break;
3604  	case 6:
3605  		type = __get_segment_type_6(fio);
3606  		break;
3607  	default:
3608  		f2fs_bug_on(fio->sbi, true);
3609  	}
3610  
3611  	fio->temp = f2fs_get_segment_temp(type);
3612  
3613  	return type;
3614  }
3615  
3616  static void f2fs_randomize_chunk(struct f2fs_sb_info *sbi,
3617  		struct curseg_info *seg)
3618  {
3619  	/* To allocate block chunks in different sizes, use a random number */
3620  	if (--seg->fragment_remained_chunk > 0)
3621  		return;
3622  
3623  	seg->fragment_remained_chunk =
3624  		get_random_u32_inclusive(1, sbi->max_fragment_chunk);
3625  	seg->next_blkoff +=
3626  		get_random_u32_inclusive(1, sbi->max_fragment_hole);
3627  }
3628  
3629  static void reset_curseg_fields(struct curseg_info *curseg)
3630  {
3631  	curseg->inited = false;
3632  	curseg->segno = NULL_SEGNO;
3633  	curseg->next_segno = 0;
3634  }
3635  
3636  int f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
3637  		block_t old_blkaddr, block_t *new_blkaddr,
3638  		struct f2fs_summary *sum, int type,
3639  		struct f2fs_io_info *fio)
3640  {
3641  	struct sit_info *sit_i = SIT_I(sbi);
3642  	struct curseg_info *curseg = CURSEG_I(sbi, type);
3643  	unsigned long long old_mtime;
3644  	bool from_gc = (type == CURSEG_ALL_DATA_ATGC);
3645  	struct seg_entry *se = NULL;
3646  	bool segment_full = false;
3647  	int ret = 0;
3648  
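	/* Lock order: curseg_lock (shared) -> curseg_mutex -> sentry_lock. */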
3649  	f2fs_down_read(&SM_I(sbi)->curseg_lock);
3650  
3651  	mutex_lock(&curseg->curseg_mutex);
3652  	down_write(&sit_i->sentry_lock);
3653  
3654  	if (curseg->segno == NULL_SEGNO) {
3655  		ret = -ENOSPC;
3656  		goto out_err;
3657  	}
3658  
3659  	if (from_gc) {
3660  		f2fs_bug_on(sbi, GET_SEGNO(sbi, old_blkaddr) == NULL_SEGNO);
3661  		se = get_seg_entry(sbi, GET_SEGNO(sbi, old_blkaddr));
3662  		sanity_check_seg_type(sbi, se->type);
3663  		f2fs_bug_on(sbi, IS_NODESEG(se->type));
3664  	}
3665  	*new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
3666  
3667  	f2fs_bug_on(sbi, curseg->next_blkoff >= BLKS_PER_SEG(sbi));
3668  
3669  	f2fs_wait_discard_bio(sbi, *new_blkaddr);
3670  
3671  	curseg->sum_blk->entries[curseg->next_blkoff] = *sum;
3672  	if (curseg->alloc_type == SSR) {
3673  		curseg->next_blkoff = f2fs_find_next_ssr_block(sbi, curseg);
3674  	} else {
3675  		curseg->next_blkoff++;
3676  		if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK)
3677  			f2fs_randomize_chunk(sbi, curseg);
3678  	}
3679  	if (curseg->next_blkoff >= f2fs_usable_blks_in_seg(sbi, curseg->segno))
3680  		segment_full = true;
3681  	stat_inc_block_count(sbi, curseg);
3682  
3683  	if (from_gc) {
3684  		old_mtime = get_segment_mtime(sbi, old_blkaddr);
3685  	} else {
3686  		update_segment_mtime(sbi, old_blkaddr, 0);
3687  		old_mtime = 0;
3688  	}
3689  	update_segment_mtime(sbi, *new_blkaddr, old_mtime);
3690  
3691  	/*
3692  	 * SIT information should be updated before segment allocation,
3693  	 * since SSR needs the latest valid block information.
3694  	 */
3695  	update_sit_entry(sbi, *new_blkaddr, 1);
3696  	update_sit_entry(sbi, old_blkaddr, -1);
3697  
3698  	/*
3699  	 * If the current segment is full, flush it out and replace it with a
3700  	 * new segment.
3701  	 */
3702  	if (segment_full) {
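		/*
		 * A pinned-data log that just filled the last segment of its
		 * section flushes its summary and leaves the curseg unset; a
		 * later pinned allocation is expected to grab a new section.
		 */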
3703  		if (type == CURSEG_COLD_DATA_PINNED &&
3704  		    !((curseg->segno + 1) % sbi->segs_per_sec)) {
3705  			write_sum_page(sbi, curseg->sum_blk,
3706  					GET_SUM_BLOCK(sbi, curseg->segno));
3707  			reset_curseg_fields(curseg);
3708  			goto skip_new_segment;
3709  		}
3710  
3711  		if (from_gc) {
3712  			ret = get_atssr_segment(sbi, type, se->type,
3713  						AT_SSR, se->mtime);
3714  		} else {
3715  			if (need_new_seg(sbi, type))
3716  				ret = new_curseg(sbi, type, false);
3717  			else
3718  				ret = change_curseg(sbi, type);
3719  			stat_inc_seg_type(sbi, curseg);
3720  		}
3721  
3722  		if (ret)
3723  			goto out_err;
3724  	}
3725  
3726  skip_new_segment:
3727  	/*
3728  	 * Segment dirty status should be updated after segment allocation,
3729  	 * so we only need to update the status once, after the previous
3730  	 * segment has been closed.
3731  	 */
3732  	locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
3733  	locate_dirty_segment(sbi, GET_SEGNO(sbi, *new_blkaddr));
3734  
3735  	if (IS_DATASEG(curseg->seg_type))
3736  		atomic64_inc(&sbi->allocated_data_blocks);
3737  
3738  	up_write(&sit_i->sentry_lock);
3739  
3740  	if (page && IS_NODESEG(curseg->seg_type)) {
3741  		fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg));
3742  
3743  		f2fs_inode_chksum_set(sbi, page);
3744  	}
3745  
3746  	if (fio) {
3747  		struct f2fs_bio_info *io;
3748  
3749  		INIT_LIST_HEAD(&fio->list);
3750  		fio->in_list = 1;
3751  		io = sbi->write_io[fio->type] + fio->temp;
3752  		spin_lock(&io->io_lock);
3753  		list_add_tail(&fio->list, &io->io_list);
3754  		spin_unlock(&io->io_lock);
3755  	}
3756  
3757  	mutex_unlock(&curseg->curseg_mutex);
3758  	f2fs_up_read(&SM_I(sbi)->curseg_lock);
3759  	return 0;
3760  
3761  out_err:
3762  	*new_blkaddr = NULL_ADDR;
3763  	up_write(&sit_i->sentry_lock);
3764  	mutex_unlock(&curseg->curseg_mutex);
3765  	f2fs_up_read(&SM_I(sbi)->curseg_lock);
3766  	return ret;
3767  }
3768  
3769  void f2fs_update_device_state(struct f2fs_sb_info *sbi, nid_t ino,
3770  					block_t blkaddr, unsigned int blkcnt)
3771  {
3772  	if (!f2fs_is_multi_device(sbi))
3773  		return;
3774  
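	/*
	 * The block range may span multiple devices; mark every device covered
	 * by [blkaddr, blkaddr + blkcnt) dirty for both fsync and checkpoint.
	 */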
3775  	while (1) {
3776  		unsigned int devidx = f2fs_target_device_index(sbi, blkaddr);
3777  		unsigned int blks = FDEV(devidx).end_blk - blkaddr + 1;
3778  
3779  		/* update device state for fsync */
3780  		f2fs_set_dirty_device(sbi, ino, devidx, FLUSH_INO);
3781  
3782  		/* update device state for checkpoint */
3783  		if (!f2fs_test_bit(devidx, (char *)&sbi->dirty_device)) {
3784  			spin_lock(&sbi->dev_lock);
3785  			f2fs_set_bit(devidx, (char *)&sbi->dirty_device);
3786  			spin_unlock(&sbi->dev_lock);
3787  		}
3788  
3789  		if (blkcnt <= blks)
3790  			break;
3791  		blkcnt -= blks;
3792  		blkaddr += blks;
3793  	}
3794  }
3795  
3796  static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
3797  {
3798  	int type = __get_segment_type(fio);
3799  	bool keep_order = (f2fs_lfs_mode(fio->sbi) && type == CURSEG_COLD_DATA);
3800  
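	/*
	 * In LFS mode, cold data takes io_order_lock so that block allocation
	 * order matches bio submission order.
	 */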
3801  	if (keep_order)
3802  		f2fs_down_read(&fio->sbi->io_order_lock);
3803  
3804  	if (f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr,
3805  			&fio->new_blkaddr, sum, type, fio)) {
3806  		if (fscrypt_inode_uses_fs_layer_crypto(fio->page->mapping->host))
3807  			fscrypt_finalize_bounce_page(&fio->encrypted_page);
3808  		end_page_writeback(fio->page);
3809  		if (f2fs_in_warm_node_list(fio->sbi, fio->page))
3810  			f2fs_del_fsync_node_entry(fio->sbi, fio->page);
3811  		goto out;
3812  	}
3813  	if (GET_SEGNO(fio->sbi, fio->old_blkaddr) != NULL_SEGNO)
3814  		f2fs_invalidate_internal_cache(fio->sbi, fio->old_blkaddr);
3815  
3816  	/* writeout dirty page into bdev */
3817  	f2fs_submit_page_write(fio);
3818  
3819  	f2fs_update_device_state(fio->sbi, fio->ino, fio->new_blkaddr, 1);
3820  out:
3821  	if (keep_order)
3822  		f2fs_up_read(&fio->sbi->io_order_lock);
3823  }
3824  
3825  void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct folio *folio,
3826  					enum iostat_type io_type)
3827  {
3828  	struct f2fs_io_info fio = {
3829  		.sbi = sbi,
3830  		.type = META,
3831  		.temp = HOT,
3832  		.op = REQ_OP_WRITE,
3833  		.op_flags = REQ_SYNC | REQ_META | REQ_PRIO,
3834  		.old_blkaddr = folio->index,
3835  		.new_blkaddr = folio->index,
3836  		.page = folio_page(folio, 0),
3837  		.encrypted_page = NULL,
3838  		.in_list = 0,
3839  	};
3840  
3841  	if (unlikely(folio->index >= MAIN_BLKADDR(sbi)))
3842  		fio.op_flags &= ~REQ_META;
3843  
3844  	folio_start_writeback(folio);
3845  	f2fs_submit_page_write(&fio);
3846  
3847  	stat_inc_meta_count(sbi, folio->index);
3848  	f2fs_update_iostat(sbi, NULL, io_type, F2FS_BLKSIZE);
3849  }
3850  
3851  void f2fs_do_write_node_page(unsigned int nid, struct f2fs_io_info *fio)
3852  {
3853  	struct f2fs_summary sum;
3854  
3855  	set_summary(&sum, nid, 0, 0);
3856  	do_write_page(&sum, fio);
3857  
3858  	f2fs_update_iostat(fio->sbi, NULL, fio->io_type, F2FS_BLKSIZE);
3859  }
3860  
3861  void f2fs_outplace_write_data(struct dnode_of_data *dn,
3862  					struct f2fs_io_info *fio)
3863  {
3864  	struct f2fs_sb_info *sbi = fio->sbi;
3865  	struct f2fs_summary sum;
3866  
3867  	f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR);
3868  	if (fio->io_type == FS_DATA_IO || fio->io_type == FS_CP_DATA_IO)
3869  		f2fs_update_age_extent_cache(dn);
3870  	set_summary(&sum, dn->nid, dn->ofs_in_node, fio->version);
3871  	do_write_page(&sum, fio);
3872  	f2fs_update_data_blkaddr(dn, fio->new_blkaddr);
3873  
3874  	f2fs_update_iostat(sbi, dn->inode, fio->io_type, F2FS_BLKSIZE);
3875  }
3876  
3877  int f2fs_inplace_write_data(struct f2fs_io_info *fio)
3878  {
3879  	int err;
3880  	struct f2fs_sb_info *sbi = fio->sbi;
3881  	unsigned int segno;
3882  
3883  	fio->new_blkaddr = fio->old_blkaddr;
3884  	/* i/o temperature is needed for passing down write hints */
3885  	__get_segment_type(fio);
3886  
3887  	segno = GET_SEGNO(sbi, fio->new_blkaddr);
3888  
3889  	if (!IS_DATASEG(get_seg_entry(sbi, segno)->type)) {
3890  		set_sbi_flag(sbi, SBI_NEED_FSCK);
3891  		f2fs_warn(sbi, "%s: incorrect segment(%u) type, run fsck to fix.",
3892  			  __func__, segno);
3893  		err = -EFSCORRUPTED;
3894  		f2fs_handle_error(sbi, ERROR_INCONSISTENT_SUM_TYPE);
3895  		goto drop_bio;
3896  	}
3897  
3898  	if (f2fs_cp_error(sbi)) {
3899  		err = -EIO;
3900  		goto drop_bio;
3901  	}
3902  
3903  	if (fio->meta_gc)
3904  		f2fs_truncate_meta_inode_pages(sbi, fio->new_blkaddr, 1);
3905  
3906  	stat_inc_inplace_blocks(fio->sbi);
3907  
3908  	if (fio->bio && !IS_F2FS_IPU_NOCACHE(sbi))
3909  		err = f2fs_merge_page_bio(fio);
3910  	else
3911  		err = f2fs_submit_page_bio(fio);
3912  	if (!err) {
3913  		f2fs_update_device_state(fio->sbi, fio->ino,
3914  						fio->new_blkaddr, 1);
3915  		f2fs_update_iostat(fio->sbi, fio->page->mapping->host,
3916  						fio->io_type, F2FS_BLKSIZE);
3917  	}
3918  
3919  	return err;
3920  drop_bio:
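	/* Complete any cached bio with an I/O error instead of submitting it. */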
3921  	if (fio->bio && *(fio->bio)) {
3922  		struct bio *bio = *(fio->bio);
3923  
3924  		bio->bi_status = BLK_STS_IOERR;
3925  		bio_endio(bio);
3926  		*(fio->bio) = NULL;
3927  	}
3928  	return err;
3929  }
3930  
3931  static inline int __f2fs_get_curseg(struct f2fs_sb_info *sbi,
3932  						unsigned int segno)
3933  {
3934  	int i;
3935  
3936  	for (i = CURSEG_HOT_DATA; i < NO_CHECK_TYPE; i++) {
3937  		if (CURSEG_I(sbi, i)->segno == segno)
3938  			break;
3939  	}
3940  	return i;
3941  }
3942  
3943  void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
3944  				block_t old_blkaddr, block_t new_blkaddr,
3945  				bool recover_curseg, bool recover_newaddr,
3946  				bool from_gc)
3947  {
3948  	struct sit_info *sit_i = SIT_I(sbi);
3949  	struct curseg_info *curseg;
3950  	unsigned int segno, old_cursegno;
3951  	struct seg_entry *se;
3952  	int type;
3953  	unsigned short old_blkoff;
3954  	unsigned char old_alloc_type;
3955  
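	/*
	 * Place the summary for new_blkaddr by temporarily pointing a data
	 * curseg at its segment; when recover_curseg is set, restore the
	 * original curseg position afterwards.
	 */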
3956  	segno = GET_SEGNO(sbi, new_blkaddr);
3957  	se = get_seg_entry(sbi, segno);
3958  	type = se->type;
3959  
3960  	f2fs_down_write(&SM_I(sbi)->curseg_lock);
3961  
3962  	if (!recover_curseg) {
3963  		/* for recovery flow */
3964  		if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) {
3965  			if (old_blkaddr == NULL_ADDR)
3966  				type = CURSEG_COLD_DATA;
3967  			else
3968  				type = CURSEG_WARM_DATA;
3969  		}
3970  	} else {
3971  		if (IS_CURSEG(sbi, segno)) {
3972  			/* se->type is volatile due to SSR allocation */
3973  			type = __f2fs_get_curseg(sbi, segno);
3974  			f2fs_bug_on(sbi, type == NO_CHECK_TYPE);
3975  		} else {
3976  			type = CURSEG_WARM_DATA;
3977  		}
3978  	}
3979  
3980  	f2fs_bug_on(sbi, !IS_DATASEG(type));
3981  	curseg = CURSEG_I(sbi, type);
3982  
3983  	mutex_lock(&curseg->curseg_mutex);
3984  	down_write(&sit_i->sentry_lock);
3985  
3986  	old_cursegno = curseg->segno;
3987  	old_blkoff = curseg->next_blkoff;
3988  	old_alloc_type = curseg->alloc_type;
3989  
3990  	/* change the current segment */
3991  	if (segno != curseg->segno) {
3992  		curseg->next_segno = segno;
3993  		if (change_curseg(sbi, type))
3994  			goto out_unlock;
3995  	}
3996  
3997  	curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
3998  	curseg->sum_blk->entries[curseg->next_blkoff] = *sum;
3999  
4000  	if (!recover_curseg || recover_newaddr) {
4001  		if (!from_gc)
4002  			update_segment_mtime(sbi, new_blkaddr, 0);
4003  		update_sit_entry(sbi, new_blkaddr, 1);
4004  	}
4005  	if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) {
4006  		f2fs_invalidate_internal_cache(sbi, old_blkaddr);
4007  		if (!from_gc)
4008  			update_segment_mtime(sbi, old_blkaddr, 0);
4009  		update_sit_entry(sbi, old_blkaddr, -1);
4010  	}
4011  
4012  	locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
4013  	locate_dirty_segment(sbi, GET_SEGNO(sbi, new_blkaddr));
4014  
4015  	locate_dirty_segment(sbi, old_cursegno);
4016  
4017  	if (recover_curseg) {
4018  		if (old_cursegno != curseg->segno) {
4019  			curseg->next_segno = old_cursegno;
4020  			if (change_curseg(sbi, type))
4021  				goto out_unlock;
4022  		}
4023  		curseg->next_blkoff = old_blkoff;
4024  		curseg->alloc_type = old_alloc_type;
4025  	}
4026  
4027  out_unlock:
4028  	up_write(&sit_i->sentry_lock);
4029  	mutex_unlock(&curseg->curseg_mutex);
4030  	f2fs_up_write(&SM_I(sbi)->curseg_lock);
4031  }
4032  
4033  void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
4034  				block_t old_addr, block_t new_addr,
4035  				unsigned char version, bool recover_curseg,
4036  				bool recover_newaddr)
4037  {
4038  	struct f2fs_summary sum;
4039  
4040  	set_summary(&sum, dn->nid, dn->ofs_in_node, version);
4041  
4042  	f2fs_do_replace_block(sbi, &sum, old_addr, new_addr,
4043  					recover_curseg, recover_newaddr, false);
4044  
4045  	f2fs_update_data_blkaddr(dn, new_addr);
4046  }
4047  
4048  void f2fs_wait_on_page_writeback(struct page *page,
4049  				enum page_type type, bool ordered, bool locked)
4050  {
4051  	if (folio_test_writeback(page_folio(page))) {
4052  		struct f2fs_sb_info *sbi = F2FS_P_SB(page);
4053  
4054  		/* submit cached LFS IO */
4055  		f2fs_submit_merged_write_cond(sbi, NULL, page, 0, type);
4056  		/* submit cached IPU IO */
4057  		f2fs_submit_merged_ipu_write(sbi, NULL, page);
4058  		if (ordered) {
4059  			wait_on_page_writeback(page);
4060  			f2fs_bug_on(sbi, locked &&
4061  				folio_test_writeback(page_folio(page)));
4062  		} else {
4063  			wait_for_stable_page(page);
4064  		}
4065  	}
4066  }
4067  
4068  void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr)
4069  {
4070  	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
4071  	struct page *cpage;
4072  
4073  	if (!f2fs_meta_inode_gc_required(inode))
4074  		return;
4075  
4076  	if (!__is_valid_data_blkaddr(blkaddr))
4077  		return;
4078  
4079  	cpage = find_lock_page(META_MAPPING(sbi), blkaddr);
4080  	if (cpage) {
4081  		f2fs_wait_on_page_writeback(cpage, DATA, true, true);
4082  		f2fs_put_page(cpage, 1);
4083  	}
4084  }
4085  
4086  void f2fs_wait_on_block_writeback_range(struct inode *inode, block_t blkaddr,
4087  								block_t len)
4088  {
4089  	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
4090  	block_t i;
4091  
4092  	if (!f2fs_meta_inode_gc_required(inode))
4093  		return;
4094  
4095  	for (i = 0; i < len; i++)
4096  		f2fs_wait_on_block_writeback(inode, blkaddr + i);
4097  
4098  	f2fs_truncate_meta_inode_pages(sbi, blkaddr, len);
4099  }
4100  
4101  static int read_compacted_summaries(struct f2fs_sb_info *sbi)
4102  {
4103  	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
4104  	struct curseg_info *seg_i;
4105  	unsigned char *kaddr;
4106  	struct page *page;
4107  	block_t start;
4108  	int i, j, offset;
4109  
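	/*
	 * Compacted layout: the NAT journal, then the SIT journal, then the
	 * hot/warm/cold data summary entries packed back-to-back across the
	 * checkpoint's summary blocks.
	 */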
4110  	start = start_sum_block(sbi);
4111  
4112  	page = f2fs_get_meta_page(sbi, start++);
4113  	if (IS_ERR(page))
4114  		return PTR_ERR(page);
4115  	kaddr = (unsigned char *)page_address(page);
4116  
4117  	/* Step 1: restore nat cache */
4118  	seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
4119  	memcpy(seg_i->journal, kaddr, SUM_JOURNAL_SIZE);
4120  
4121  	/* Step 2: restore sit cache */
4122  	seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
4123  	memcpy(seg_i->journal, kaddr + SUM_JOURNAL_SIZE, SUM_JOURNAL_SIZE);
4124  	offset = 2 * SUM_JOURNAL_SIZE;
4125  
4126  	/* Step 3: restore summary entries */
4127  	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
4128  		unsigned short blk_off;
4129  		unsigned int segno;
4130  
4131  		seg_i = CURSEG_I(sbi, i);
4132  		segno = le32_to_cpu(ckpt->cur_data_segno[i]);
4133  		blk_off = le16_to_cpu(ckpt->cur_data_blkoff[i]);
4134  		seg_i->next_segno = segno;
4135  		reset_curseg(sbi, i, 0);
4136  		seg_i->alloc_type = ckpt->alloc_type[i];
4137  		seg_i->next_blkoff = blk_off;
4138  
4139  		if (seg_i->alloc_type == SSR)
4140  			blk_off = BLKS_PER_SEG(sbi);
4141  
4142  		for (j = 0; j < blk_off; j++) {
4143  			struct f2fs_summary *s;
4144  
4145  			s = (struct f2fs_summary *)(kaddr + offset);
4146  			seg_i->sum_blk->entries[j] = *s;
4147  			offset += SUMMARY_SIZE;
4148  			if (offset + SUMMARY_SIZE <= PAGE_SIZE -
4149  						SUM_FOOTER_SIZE)
4150  				continue;
4151  
4152  			f2fs_put_page(page, 1);
4153  			page = NULL;
4154  
4155  			page = f2fs_get_meta_page(sbi, start++);
4156  			if (IS_ERR(page))
4157  				return PTR_ERR(page);
4158  			kaddr = (unsigned char *)page_address(page);
4159  			offset = 0;
4160  		}
4161  	}
4162  	f2fs_put_page(page, 1);
4163  	return 0;
4164  }
4165  
4166  static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
4167  {
4168  	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
4169  	struct f2fs_summary_block *sum;
4170  	struct curseg_info *curseg;
4171  	struct page *new;
4172  	unsigned short blk_off;
4173  	unsigned int segno = 0;
4174  	block_t blk_addr = 0;
4175  	int err = 0;
4176  
4177  	/* get segment number and block addr */
4178  	if (IS_DATASEG(type)) {
4179  		segno = le32_to_cpu(ckpt->cur_data_segno[type]);
4180  		blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type -
4181  							CURSEG_HOT_DATA]);
4182  		if (__exist_node_summaries(sbi))
4183  			blk_addr = sum_blk_addr(sbi, NR_CURSEG_PERSIST_TYPE, type);
4184  		else
4185  			blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type);
4186  	} else {
4187  		segno = le32_to_cpu(ckpt->cur_node_segno[type -
4188  							CURSEG_HOT_NODE]);
4189  		blk_off = le16_to_cpu(ckpt->cur_node_blkoff[type -
4190  							CURSEG_HOT_NODE]);
4191  		if (__exist_node_summaries(sbi))
4192  			blk_addr = sum_blk_addr(sbi, NR_CURSEG_NODE_TYPE,
4193  							type - CURSEG_HOT_NODE);
4194  		else
4195  			blk_addr = GET_SUM_BLOCK(sbi, segno);
4196  	}
4197  
4198  	new = f2fs_get_meta_page(sbi, blk_addr);
4199  	if (IS_ERR(new))
4200  		return PTR_ERR(new);
4201  	sum = (struct f2fs_summary_block *)page_address(new);
4202  
4203  	if (IS_NODESEG(type)) {
4204  		if (__exist_node_summaries(sbi)) {
4205  			struct f2fs_summary *ns = &sum->entries[0];
4206  			int i;
4207  
4208  			for (i = 0; i < BLKS_PER_SEG(sbi); i++, ns++) {
4209  				ns->version = 0;
4210  				ns->ofs_in_node = 0;
4211  			}
4212  		} else {
4213  			err = f2fs_restore_node_summary(sbi, segno, sum);
4214  			if (err)
4215  				goto out;
4216  		}
4217  	}
4218  
4219  	/* set the in-progress segment as curseg */
4220  	curseg = CURSEG_I(sbi, type);
4221  	mutex_lock(&curseg->curseg_mutex);
4222  
4223  	/* update journal info */
4224  	down_write(&curseg->journal_rwsem);
4225  	memcpy(curseg->journal, &sum->journal, SUM_JOURNAL_SIZE);
4226  	up_write(&curseg->journal_rwsem);
4227  
4228  	memcpy(curseg->sum_blk->entries, sum->entries, SUM_ENTRY_SIZE);
4229  	memcpy(&curseg->sum_blk->footer, &sum->footer, SUM_FOOTER_SIZE);
4230  	curseg->next_segno = segno;
4231  	reset_curseg(sbi, type, 0);
4232  	curseg->alloc_type = ckpt->alloc_type[type];
4233  	curseg->next_blkoff = blk_off;
4234  	mutex_unlock(&curseg->curseg_mutex);
4235  out:
4236  	f2fs_put_page(new, 1);
4237  	return err;
4238  }
4239  
4240  static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
4241  {
4242  	struct f2fs_journal *sit_j = CURSEG_I(sbi, CURSEG_COLD_DATA)->journal;
4243  	struct f2fs_journal *nat_j = CURSEG_I(sbi, CURSEG_HOT_DATA)->journal;
4244  	int type = CURSEG_HOT_DATA;
4245  	int err;
4246  
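	/*
	 * A compacted checkpoint stores the data summaries in packed form;
	 * everything else is restored from full summary blocks below.
	 */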
4247  	if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG)) {
4248  		int npages = f2fs_npages_for_summary_flush(sbi, true);
4249  
4250  		if (npages >= 2)
4251  			f2fs_ra_meta_pages(sbi, start_sum_block(sbi), npages,
4252  							META_CP, true);
4253  
4254  		/* restore for compacted data summary */
4255  		err = read_compacted_summaries(sbi);
4256  		if (err)
4257  			return err;
4258  		type = CURSEG_HOT_NODE;
4259  	}
4260  
4261  	if (__exist_node_summaries(sbi))
4262  		f2fs_ra_meta_pages(sbi,
4263  				sum_blk_addr(sbi, NR_CURSEG_PERSIST_TYPE, type),
4264  				NR_CURSEG_PERSIST_TYPE - type, META_CP, true);
4265  
4266  	for (; type <= CURSEG_COLD_NODE; type++) {
4267  		err = read_normal_summaries(sbi, type);
4268  		if (err)
4269  			return err;
4270  	}
4271  
4272  	/* sanity check for summary blocks */
4273  	if (nats_in_cursum(nat_j) > NAT_JOURNAL_ENTRIES ||
4274  			sits_in_cursum(sit_j) > SIT_JOURNAL_ENTRIES) {
4275  		f2fs_err(sbi, "invalid journal entries nats %u sits %u",
4276  			 nats_in_cursum(nat_j), sits_in_cursum(sit_j));
4277  		return -EINVAL;
4278  	}
4279  
4280  	return 0;
4281  }
4282  
4283  static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr)
4284  {
4285  	struct page *page;
4286  	unsigned char *kaddr;
4287  	struct f2fs_summary *summary;
4288  	struct curseg_info *seg_i;
4289  	int written_size = 0;
4290  	int i, j;
4291  
4292  	page = f2fs_grab_meta_page(sbi, blkaddr++);
4293  	kaddr = (unsigned char *)page_address(page);
4294  	memset(kaddr, 0, PAGE_SIZE);
4295  
4296  	/* Step 1: write nat cache */
4297  	seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
4298  	memcpy(kaddr, seg_i->journal, SUM_JOURNAL_SIZE);
4299  	written_size += SUM_JOURNAL_SIZE;
4300  
4301  	/* Step 2: write sit cache */
4302  	seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
4303  	memcpy(kaddr + written_size, seg_i->journal, SUM_JOURNAL_SIZE);
4304  	written_size += SUM_JOURNAL_SIZE;
4305  
4306  	/* Step 3: write summary entries */
4307  	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
4308  		seg_i = CURSEG_I(sbi, i);
4309  		for (j = 0; j < f2fs_curseg_valid_blocks(sbi, i); j++) {
4310  			if (!page) {
4311  				page = f2fs_grab_meta_page(sbi, blkaddr++);
4312  				kaddr = (unsigned char *)page_address(page);
4313  				memset(kaddr, 0, PAGE_SIZE);
4314  				written_size = 0;
4315  			}
4316  			summary = (struct f2fs_summary *)(kaddr + written_size);
4317  			*summary = seg_i->sum_blk->entries[j];
4318  			written_size += SUMMARY_SIZE;
4319  
4320  			if (written_size + SUMMARY_SIZE <= PAGE_SIZE -
4321  							SUM_FOOTER_SIZE)
4322  				continue;
4323  
4324  			set_page_dirty(page);
4325  			f2fs_put_page(page, 1);
4326  			page = NULL;
4327  		}
4328  	}
4329  	if (page) {
4330  		set_page_dirty(page);
4331  		f2fs_put_page(page, 1);
4332  	}
4333  }
4334  
4335  static void write_normal_summaries(struct f2fs_sb_info *sbi,
4336  					block_t blkaddr, int type)
4337  {
4338  	int i, end;
4339  
4340  	if (IS_DATASEG(type))
4341  		end = type + NR_CURSEG_DATA_TYPE;
4342  	else
4343  		end = type + NR_CURSEG_NODE_TYPE;
4344  
4345  	for (i = type; i < end; i++)
4346  		write_current_sum_page(sbi, i, blkaddr + (i - type));
4347  }
4348  
4349  void f2fs_write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
4350  {
4351  	if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG))
4352  		write_compacted_summaries(sbi, start_blk);
4353  	else
4354  		write_normal_summaries(sbi, start_blk, CURSEG_HOT_DATA);
4355  }
4356  
4357  void f2fs_write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
4358  {
4359  	write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE);
4360  }
4361  
4362  int f2fs_lookup_journal_in_cursum(struct f2fs_journal *journal, int type,
4363  					unsigned int val, int alloc)
4364  {
4365  	int i;
4366  
4367  	if (type == NAT_JOURNAL) {
4368  		for (i = 0; i < nats_in_cursum(journal); i++) {
4369  			if (le32_to_cpu(nid_in_journal(journal, i)) == val)
4370  				return i;
4371  		}
4372  		if (alloc && __has_cursum_space(journal, 1, NAT_JOURNAL))
4373  			return update_nats_in_cursum(journal, 1);
4374  	} else if (type == SIT_JOURNAL) {
4375  		for (i = 0; i < sits_in_cursum(journal); i++)
4376  			if (le32_to_cpu(segno_in_journal(journal, i)) == val)
4377  				return i;
4378  		if (alloc && __has_cursum_space(journal, 1, SIT_JOURNAL))
4379  			return update_sits_in_cursum(journal, 1);
4380  	}
4381  	return -1;
4382  }
4383  
4384  static struct page *get_current_sit_page(struct f2fs_sb_info *sbi,
4385  					unsigned int segno)
4386  {
4387  	return f2fs_get_meta_page(sbi, current_sit_addr(sbi, segno));
4388  }
4389  
4390  static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
4391  					unsigned int start)
4392  {
4393  	struct sit_info *sit_i = SIT_I(sbi);
4394  	struct page *page;
4395  	pgoff_t src_off, dst_off;
4396  
4397  	src_off = current_sit_addr(sbi, start);
4398  	dst_off = next_sit_addr(sbi, src_off);
4399  
4400  	page = f2fs_grab_meta_page(sbi, dst_off);
4401  	seg_info_to_sit_page(sbi, page, start);
4402  
4403  	set_page_dirty(page);
4404  	set_to_next_sit(sit_i, start);
4405  
4406  	return page;
4407  }
4408  
4409  static struct sit_entry_set *grab_sit_entry_set(void)
4410  {
4411  	struct sit_entry_set *ses =
4412  			f2fs_kmem_cache_alloc(sit_entry_set_slab,
4413  						GFP_NOFS, true, NULL);
4414  
4415  	ses->entry_cnt = 0;
4416  	INIT_LIST_HEAD(&ses->set_list);
4417  	return ses;
4418  }
4419  
4420  static void release_sit_entry_set(struct sit_entry_set *ses)
4421  {
4422  	list_del(&ses->set_list);
4423  	kmem_cache_free(sit_entry_set_slab, ses);
4424  }
4425  
4426  static void adjust_sit_entry_set(struct sit_entry_set *ses,
4427  						struct list_head *head)
4428  {
4429  	struct sit_entry_set *next = ses;
4430  
4431  	if (list_is_last(&ses->set_list, head))
4432  		return;
4433  
4434  	list_for_each_entry_continue(next, head, set_list)
4435  		if (ses->entry_cnt <= next->entry_cnt) {
4436  			list_move_tail(&ses->set_list, &next->set_list);
4437  			return;
4438  		}
4439  
4440  	list_move_tail(&ses->set_list, head);
4441  }
4442  
4443  static void add_sit_entry(unsigned int segno, struct list_head *head)
4444  {
4445  	struct sit_entry_set *ses;
4446  	unsigned int start_segno = START_SEGNO(segno);
4447  
4448  	list_for_each_entry(ses, head, set_list) {
4449  		if (ses->start_segno == start_segno) {
4450  			ses->entry_cnt++;
4451  			adjust_sit_entry_set(ses, head);
4452  			return;
4453  		}
4454  	}
4455  
4456  	ses = grab_sit_entry_set();
4457  
4458  	ses->start_segno = start_segno;
4459  	ses->entry_cnt++;
4460  	list_add(&ses->set_list, head);
4461  }
4462  
4463  static void add_sits_in_set(struct f2fs_sb_info *sbi)
4464  {
4465  	struct f2fs_sm_info *sm_info = SM_I(sbi);
4466  	struct list_head *set_list = &sm_info->sit_entry_set;
4467  	unsigned long *bitmap = SIT_I(sbi)->dirty_sentries_bitmap;
4468  	unsigned int segno;
4469  
4470  	for_each_set_bit(segno, bitmap, MAIN_SEGS(sbi))
4471  		add_sit_entry(segno, set_list);
4472  }
4473  
4474  static void remove_sits_in_journal(struct f2fs_sb_info *sbi)
4475  {
4476  	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
4477  	struct f2fs_journal *journal = curseg->journal;
4478  	int i;
4479  
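	/*
	 * Drain the SIT journal: make sure every journalled segno is tracked
	 * as a dirty entry in memory, then empty the journal.
	 */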
4480  	down_write(&curseg->journal_rwsem);
4481  	for (i = 0; i < sits_in_cursum(journal); i++) {
4482  		unsigned int segno;
4483  		bool dirtied;
4484  
4485  		segno = le32_to_cpu(segno_in_journal(journal, i));
4486  		dirtied = __mark_sit_entry_dirty(sbi, segno);
4487  
4488  		if (!dirtied)
4489  			add_sit_entry(segno, &SM_I(sbi)->sit_entry_set);
4490  	}
4491  	update_sits_in_cursum(journal, -i);
4492  	up_write(&curseg->journal_rwsem);
4493  }
4494  
4495  /*
4496   * CP calls this function, which flushes SIT entries including sit_journal,
4497   * and moves prefree segs to free segs.
4498   */
4499  void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
4500  {
4501  	struct sit_info *sit_i = SIT_I(sbi);
4502  	unsigned long *bitmap = sit_i->dirty_sentries_bitmap;
4503  	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
4504  	struct f2fs_journal *journal = curseg->journal;
4505  	struct sit_entry_set *ses, *tmp;
4506  	struct list_head *head = &SM_I(sbi)->sit_entry_set;
4507  	bool to_journal = !is_sbi_flag_set(sbi, SBI_IS_RESIZEFS);
4508  	struct seg_entry *se;
4509  
4510  	down_write(&sit_i->sentry_lock);
4511  
4512  	if (!sit_i->dirty_sentries)
4513  		goto out;
4514  
4515  	/*
4516  	 * add and account the SIT entries marked in the dirty bitmap to the
4517  	 * SIT entry sets temporarily
4518  	 */
4519  	add_sits_in_set(sbi);
4520  
4521  	/*
4522  	 * if there is not enough space in the journal to store the dirty sit
4523  	 * entries, remove all entries from the journal and add and account
4524  	 * them in the sit entry sets instead.
4525  	 */
4526  	if (!__has_cursum_space(journal, sit_i->dirty_sentries, SIT_JOURNAL) ||
4527  								!to_journal)
4528  		remove_sits_in_journal(sbi);
4529  
4530  	/*
4531  	 * there are two steps to flush sit entries:
4532  	 * #1, flush sit entries to journal in current cold data summary block.
4533  	 * #2, flush sit entries to sit page.
4534  	 */
4535  	list_for_each_entry_safe(ses, tmp, head, set_list) {
4536  		struct page *page = NULL;
4537  		struct f2fs_sit_block *raw_sit = NULL;
4538  		unsigned int start_segno = ses->start_segno;
4539  		unsigned int end = min(start_segno + SIT_ENTRY_PER_BLOCK,
4540  						(unsigned long)MAIN_SEGS(sbi));
4541  		unsigned int segno = start_segno;
4542  
4543  		if (to_journal &&
4544  			!__has_cursum_space(journal, ses->entry_cnt, SIT_JOURNAL))
4545  			to_journal = false;
4546  
4547  		if (to_journal) {
4548  			down_write(&curseg->journal_rwsem);
4549  		} else {
4550  			page = get_next_sit_page(sbi, start_segno);
4551  			raw_sit = page_address(page);
4552  		}
4553  
4554  		/* flush dirty sit entries in region of current sit set */
4555  		for_each_set_bit_from(segno, bitmap, end) {
4556  			int offset, sit_offset;
4557  
4558  			se = get_seg_entry(sbi, segno);
4559  #ifdef CONFIG_F2FS_CHECK_FS
4560  			if (memcmp(se->cur_valid_map, se->cur_valid_map_mir,
4561  						SIT_VBLOCK_MAP_SIZE))
4562  				f2fs_bug_on(sbi, 1);
4563  #endif
4564  
4565  			/* add discard candidates */
4566  			if (!(cpc->reason & CP_DISCARD)) {
4567  				cpc->trim_start = segno;
4568  				add_discard_addrs(sbi, cpc, false);
4569  			}
4570  
4571  			if (to_journal) {
4572  				offset = f2fs_lookup_journal_in_cursum(journal,
4573  							SIT_JOURNAL, segno, 1);
4574  				f2fs_bug_on(sbi, offset < 0);
4575  				segno_in_journal(journal, offset) =
4576  							cpu_to_le32(segno);
4577  				seg_info_to_raw_sit(se,
4578  					&sit_in_journal(journal, offset));
4579  				check_block_count(sbi, segno,
4580  					&sit_in_journal(journal, offset));
4581  			} else {
4582  				sit_offset = SIT_ENTRY_OFFSET(sit_i, segno);
4583  				seg_info_to_raw_sit(se,
4584  						&raw_sit->entries[sit_offset]);
4585  				check_block_count(sbi, segno,
4586  						&raw_sit->entries[sit_offset]);
4587  			}
4588  
4589  			__clear_bit(segno, bitmap);
4590  			sit_i->dirty_sentries--;
4591  			ses->entry_cnt--;
4592  		}
4593  
4594  		if (to_journal)
4595  			up_write(&curseg->journal_rwsem);
4596  		else
4597  			f2fs_put_page(page, 1);
4598  
4599  		f2fs_bug_on(sbi, ses->entry_cnt);
4600  		release_sit_entry_set(ses);
4601  	}
4602  
4603  	f2fs_bug_on(sbi, !list_empty(head));
4604  	f2fs_bug_on(sbi, sit_i->dirty_sentries);
4605  out:
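	/*
	 * For a trim-driven checkpoint, collect discard candidates across the
	 * whole requested trim range.
	 */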
4606  	if (cpc->reason & CP_DISCARD) {
4607  		__u64 trim_start = cpc->trim_start;
4608  
4609  		for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++)
4610  			add_discard_addrs(sbi, cpc, false);
4611  
4612  		cpc->trim_start = trim_start;
4613  	}
4614  	up_write(&sit_i->sentry_lock);
4615  
4616  	set_prefree_as_free_segments(sbi);
4617  }
4618  
4619  static int build_sit_info(struct f2fs_sb_info *sbi)
4620  {
4621  	struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
4622  	struct sit_info *sit_i;
4623  	unsigned int sit_segs, start;
4624  	char *src_bitmap, *bitmap;
4625  	unsigned int bitmap_size, main_bitmap_size, sit_bitmap_size;
4626  	unsigned int discard_map = f2fs_block_unit_discard(sbi) ? 1 : 0;
4627  
4628  	/* allocate memory for SIT information */
4629  	sit_i = f2fs_kzalloc(sbi, sizeof(struct sit_info), GFP_KERNEL);
4630  	if (!sit_i)
4631  		return -ENOMEM;
4632  
4633  	SM_I(sbi)->sit_info = sit_i;
4634  
4635  	sit_i->sentries =
4636  		f2fs_kvzalloc(sbi, array_size(sizeof(struct seg_entry),
4637  					      MAIN_SEGS(sbi)),
4638  			      GFP_KERNEL);
4639  	if (!sit_i->sentries)
4640  		return -ENOMEM;
4641  
4642  	main_bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
4643  	sit_i->dirty_sentries_bitmap = f2fs_kvzalloc(sbi, main_bitmap_size,
4644  								GFP_KERNEL);
4645  	if (!sit_i->dirty_sentries_bitmap)
4646  		return -ENOMEM;
4647  
4648  #ifdef CONFIG_F2FS_CHECK_FS
4649  	bitmap_size = MAIN_SEGS(sbi) * SIT_VBLOCK_MAP_SIZE * (3 + discard_map);
4650  #else
4651  	bitmap_size = MAIN_SEGS(sbi) * SIT_VBLOCK_MAP_SIZE * (2 + discard_map);
4652  #endif
4653  	sit_i->bitmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL);
4654  	if (!sit_i->bitmap)
4655  		return -ENOMEM;
4656  
4657  	bitmap = sit_i->bitmap;
4658  
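	/*
	 * Carve the single bitmap allocation into per-segment slices:
	 * cur_valid_map, ckpt_valid_map, an optional mirror map under
	 * CONFIG_F2FS_CHECK_FS, and an optional discard_map.
	 */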
4659  	for (start = 0; start < MAIN_SEGS(sbi); start++) {
4660  		sit_i->sentries[start].cur_valid_map = bitmap;
4661  		bitmap += SIT_VBLOCK_MAP_SIZE;
4662  
4663  		sit_i->sentries[start].ckpt_valid_map = bitmap;
4664  		bitmap += SIT_VBLOCK_MAP_SIZE;
4665  
4666  #ifdef CONFIG_F2FS_CHECK_FS
4667  		sit_i->sentries[start].cur_valid_map_mir = bitmap;
4668  		bitmap += SIT_VBLOCK_MAP_SIZE;
4669  #endif
4670  
4671  		if (discard_map) {
4672  			sit_i->sentries[start].discard_map = bitmap;
4673  			bitmap += SIT_VBLOCK_MAP_SIZE;
4674  		}
4675  	}
4676  
4677  	sit_i->tmp_map = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
4678  	if (!sit_i->tmp_map)
4679  		return -ENOMEM;
4680  
4681  	if (__is_large_section(sbi)) {
4682  		sit_i->sec_entries =
4683  			f2fs_kvzalloc(sbi, array_size(sizeof(struct sec_entry),
4684  						      MAIN_SECS(sbi)),
4685  				      GFP_KERNEL);
4686  		if (!sit_i->sec_entries)
4687  			return -ENOMEM;
4688  	}
4689  
4690  	/* get information related to SIT */
4691  	sit_segs = le32_to_cpu(raw_super->segment_count_sit) >> 1;
4692  
4693  	/* set up SIT bitmap from checkpoint pack */
4694  	sit_bitmap_size = __bitmap_size(sbi, SIT_BITMAP);
4695  	src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP);
4696  
4697  	sit_i->sit_bitmap = kmemdup(src_bitmap, sit_bitmap_size, GFP_KERNEL);
4698  	if (!sit_i->sit_bitmap)
4699  		return -ENOMEM;
4700  
4701  #ifdef CONFIG_F2FS_CHECK_FS
4702  	sit_i->sit_bitmap_mir = kmemdup(src_bitmap,
4703  					sit_bitmap_size, GFP_KERNEL);
4704  	if (!sit_i->sit_bitmap_mir)
4705  		return -ENOMEM;
4706  
4707  	sit_i->invalid_segmap = f2fs_kvzalloc(sbi,
4708  					main_bitmap_size, GFP_KERNEL);
4709  	if (!sit_i->invalid_segmap)
4710  		return -ENOMEM;
4711  #endif
4712  
4713  	sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr);
4714  	sit_i->sit_blocks = SEGS_TO_BLKS(sbi, sit_segs);
4715  	sit_i->written_valid_blocks = 0;
4716  	sit_i->bitmap_size = sit_bitmap_size;
4717  	sit_i->dirty_sentries = 0;
4718  	sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK;
4719  	sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time);
4720  	sit_i->mounted_time = ktime_get_boottime_seconds();
4721  	init_rwsem(&sit_i->sentry_lock);
4722  	return 0;
4723  }
4724  
4725  static int build_free_segmap(struct f2fs_sb_info *sbi)
4726  {
4727  	struct free_segmap_info *free_i;
4728  	unsigned int bitmap_size, sec_bitmap_size;
4729  
4730  	/* allocate memory for free segmap information */
4731  	free_i = f2fs_kzalloc(sbi, sizeof(struct free_segmap_info), GFP_KERNEL);
4732  	if (!free_i)
4733  		return -ENOMEM;
4734  
4735  	SM_I(sbi)->free_info = free_i;
4736  
4737  	bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
4738  	free_i->free_segmap = f2fs_kvmalloc(sbi, bitmap_size, GFP_KERNEL);
4739  	if (!free_i->free_segmap)
4740  		return -ENOMEM;
4741  
4742  	sec_bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
4743  	free_i->free_secmap = f2fs_kvmalloc(sbi, sec_bitmap_size, GFP_KERNEL);
4744  	if (!free_i->free_secmap)
4745  		return -ENOMEM;
4746  
4747  	/* set all segments as dirty temporarily */
4748  	memset(free_i->free_segmap, 0xff, bitmap_size);
4749  	memset(free_i->free_secmap, 0xff, sec_bitmap_size);
4750  
4751  	/* init free segmap information */
4752  	free_i->start_segno = GET_SEGNO_FROM_SEG0(sbi, MAIN_BLKADDR(sbi));
4753  	free_i->free_segments = 0;
4754  	free_i->free_sections = 0;
4755  	spin_lock_init(&free_i->segmap_lock);
4756  	return 0;
4757  }
4758  
4759  static int build_curseg(struct f2fs_sb_info *sbi)
4760  {
4761  	struct curseg_info *array;
4762  	int i;
4763  
4764  	array = f2fs_kzalloc(sbi, array_size(NR_CURSEG_TYPE,
4765  					sizeof(*array)), GFP_KERNEL);
4766  	if (!array)
4767  		return -ENOMEM;
4768  
4769  	SM_I(sbi)->curseg_array = array;
4770  
4771  	for (i = 0; i < NO_CHECK_TYPE; i++) {
4772  		mutex_init(&array[i].curseg_mutex);
4773  		array[i].sum_blk = f2fs_kzalloc(sbi, PAGE_SIZE, GFP_KERNEL);
4774  		if (!array[i].sum_blk)
4775  			return -ENOMEM;
4776  		init_rwsem(&array[i].journal_rwsem);
4777  		array[i].journal = f2fs_kzalloc(sbi,
4778  				sizeof(struct f2fs_journal), GFP_KERNEL);
4779  		if (!array[i].journal)
4780  			return -ENOMEM;
4781  		if (i < NR_PERSISTENT_LOG)
4782  			array[i].seg_type = CURSEG_HOT_DATA + i;
4783  		else if (i == CURSEG_COLD_DATA_PINNED)
4784  			array[i].seg_type = CURSEG_COLD_DATA;
4785  		else if (i == CURSEG_ALL_DATA_ATGC)
4786  			array[i].seg_type = CURSEG_COLD_DATA;
4787  		reset_curseg_fields(&array[i]);
4788  	}
4789  	return restore_curseg_summaries(sbi);
4790  }
4791  
4792  static int build_sit_entries(struct f2fs_sb_info *sbi)
4793  {
4794  	struct sit_info *sit_i = SIT_I(sbi);
4795  	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
4796  	struct f2fs_journal *journal = curseg->journal;
4797  	struct seg_entry *se;
4798  	struct f2fs_sit_entry sit;
4799  	int sit_blk_cnt = SIT_BLK_CNT(sbi);
4800  	unsigned int i, start, end;
4801  	unsigned int readed, start_blk = 0;
4802  	int err = 0;
4803  	block_t sit_valid_blocks[2] = {0, 0};
4804  
4805  	do {
4806  		readed = f2fs_ra_meta_pages(sbi, start_blk, BIO_MAX_VECS,
4807  							META_SIT, true);
4808  
4809  		start = start_blk * sit_i->sents_per_block;
4810  		end = (start_blk + readed) * sit_i->sents_per_block;
4811  
4812  		for (; start < end && start < MAIN_SEGS(sbi); start++) {
4813  			struct f2fs_sit_block *sit_blk;
4814  			struct page *page;
4815  
4816  			se = &sit_i->sentries[start];
4817  			page = get_current_sit_page(sbi, start);
4818  			if (IS_ERR(page))
4819  				return PTR_ERR(page);
4820  			sit_blk = (struct f2fs_sit_block *)page_address(page);
4821  			sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)];
4822  			f2fs_put_page(page, 1);
4823  
4824  			err = check_block_count(sbi, start, &sit);
4825  			if (err)
4826  				return err;
4827  			seg_info_from_raw_sit(se, &sit);
4828  
4829  			if (se->type >= NR_PERSISTENT_LOG) {
4830  				f2fs_err(sbi, "Invalid segment type: %u, segno: %u",
4831  							se->type, start);
4832  				f2fs_handle_error(sbi,
4833  						ERROR_INCONSISTENT_SUM_TYPE);
4834  				return -EFSCORRUPTED;
4835  			}
4836  
4837  			sit_valid_blocks[SE_PAGETYPE(se)] += se->valid_blocks;
4838  
4839  			if (!f2fs_block_unit_discard(sbi))
4840  				goto init_discard_map_done;
4841  
4842  			/* build discard map only one time */
4843  			if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
4844  				memset(se->discard_map, 0xff,
4845  						SIT_VBLOCK_MAP_SIZE);
4846  				goto init_discard_map_done;
4847  			}
4848  			memcpy(se->discard_map, se->cur_valid_map,
4849  						SIT_VBLOCK_MAP_SIZE);
4850  			sbi->discard_blks += BLKS_PER_SEG(sbi) -
4851  						se->valid_blocks;
4852  init_discard_map_done:
4853  			if (__is_large_section(sbi))
4854  				get_sec_entry(sbi, start)->valid_blocks +=
4855  							se->valid_blocks;
4856  		}
4857  		start_blk += readed;
4858  	} while (start_blk < sit_blk_cnt);
4859  
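	/*
	 * SIT entries cached in the journal are newer than the on-disk SIT
	 * blocks read above; apply them on top.
	 */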
4860  	down_read(&curseg->journal_rwsem);
4861  	for (i = 0; i < sits_in_cursum(journal); i++) {
4862  		unsigned int old_valid_blocks;
4863  
4864  		start = le32_to_cpu(segno_in_journal(journal, i));
4865  		if (start >= MAIN_SEGS(sbi)) {
4866  			f2fs_err(sbi, "Wrong journal entry on segno %u",
4867  				 start);
4868  			err = -EFSCORRUPTED;
4869  			f2fs_handle_error(sbi, ERROR_CORRUPTED_JOURNAL);
4870  			break;
4871  		}
4872  
4873  		se = &sit_i->sentries[start];
4874  		sit = sit_in_journal(journal, i);
4875  
4876  		old_valid_blocks = se->valid_blocks;
4877  
4878  		sit_valid_blocks[SE_PAGETYPE(se)] -= old_valid_blocks;
4879  
4880  		err = check_block_count(sbi, start, &sit);
4881  		if (err)
4882  			break;
4883  		seg_info_from_raw_sit(se, &sit);
4884  
4885  		if (se->type >= NR_PERSISTENT_LOG) {
4886  			f2fs_err(sbi, "Invalid segment type: %u, segno: %u",
4887  							se->type, start);
4888  			err = -EFSCORRUPTED;
4889  			f2fs_handle_error(sbi, ERROR_INCONSISTENT_SUM_TYPE);
4890  			break;
4891  		}
4892  
4893  		sit_valid_blocks[SE_PAGETYPE(se)] += se->valid_blocks;
4894  
4895  		if (f2fs_block_unit_discard(sbi)) {
4896  			if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
4897  				memset(se->discard_map, 0xff, SIT_VBLOCK_MAP_SIZE);
4898  			} else {
4899  				memcpy(se->discard_map, se->cur_valid_map,
4900  							SIT_VBLOCK_MAP_SIZE);
4901  				sbi->discard_blks += old_valid_blocks;
4902  				sbi->discard_blks -= se->valid_blocks;
4903  			}
4904  		}
4905  
4906  		if (__is_large_section(sbi)) {
4907  			get_sec_entry(sbi, start)->valid_blocks +=
4908  							se->valid_blocks;
4909  			get_sec_entry(sbi, start)->valid_blocks -=
4910  							old_valid_blocks;
4911  		}
4912  	}
4913  	up_read(&curseg->journal_rwsem);
4914  
4915  	if (err)
4916  		return err;
4917  
4918  	if (sit_valid_blocks[NODE] != valid_node_count(sbi)) {
4919  		f2fs_err(sbi, "SIT is corrupted node# %u vs %u",
4920  			 sit_valid_blocks[NODE], valid_node_count(sbi));
4921  		f2fs_handle_error(sbi, ERROR_INCONSISTENT_NODE_COUNT);
4922  		return -EFSCORRUPTED;
4923  	}
4924  
4925  	if (sit_valid_blocks[DATA] + sit_valid_blocks[NODE] >
4926  				valid_user_blocks(sbi)) {
4927  		f2fs_err(sbi, "SIT is corrupted data# %u %u vs %u",
4928  			 sit_valid_blocks[DATA], sit_valid_blocks[NODE],
4929  			 valid_user_blocks(sbi));
4930  		f2fs_handle_error(sbi, ERROR_INCONSISTENT_BLOCK_COUNT);
4931  		return -EFSCORRUPTED;
4932  	}
4933  
4934  	return 0;
4935  }
4936  
4937  static void init_free_segmap(struct f2fs_sb_info *sbi)
4938  {
4939  	unsigned int start;
4940  	int type;
4941  	struct seg_entry *sentry;
4942  
4943  	for (start = 0; start < MAIN_SEGS(sbi); start++) {
4944  		if (f2fs_usable_blks_in_seg(sbi, start) == 0)
4945  			continue;
4946  		sentry = get_seg_entry(sbi, start);
4947  		if (!sentry->valid_blocks)
4948  			__set_free(sbi, start);
4949  		else
4950  			SIT_I(sbi)->written_valid_blocks +=
4951  						sentry->valid_blocks;
4952  	}
4953  
4954  	/* mark the current segments as in use */
4955  	for (type = CURSEG_HOT_DATA; type <= CURSEG_COLD_NODE; type++) {
4956  		struct curseg_info *curseg_t = CURSEG_I(sbi, type);
4957  
4958  		__set_test_and_inuse(sbi, curseg_t->segno);
4959  	}
4960  }
4961  
4962  static void init_dirty_segmap(struct f2fs_sb_info *sbi)
4963  {
4964  	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
4965  	struct free_segmap_info *free_i = FREE_I(sbi);
4966  	unsigned int segno = 0, offset = 0, secno;
4967  	block_t valid_blocks, usable_blks_in_seg;
4968  
4969  	while (1) {
4970  		/* find dirty segment based on free segmap */
4971  		segno = find_next_inuse(free_i, MAIN_SEGS(sbi), offset);
4972  		if (segno >= MAIN_SEGS(sbi))
4973  			break;
4974  		offset = segno + 1;
4975  		valid_blocks = get_valid_blocks(sbi, segno, false);
4976  		usable_blks_in_seg = f2fs_usable_blks_in_seg(sbi, segno);
4977  		if (valid_blocks == usable_blks_in_seg || !valid_blocks)
4978  			continue;
4979  		if (valid_blocks > usable_blks_in_seg) {
4980  			f2fs_bug_on(sbi, 1);
4981  			continue;
4982  		}
4983  		mutex_lock(&dirty_i->seglist_lock);
4984  		__locate_dirty_segment(sbi, segno, DIRTY);
4985  		mutex_unlock(&dirty_i->seglist_lock);
4986  	}
4987  
4988  	if (!__is_large_section(sbi))
4989  		return;
4990  
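	/* For large sections, also track partially valid sections in dirty_secmap. */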
4991  	mutex_lock(&dirty_i->seglist_lock);
4992  	for (segno = 0; segno < MAIN_SEGS(sbi); segno += SEGS_PER_SEC(sbi)) {
4993  		valid_blocks = get_valid_blocks(sbi, segno, true);
4994  		secno = GET_SEC_FROM_SEG(sbi, segno);
4995  
4996  		if (!valid_blocks || valid_blocks == CAP_BLKS_PER_SEC(sbi))
4997  			continue;
4998  		if (IS_CURSEC(sbi, secno))
4999  			continue;
5000  		set_bit(secno, dirty_i->dirty_secmap);
5001  	}
5002  	mutex_unlock(&dirty_i->seglist_lock);
5003  }
5004  
5005  static int init_victim_secmap(struct f2fs_sb_info *sbi)
5006  {
5007  	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
5008  	unsigned int bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
5009  
5010  	dirty_i->victim_secmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL);
5011  	if (!dirty_i->victim_secmap)
5012  		return -ENOMEM;
5013  
5014  	dirty_i->pinned_secmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL);
5015  	if (!dirty_i->pinned_secmap)
5016  		return -ENOMEM;
5017  
5018  	dirty_i->pinned_secmap_cnt = 0;
5019  	dirty_i->enable_pin_section = true;
5020  	return 0;
5021  }
5022  
5023  static int build_dirty_segmap(struct f2fs_sb_info *sbi)
5024  {
5025  	struct dirty_seglist_info *dirty_i;
5026  	unsigned int bitmap_size, i;
5027  
5028  	/* allocate memory for dirty segments list information */
5029  	dirty_i = f2fs_kzalloc(sbi, sizeof(struct dirty_seglist_info),
5030  								GFP_KERNEL);
5031  	if (!dirty_i)
5032  		return -ENOMEM;
5033  
5034  	SM_I(sbi)->dirty_info = dirty_i;
5035  	mutex_init(&dirty_i->seglist_lock);
5036  
5037  	bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
5038  
5039  	for (i = 0; i < NR_DIRTY_TYPE; i++) {
5040  		dirty_i->dirty_segmap[i] = f2fs_kvzalloc(sbi, bitmap_size,
5041  								GFP_KERNEL);
5042  		if (!dirty_i->dirty_segmap[i])
5043  			return -ENOMEM;
5044  	}
5045  
5046  	if (__is_large_section(sbi)) {
5047  		bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
5048  		dirty_i->dirty_secmap = f2fs_kvzalloc(sbi,
5049  						bitmap_size, GFP_KERNEL);
5050  		if (!dirty_i->dirty_secmap)
5051  			return -ENOMEM;
5052  	}
5053  
5054  	init_dirty_segmap(sbi);
5055  	return init_victim_secmap(sbi);
5056  }
5057  
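/*
 * Verify that each current segment's alloc_type and next_blkoff are
 * consistent with its valid-block bitmap loaded from the SIT.
 */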
5058  static int sanity_check_curseg(struct f2fs_sb_info *sbi)
5059  {
5060  	int i;
5061  
5062  	/*
5063  	 * In an LFS/SSR curseg, .next_blkoff should point to an unused blkaddr;
5064  	 * in an LFS curseg, every blkaddr after .next_blkoff should be unused.
5065  	 */
5066  	for (i = 0; i < NR_PERSISTENT_LOG; i++) {
5067  		struct curseg_info *curseg = CURSEG_I(sbi, i);
5068  		struct seg_entry *se = get_seg_entry(sbi, curseg->segno);
5069  		unsigned int blkofs = curseg->next_blkoff;
5070  
5071  		if (f2fs_sb_has_readonly(sbi) &&
5072  			i != CURSEG_HOT_DATA && i != CURSEG_HOT_NODE)
5073  			continue;
5074  
5075  		sanity_check_seg_type(sbi, curseg->seg_type);
5076  
5077  		if (curseg->alloc_type != LFS && curseg->alloc_type != SSR) {
5078  			f2fs_err(sbi,
5079  				 "Current segment has invalid alloc_type:%d",
5080  				 curseg->alloc_type);
5081  			f2fs_handle_error(sbi, ERROR_INVALID_CURSEG);
5082  			return -EFSCORRUPTED;
5083  		}
5084  
5085  		if (f2fs_test_bit(blkofs, se->cur_valid_map))
5086  			goto out;
5087  
5088  		if (curseg->alloc_type == SSR)
5089  			continue;
5090  
5091  		for (blkofs += 1; blkofs < BLKS_PER_SEG(sbi); blkofs++) {
5092  			if (!f2fs_test_bit(blkofs, se->cur_valid_map))
5093  				continue;
5094  out:
5095  			f2fs_err(sbi,
5096  				 "Current segment's next free block offset is inconsistent with bitmap, logtype:%u, segno:%u, type:%u, next_blkoff:%u, blkofs:%u",
5097  				 i, curseg->segno, curseg->alloc_type,
5098  				 curseg->next_blkoff, blkofs);
5099  			f2fs_handle_error(sbi, ERROR_INVALID_CURSEG);
5100  			return -EFSCORRUPTED;
5101  		}
5102  	}
5103  	return 0;
5104  }
5105  
5106  #ifdef CONFIG_BLK_DEV_ZONED
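/*
 * Check one sequential zone against the valid block count recorded in the
 * SIT: reset zones that should be empty but are not, and finish (or fill)
 * zones whose write pointer is inconsistent with their valid blocks.
 */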
5107  static int check_zone_write_pointer(struct f2fs_sb_info *sbi,
5108  				    struct f2fs_dev_info *fdev,
5109  				    struct blk_zone *zone)
5110  {
5111  	unsigned int zone_segno;
5112  	block_t zone_block, valid_block_cnt;
5113  	unsigned int log_sectors_per_block = sbi->log_blocksize - SECTOR_SHIFT;
5114  	int ret;
5115  	unsigned int nofs_flags;
5116  
5117  	if (zone->type != BLK_ZONE_TYPE_SEQWRITE_REQ)
5118  		return 0;
5119  
5120  	zone_block = fdev->start_blk + (zone->start >> log_sectors_per_block);
5121  	zone_segno = GET_SEGNO(sbi, zone_block);
5122  
5123  	/*
5124  	 * Skip checking zones that cursegs point to, since
5125  	 * fix_curseg_write_pointer() checks them.
5126  	 */
5127  	if (zone_segno >= MAIN_SEGS(sbi))
5128  		return 0;
5129  
5130  	/*
5131  	 * Get the number of valid blocks in the zone.
5132  	 */
5133  	valid_block_cnt = get_valid_blocks(sbi, zone_segno, true);
5134  	if (IS_CURSEC(sbi, GET_SEC_FROM_SEG(sbi, zone_segno))) {
5135  		f2fs_notice(sbi, "Open zones: valid block[0x%x,0x%x] cond[%s]",
5136  				zone_segno, valid_block_cnt,
5137  				blk_zone_cond_str(zone->cond));
5138  		return 0;
5139  	}
5140  
5141  	if ((!valid_block_cnt && zone->cond == BLK_ZONE_COND_EMPTY) ||
5142  	    (valid_block_cnt && zone->cond == BLK_ZONE_COND_FULL))
5143  		return 0;
5144  
5145  	if (!valid_block_cnt) {
5146  		f2fs_notice(sbi, "Zone without valid block has non-zero write "
5147  			    "pointer. Reset the write pointer: cond[%s]",
5148  			    blk_zone_cond_str(zone->cond));
5149  		ret = __f2fs_issue_discard_zone(sbi, fdev->bdev, zone_block,
5150  					zone->len >> log_sectors_per_block);
5151  		if (ret)
5152  			f2fs_err(sbi, "Discard zone failed: %s (errno=%d)",
5153  				 fdev->path, ret);
5154  		return ret;
5155  	}
5156  
5157  	/*
5158  	 * If there are valid blocks and the write pointer doesn't match
5159  	 * them, report the inconsistency and fill the zone to its end to
5160  	 * close it. This inconsistency does not cause a write error,
5161  	 * because the zone will not be selected for write operations
5162  	 * until it gets discarded.
5163  	 */
5164  	f2fs_notice(sbi, "Valid blocks are not aligned with write "
5165  		    "pointer: valid block[0x%x,0x%x] cond[%s]",
5166  		    zone_segno, valid_block_cnt, blk_zone_cond_str(zone->cond));
5167  
5168  	nofs_flags = memalloc_nofs_save();
5169  	ret = blkdev_zone_mgmt(fdev->bdev, REQ_OP_ZONE_FINISH,
5170  				zone->start, zone->len);
5171  	memalloc_nofs_restore(nofs_flags);
5172  	if (ret == -EOPNOTSUPP) {
5173  		ret = blkdev_issue_zeroout(fdev->bdev, zone->wp,
5174  					zone->len - (zone->wp - zone->start),
5175  					GFP_NOFS, 0);
5176  		if (ret)
5177  			f2fs_err(sbi, "Fill up zone failed: %s (errno=%d)",
5178  					fdev->path, ret);
5179  	} else if (ret) {
5180  		f2fs_err(sbi, "Finishing zone failed: %s (errno=%d)",
5181  				fdev->path, ret);
5182  	}
5183  
5184  	return ret;
5185  }
5186  
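/* Return the zoned device containing @zone_blkaddr, or NULL if there is none. */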
5187  static struct f2fs_dev_info *get_target_zoned_dev(struct f2fs_sb_info *sbi,
5188  						  block_t zone_blkaddr)
5189  {
5190  	int i;
5191  
5192  	for (i = 0; i < sbi->s_ndevs; i++) {
5193  		if (!bdev_is_zoned(FDEV(i).bdev))
5194  			continue;
5195  		if (sbi->s_ndevs == 1 || (FDEV(i).start_blk <= zone_blkaddr &&
5196  				zone_blkaddr <= FDEV(i).end_blk))
5197  			return &FDEV(i);
5198  	}
5199  
5200  	return NULL;
5201  }
5202  
5203  static int report_one_zone_cb(struct blk_zone *zone, unsigned int idx,
5204  			      void *data)
5205  {
5206  	memcpy(data, zone, sizeof(struct blk_zone));
5207  	return 0;
5208  }
5209  
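/*
 * Align the current segment of @type with the write pointer of the zone it
 * resides in: after a clean unmount an aligned curseg is reused as-is,
 * otherwise a new section is allocated and its zone reset if necessary.
 */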
5210  static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type)
5211  {
5212  	struct curseg_info *cs = CURSEG_I(sbi, type);
5213  	struct f2fs_dev_info *zbd;
5214  	struct blk_zone zone;
5215  	unsigned int cs_section, wp_segno, wp_blkoff, wp_sector_off;
5216  	block_t cs_zone_block, wp_block;
5217  	unsigned int log_sectors_per_block = sbi->log_blocksize - SECTOR_SHIFT;
5218  	sector_t zone_sector;
5219  	int err;
5220  
5221  	cs_section = GET_SEC_FROM_SEG(sbi, cs->segno);
5222  	cs_zone_block = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, cs_section));
5223  
5224  	zbd = get_target_zoned_dev(sbi, cs_zone_block);
5225  	if (!zbd)
5226  		return 0;
5227  
5228  	/* report zone for the sector the curseg points to */
5229  	zone_sector = (sector_t)(cs_zone_block - zbd->start_blk)
5230  		<< log_sectors_per_block;
5231  	err = blkdev_report_zones(zbd->bdev, zone_sector, 1,
5232  				  report_one_zone_cb, &zone);
5233  	if (err != 1) {
5234  		f2fs_err(sbi, "Report zone failed: %s errno=(%d)",
5235  			 zbd->path, err);
5236  		return err;
5237  	}
5238  
5239  	if (zone.type != BLK_ZONE_TYPE_SEQWRITE_REQ)
5240  		return 0;
5241  
5242  	/*
5243  	 * If the previous mount was unmounted safely, we can keep using the
5244  	 * current segments. Otherwise, allocate new sections.
5245  	 */
5246  	if (is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) {
5247  		wp_block = zbd->start_blk + (zone.wp >> log_sectors_per_block);
5248  		wp_segno = GET_SEGNO(sbi, wp_block);
5249  		wp_blkoff = wp_block - START_BLOCK(sbi, wp_segno);
5250  		wp_sector_off = zone.wp & GENMASK(log_sectors_per_block - 1, 0);
5251  
5252  		if (cs->segno == wp_segno && cs->next_blkoff == wp_blkoff &&
5253  				wp_sector_off == 0)
5254  			return 0;
5255  
5256  		f2fs_notice(sbi, "Unaligned curseg[%d] with write pointer: "
5257  			    "curseg[0x%x,0x%x] wp[0x%x,0x%x]", type, cs->segno,
5258  			    cs->next_blkoff, wp_segno, wp_blkoff);
5259  	}
5260  
5261  	/* Allocate a new section if the curseg is not at the start of a fresh section. */
5262  	if (cs->next_blkoff ||
5263  	    cs->segno != GET_SEG_FROM_SEC(sbi, GET_ZONE_FROM_SEC(sbi, cs_section))) {
5264  		unsigned int old_segno = cs->segno, old_blkoff = cs->next_blkoff;
5265  
5266  		f2fs_allocate_new_section(sbi, type, true);
5267  		f2fs_notice(sbi, "Assign new section to curseg[%d]: "
5268  				"[0x%x,0x%x] -> [0x%x,0x%x]",
5269  				type, old_segno, old_blkoff,
5270  				cs->segno, cs->next_blkoff);
5271  	}
5272  
5273  	/* check consistency of the zone the curseg points to */
5274  	if (check_zone_write_pointer(sbi, zbd, &zone))
5275  		return -EIO;
5276  
5277  	/* check newly assigned zone */
5278  	cs_section = GET_SEC_FROM_SEG(sbi, cs->segno);
5279  	cs_zone_block = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, cs_section));
5280  
5281  	zbd = get_target_zoned_dev(sbi, cs_zone_block);
5282  	if (!zbd)
5283  		return 0;
5284  
5285  	zone_sector = (sector_t)(cs_zone_block - zbd->start_blk)
5286  		<< log_sectors_per_block;
5287  	err = blkdev_report_zones(zbd->bdev, zone_sector, 1,
5288  				  report_one_zone_cb, &zone);
5289  	if (err != 1) {
5290  		f2fs_err(sbi, "Report zone failed: %s errno=(%d)",
5291  			 zbd->path, err);
5292  		return err;
5293  	}
5294  
5295  	if (zone.type != BLK_ZONE_TYPE_SEQWRITE_REQ)
5296  		return 0;
5297  
5298  	if (zone.wp != zone.start) {
5299  		f2fs_notice(sbi,
5300  			    "New zone for curseg[%d] is not yet discarded. "
5301  			    "Reset the zone: curseg[0x%x,0x%x]",
5302  			    type, cs->segno, cs->next_blkoff);
5303  		err = __f2fs_issue_discard_zone(sbi, zbd->bdev, cs_zone_block,
5304  					zone.len >> log_sectors_per_block);
5305  		if (err) {
5306  			f2fs_err(sbi, "Discard zone failed: %s (errno=%d)",
5307  				 zbd->path, err);
5308  			return err;
5309  		}
5310  	}
5311  
5312  	return 0;
5313  }
5314  
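/* Fix write-pointer alignment for every persistent log. */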
5315  int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi)
5316  {
5317  	int i, ret;
5318  
5319  	for (i = 0; i < NR_PERSISTENT_LOG; i++) {
5320  		ret = fix_curseg_write_pointer(sbi, i);
5321  		if (ret)
5322  			return ret;
5323  	}
5324  
5325  	return 0;
5326  }
5327  
5328  struct check_zone_write_pointer_args {
5329  	struct f2fs_sb_info *sbi;
5330  	struct f2fs_dev_info *fdev;
5331  };
5332  
5333  static int check_zone_write_pointer_cb(struct blk_zone *zone, unsigned int idx,
5334  				      void *data)
5335  {
5336  	struct check_zone_write_pointer_args *args;
5337  
5338  	args = (struct check_zone_write_pointer_args *)data;
5339  
5340  	return check_zone_write_pointer(args->sbi, args->fdev, zone);
5341  }
5342  
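/*
 * Report all zones of every zoned device and validate each write pointer
 * against the valid block counts recorded in the SIT.
 */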
5343  int f2fs_check_write_pointer(struct f2fs_sb_info *sbi)
5344  {
5345  	int i, ret;
5346  	struct check_zone_write_pointer_args args;
5347  
5348  	for (i = 0; i < sbi->s_ndevs; i++) {
5349  		if (!bdev_is_zoned(FDEV(i).bdev))
5350  			continue;
5351  
5352  		args.sbi = sbi;
5353  		args.fdev = &FDEV(i);
5354  		ret = blkdev_report_zones(FDEV(i).bdev, 0, BLK_ALL_ZONES,
5355  					  check_zone_write_pointer_cb, &args);
5356  		if (ret < 0)
5357  			return ret;
5358  	}
5359  
5360  	return 0;
5361  }
5362  
5363  /*
5364   * Return the number of usable blocks in a segment. The number of blocks
5365   * returned is always equal to the number of blocks in a segment for
5366   * segments fully contained within a sequential zone capacity or a
5367   * conventional zone. For segments partially contained in a sequential
5368   * zone capacity, the number of usable blocks up to the zone capacity
5369   * is returned. 0 is returned in all other cases.
5370   */
5371  static inline unsigned int f2fs_usable_zone_blks_in_seg(
5372  			struct f2fs_sb_info *sbi, unsigned int segno)
5373  {
5374  	block_t seg_start, sec_start_blkaddr, sec_cap_blkaddr;
5375  	unsigned int secno;
5376  
5377  	if (!sbi->unusable_blocks_per_sec)
5378  		return BLKS_PER_SEG(sbi);
5379  
5380  	secno = GET_SEC_FROM_SEG(sbi, segno);
5381  	seg_start = START_BLOCK(sbi, segno);
5382  	sec_start_blkaddr = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, secno));
5383  	sec_cap_blkaddr = sec_start_blkaddr + CAP_BLKS_PER_SEC(sbi);
5384  
5385  	/*
5386  	 * If segment starts before zone capacity and spans beyond
5387  	 * zone capacity, then usable blocks are from seg start to
5388  	 * zone capacity. If the segment starts after the zone capacity,
5389  	 * then there are no usable blocks.
5390  	 */
5391  	if (seg_start >= sec_cap_blkaddr)
5392  		return 0;
5393  	if (seg_start + BLKS_PER_SEG(sbi) > sec_cap_blkaddr)
5394  		return sec_cap_blkaddr - seg_start;
5395  
5396  	return BLKS_PER_SEG(sbi);
5397  }
5398  #else
5399  int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi)
5400  {
5401  	return 0;
5402  }
5403  
5404  int f2fs_check_write_pointer(struct f2fs_sb_info *sbi)
5405  {
5406  	return 0;
5407  }
5408  
5409  static inline unsigned int f2fs_usable_zone_blks_in_seg(struct f2fs_sb_info *sbi,
5410  							unsigned int segno)
5411  {
5412  	return 0;
5413  }
5414  
5415  #endif
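/*
 * Number of usable blocks in a segment: limited by the zone capacity on
 * zoned devices, a full segment otherwise.
 */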
5416  unsigned int f2fs_usable_blks_in_seg(struct f2fs_sb_info *sbi,
5417  					unsigned int segno)
5418  {
5419  	if (f2fs_sb_has_blkzoned(sbi))
5420  		return f2fs_usable_zone_blks_in_seg(sbi, segno);
5421  
5422  	return BLKS_PER_SEG(sbi);
5423  }
5424  
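/*
 * Number of usable segments in a section: capped by the zone capacity on
 * zoned devices, all segments of the section otherwise.
 */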
5425  unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi)
5426  {
5427  	if (f2fs_sb_has_blkzoned(sbi))
5428  		return CAP_SEGS_PER_SEC(sbi);
5429  
5430  	return SEGS_PER_SEC(sbi);
5431  }
5432  
5433  /*
5434   * Update the min/max modified time used by the cost-benefit GC algorithm.
5435   */
5436  static void init_min_max_mtime(struct f2fs_sb_info *sbi)
5437  {
5438  	struct sit_info *sit_i = SIT_I(sbi);
5439  	unsigned int segno;
5440  
5441  	down_write(&sit_i->sentry_lock);
5442  
5443  	sit_i->min_mtime = ULLONG_MAX;
5444  
5445  	for (segno = 0; segno < MAIN_SEGS(sbi); segno += SEGS_PER_SEC(sbi)) {
5446  		unsigned int i;
5447  		unsigned long long mtime = 0;
5448  
5449  		for (i = 0; i < SEGS_PER_SEC(sbi); i++)
5450  			mtime += get_seg_entry(sbi, segno + i)->mtime;
5451  
5452  		mtime = div_u64(mtime, SEGS_PER_SEC(sbi));
5453  
5454  		if (sit_i->min_mtime > mtime)
5455  			sit_i->min_mtime = mtime;
5456  	}
5457  	sit_i->max_mtime = get_mtime(sbi, false);
5458  	sit_i->dirty_max_mtime = 0;
5459  	up_write(&sit_i->sentry_lock);
5460  }
5461  
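/*
 * Build the segment manager: read its parameters from the superblock and
 * checkpoint, set up flush and discard command control, and construct the
 * SIT, free/dirty segmaps and current segments.
 */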
5462  int f2fs_build_segment_manager(struct f2fs_sb_info *sbi)
5463  {
5464  	struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
5465  	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
5466  	struct f2fs_sm_info *sm_info;
5467  	int err;
5468  
5469  	sm_info = f2fs_kzalloc(sbi, sizeof(struct f2fs_sm_info), GFP_KERNEL);
5470  	if (!sm_info)
5471  		return -ENOMEM;
5472  
5473  	/* init sm info */
5474  	sbi->sm_info = sm_info;
5475  	sm_info->seg0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr);
5476  	sm_info->main_blkaddr = le32_to_cpu(raw_super->main_blkaddr);
5477  	sm_info->segment_count = le32_to_cpu(raw_super->segment_count);
5478  	sm_info->reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count);
5479  	sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);
5480  	sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main);
5481  	sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr);
5482  	sm_info->rec_prefree_segments = sm_info->main_segments *
5483  					DEF_RECLAIM_PREFREE_SEGMENTS / 100;
5484  	if (sm_info->rec_prefree_segments > DEF_MAX_RECLAIM_PREFREE_SEGMENTS)
5485  		sm_info->rec_prefree_segments = DEF_MAX_RECLAIM_PREFREE_SEGMENTS;
5486  
5487  	if (!f2fs_lfs_mode(sbi))
5488  		sm_info->ipu_policy = BIT(F2FS_IPU_FSYNC);
5489  	sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
5490  	sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
5491  	sm_info->min_seq_blocks = BLKS_PER_SEG(sbi);
5492  	sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS;
5493  	sm_info->min_ssr_sections = reserved_sections(sbi);
5494  
5495  	INIT_LIST_HEAD(&sm_info->sit_entry_set);
5496  
5497  	init_f2fs_rwsem(&sm_info->curseg_lock);
5498  
5499  	err = f2fs_create_flush_cmd_control(sbi);
5500  	if (err)
5501  		return err;
5502  
5503  	err = create_discard_cmd_control(sbi);
5504  	if (err)
5505  		return err;
5506  
5507  	err = build_sit_info(sbi);
5508  	if (err)
5509  		return err;
5510  	err = build_free_segmap(sbi);
5511  	if (err)
5512  		return err;
5513  	err = build_curseg(sbi);
5514  	if (err)
5515  		return err;
5516  
5517  	/* reinit free segmap based on SIT */
5518  	err = build_sit_entries(sbi);
5519  	if (err)
5520  		return err;
5521  
5522  	init_free_segmap(sbi);
5523  	err = build_dirty_segmap(sbi);
5524  	if (err)
5525  		return err;
5526  
5527  	err = sanity_check_curseg(sbi);
5528  	if (err)
5529  		return err;
5530  
5531  	init_min_max_mtime(sbi);
5532  	return 0;
5533  }
5534  
5535  static void discard_dirty_segmap(struct f2fs_sb_info *sbi,
5536  		enum dirty_type dirty_type)
5537  {
5538  	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
5539  
5540  	mutex_lock(&dirty_i->seglist_lock);
5541  	kvfree(dirty_i->dirty_segmap[dirty_type]);
5542  	dirty_i->nr_dirty[dirty_type] = 0;
5543  	mutex_unlock(&dirty_i->seglist_lock);
5544  }
5545  
5546  static void destroy_victim_secmap(struct f2fs_sb_info *sbi)
5547  {
5548  	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
5549  
5550  	kvfree(dirty_i->pinned_secmap);
5551  	kvfree(dirty_i->victim_secmap);
5552  }
5553  
5554  static void destroy_dirty_segmap(struct f2fs_sb_info *sbi)
5555  {
5556  	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
5557  	int i;
5558  
5559  	if (!dirty_i)
5560  		return;
5561  
5562  	/* discard pre-free/dirty segments list */
5563  	for (i = 0; i < NR_DIRTY_TYPE; i++)
5564  		discard_dirty_segmap(sbi, i);
5565  
5566  	if (__is_large_section(sbi)) {
5567  		mutex_lock(&dirty_i->seglist_lock);
5568  		kvfree(dirty_i->dirty_secmap);
5569  		mutex_unlock(&dirty_i->seglist_lock);
5570  	}
5571  
5572  	destroy_victim_secmap(sbi);
5573  	SM_I(sbi)->dirty_info = NULL;
5574  	kfree(dirty_i);
5575  }
5576  
5577  static void destroy_curseg(struct f2fs_sb_info *sbi)
5578  {
5579  	struct curseg_info *array = SM_I(sbi)->curseg_array;
5580  	int i;
5581  
5582  	if (!array)
5583  		return;
5584  	SM_I(sbi)->curseg_array = NULL;
5585  	for (i = 0; i < NR_CURSEG_TYPE; i++) {
5586  		kfree(array[i].sum_blk);
5587  		kfree(array[i].journal);
5588  	}
5589  	kfree(array);
5590  }
5591  
5592  static void destroy_free_segmap(struct f2fs_sb_info *sbi)
5593  {
5594  	struct free_segmap_info *free_i = SM_I(sbi)->free_info;
5595  
5596  	if (!free_i)
5597  		return;
5598  	SM_I(sbi)->free_info = NULL;
5599  	kvfree(free_i->free_segmap);
5600  	kvfree(free_i->free_secmap);
5601  	kfree(free_i);
5602  }
5603  
5604  static void destroy_sit_info(struct f2fs_sb_info *sbi)
5605  {
5606  	struct sit_info *sit_i = SIT_I(sbi);
5607  
5608  	if (!sit_i)
5609  		return;
5610  
5611  	if (sit_i->sentries)
5612  		kvfree(sit_i->bitmap);
5613  	kfree(sit_i->tmp_map);
5614  
5615  	kvfree(sit_i->sentries);
5616  	kvfree(sit_i->sec_entries);
5617  	kvfree(sit_i->dirty_sentries_bitmap);
5618  
5619  	SM_I(sbi)->sit_info = NULL;
5620  	kvfree(sit_i->sit_bitmap);
5621  #ifdef CONFIG_F2FS_CHECK_FS
5622  	kvfree(sit_i->sit_bitmap_mir);
5623  	kvfree(sit_i->invalid_segmap);
5624  #endif
5625  	kfree(sit_i);
5626  }
5627  
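/* Tear down everything set up by f2fs_build_segment_manager(). */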
5628  void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi)
5629  {
5630  	struct f2fs_sm_info *sm_info = SM_I(sbi);
5631  
5632  	if (!sm_info)
5633  		return;
5634  	f2fs_destroy_flush_cmd_control(sbi, true);
5635  	destroy_discard_cmd_control(sbi);
5636  	destroy_dirty_segmap(sbi);
5637  	destroy_curseg(sbi);
5638  	destroy_free_segmap(sbi);
5639  	destroy_sit_info(sbi);
5640  	sbi->sm_info = NULL;
5641  	kfree(sm_info);
5642  }
5643  
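/*
 * Create the slab caches used by the segment manager; on failure, destroy
 * any caches already created and return -ENOMEM.
 */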
5644  int __init f2fs_create_segment_manager_caches(void)
5645  {
5646  	discard_entry_slab = f2fs_kmem_cache_create("f2fs_discard_entry",
5647  			sizeof(struct discard_entry));
5648  	if (!discard_entry_slab)
5649  		goto fail;
5650  
5651  	discard_cmd_slab = f2fs_kmem_cache_create("f2fs_discard_cmd",
5652  			sizeof(struct discard_cmd));
5653  	if (!discard_cmd_slab)
5654  		goto destroy_discard_entry;
5655  
5656  	sit_entry_set_slab = f2fs_kmem_cache_create("f2fs_sit_entry_set",
5657  			sizeof(struct sit_entry_set));
5658  	if (!sit_entry_set_slab)
5659  		goto destroy_discard_cmd;
5660  
5661  	revoke_entry_slab = f2fs_kmem_cache_create("f2fs_revoke_entry",
5662  			sizeof(struct revoke_entry));
5663  	if (!revoke_entry_slab)
5664  		goto destroy_sit_entry_set;
5665  	return 0;
5666  
5667  destroy_sit_entry_set:
5668  	kmem_cache_destroy(sit_entry_set_slab);
5669  destroy_discard_cmd:
5670  	kmem_cache_destroy(discard_cmd_slab);
5671  destroy_discard_entry:
5672  	kmem_cache_destroy(discard_entry_slab);
5673  fail:
5674  	return -ENOMEM;
5675  }
5676  
5677  void f2fs_destroy_segment_manager_caches(void)
5678  {
5679  	kmem_cache_destroy(sit_entry_set_slab);
5680  	kmem_cache_destroy(discard_cmd_slab);
5681  	kmem_cache_destroy(discard_entry_slab);
5682  	kmem_cache_destroy(revoke_entry_slab);
5683  }
5684