1  // SPDX-License-Identifier: GPL-2.0
2  /*
3   *  linux/fs/ext4/super.c
4   *
5   * Copyright (C) 1992, 1993, 1994, 1995
6   * Remy Card (card@masi.ibp.fr)
7   * Laboratoire MASI - Institut Blaise Pascal
8   * Universite Pierre et Marie Curie (Paris VI)
9   *
10   *  from
11   *
12   *  linux/fs/minix/inode.c
13   *
14   *  Copyright (C) 1991, 1992  Linus Torvalds
15   *
16   *  Big-endian to little-endian byte-swapping/bitmaps by
17   *        David S. Miller (davem@caip.rutgers.edu), 1995
18   */
19  
20  #include <linux/module.h>
21  #include <linux/string.h>
22  #include <linux/fs.h>
23  #include <linux/time.h>
24  #include <linux/vmalloc.h>
25  #include <linux/slab.h>
26  #include <linux/init.h>
27  #include <linux/blkdev.h>
28  #include <linux/backing-dev.h>
29  #include <linux/parser.h>
30  #include <linux/buffer_head.h>
31  #include <linux/exportfs.h>
32  #include <linux/vfs.h>
33  #include <linux/random.h>
34  #include <linux/mount.h>
35  #include <linux/namei.h>
36  #include <linux/quotaops.h>
37  #include <linux/seq_file.h>
38  #include <linux/ctype.h>
39  #include <linux/log2.h>
40  #include <linux/crc16.h>
41  #include <linux/dax.h>
42  #include <linux/uaccess.h>
43  #include <linux/iversion.h>
44  #include <linux/unicode.h>
45  #include <linux/part_stat.h>
46  #include <linux/kthread.h>
47  #include <linux/freezer.h>
48  #include <linux/fsnotify.h>
49  #include <linux/fs_context.h>
50  #include <linux/fs_parser.h>
51  
52  #include "ext4.h"
53  #include "ext4_extents.h"	/* Needed for trace points definition */
54  #include "ext4_jbd2.h"
55  #include "xattr.h"
56  #include "acl.h"
57  #include "mballoc.h"
58  #include "fsmap.h"
59  
60  #define CREATE_TRACE_POINTS
61  #include <trace/events/ext4.h>
62  
63  static struct ext4_lazy_init *ext4_li_info;
64  static DEFINE_MUTEX(ext4_li_mtx);
65  static struct ratelimit_state ext4_mount_msg_ratelimit;
66  
67  static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
68  			     unsigned long journal_devnum);
69  static int ext4_show_options(struct seq_file *seq, struct dentry *root);
70  static void ext4_update_super(struct super_block *sb);
71  static int ext4_commit_super(struct super_block *sb);
72  static int ext4_mark_recovery_complete(struct super_block *sb,
73  					struct ext4_super_block *es);
74  static int ext4_clear_journal_err(struct super_block *sb,
75  				  struct ext4_super_block *es);
76  static int ext4_sync_fs(struct super_block *sb, int wait);
77  static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
78  static int ext4_unfreeze(struct super_block *sb);
79  static int ext4_freeze(struct super_block *sb);
80  static inline int ext2_feature_set_ok(struct super_block *sb);
81  static inline int ext3_feature_set_ok(struct super_block *sb);
82  static void ext4_destroy_lazyinit_thread(void);
83  static void ext4_unregister_li_request(struct super_block *sb);
84  static void ext4_clear_request_list(void);
85  static struct inode *ext4_get_journal_inode(struct super_block *sb,
86  					    unsigned int journal_inum);
87  static int ext4_validate_options(struct fs_context *fc);
88  static int ext4_check_opt_consistency(struct fs_context *fc,
89  				      struct super_block *sb);
90  static void ext4_apply_options(struct fs_context *fc, struct super_block *sb);
91  static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param);
92  static int ext4_get_tree(struct fs_context *fc);
93  static int ext4_reconfigure(struct fs_context *fc);
94  static void ext4_fc_free(struct fs_context *fc);
95  static int ext4_init_fs_context(struct fs_context *fc);
96  static void ext4_kill_sb(struct super_block *sb);
97  static const struct fs_parameter_spec ext4_param_specs[];
98  
99  /*
100   * Lock ordering
101   *
102   * page fault path:
103   * mmap_lock -> sb_start_pagefault -> invalidate_lock (r) -> transaction start
104   *   -> page lock -> i_data_sem (rw)
105   *
106   * buffered write path:
107   * sb_start_write -> i_mutex -> mmap_lock
108   * sb_start_write -> i_mutex -> transaction start -> page lock ->
109   *   i_data_sem (rw)
110   *
111   * truncate:
112   * sb_start_write -> i_mutex -> invalidate_lock (w) -> i_mmap_rwsem (w) ->
113   *   page lock
114   * sb_start_write -> i_mutex -> invalidate_lock (w) -> transaction start ->
115   *   i_data_sem (rw)
116   *
117   * direct IO:
118   * sb_start_write -> i_mutex -> mmap_lock
119   * sb_start_write -> i_mutex -> transaction start -> i_data_sem (rw)
120   *
121   * writepages:
122   * transaction start -> page lock(s) -> i_data_sem (rw)
123   */
124  
125  static const struct fs_context_operations ext4_context_ops = {
126  	.parse_param	= ext4_parse_param,
127  	.get_tree	= ext4_get_tree,
128  	.reconfigure	= ext4_reconfigure,
129  	.free		= ext4_fc_free,
130  };
131  
132  
133  #if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT2)
134  static struct file_system_type ext2_fs_type = {
135  	.owner			= THIS_MODULE,
136  	.name			= "ext2",
137  	.init_fs_context	= ext4_init_fs_context,
138  	.parameters		= ext4_param_specs,
139  	.kill_sb		= ext4_kill_sb,
140  	.fs_flags		= FS_REQUIRES_DEV,
141  };
142  MODULE_ALIAS_FS("ext2");
143  MODULE_ALIAS("ext2");
144  #define IS_EXT2_SB(sb) ((sb)->s_type == &ext2_fs_type)
145  #else
146  #define IS_EXT2_SB(sb) (0)
147  #endif
148  
149  
150  static struct file_system_type ext3_fs_type = {
151  	.owner			= THIS_MODULE,
152  	.name			= "ext3",
153  	.init_fs_context	= ext4_init_fs_context,
154  	.parameters		= ext4_param_specs,
155  	.kill_sb		= ext4_kill_sb,
156  	.fs_flags		= FS_REQUIRES_DEV,
157  };
158  MODULE_ALIAS_FS("ext3");
159  MODULE_ALIAS("ext3");
160  #define IS_EXT3_SB(sb) ((sb)->s_type == &ext3_fs_type)
161  
162  
__ext4_read_bh(struct buffer_head * bh,blk_opf_t op_flags,bh_end_io_t * end_io)163  static inline void __ext4_read_bh(struct buffer_head *bh, blk_opf_t op_flags,
164  				  bh_end_io_t *end_io)
165  {
166  	/*
167  	 * buffer's verified bit is no longer valid after reading from
168  	 * disk again due to write out error, clear it to make sure we
169  	 * recheck the buffer contents.
170  	 */
171  	clear_buffer_verified(bh);
172  
173  	bh->b_end_io = end_io ? end_io : end_buffer_read_sync;
174  	get_bh(bh);
175  	submit_bh(REQ_OP_READ | op_flags, bh);
176  }
177  
ext4_read_bh_nowait(struct buffer_head * bh,blk_opf_t op_flags,bh_end_io_t * end_io)178  void ext4_read_bh_nowait(struct buffer_head *bh, blk_opf_t op_flags,
179  			 bh_end_io_t *end_io)
180  {
181  	BUG_ON(!buffer_locked(bh));
182  
183  	if (ext4_buffer_uptodate(bh)) {
184  		unlock_buffer(bh);
185  		return;
186  	}
187  	__ext4_read_bh(bh, op_flags, end_io);
188  }
189  
ext4_read_bh(struct buffer_head * bh,blk_opf_t op_flags,bh_end_io_t * end_io)190  int ext4_read_bh(struct buffer_head *bh, blk_opf_t op_flags, bh_end_io_t *end_io)
191  {
192  	BUG_ON(!buffer_locked(bh));
193  
194  	if (ext4_buffer_uptodate(bh)) {
195  		unlock_buffer(bh);
196  		return 0;
197  	}
198  
199  	__ext4_read_bh(bh, op_flags, end_io);
200  
201  	wait_on_buffer(bh);
202  	if (buffer_uptodate(bh))
203  		return 0;
204  	return -EIO;
205  }
206  
ext4_read_bh_lock(struct buffer_head * bh,blk_opf_t op_flags,bool wait)207  int ext4_read_bh_lock(struct buffer_head *bh, blk_opf_t op_flags, bool wait)
208  {
209  	lock_buffer(bh);
210  	if (!wait) {
211  		ext4_read_bh_nowait(bh, op_flags, NULL);
212  		return 0;
213  	}
214  	return ext4_read_bh(bh, op_flags, NULL);
215  }
216  
217  /*
218   * This works like __bread_gfp() except it uses ERR_PTR for error
219   * returns.  Currently with sb_bread it's impossible to distinguish
220   * between ENOMEM and EIO situations (since both result in a NULL
221   * return.
222   */
__ext4_sb_bread_gfp(struct super_block * sb,sector_t block,blk_opf_t op_flags,gfp_t gfp)223  static struct buffer_head *__ext4_sb_bread_gfp(struct super_block *sb,
224  					       sector_t block,
225  					       blk_opf_t op_flags, gfp_t gfp)
226  {
227  	struct buffer_head *bh;
228  	int ret;
229  
230  	bh = sb_getblk_gfp(sb, block, gfp);
231  	if (bh == NULL)
232  		return ERR_PTR(-ENOMEM);
233  	if (ext4_buffer_uptodate(bh))
234  		return bh;
235  
236  	ret = ext4_read_bh_lock(bh, REQ_META | op_flags, true);
237  	if (ret) {
238  		put_bh(bh);
239  		return ERR_PTR(ret);
240  	}
241  	return bh;
242  }
243  
ext4_sb_bread(struct super_block * sb,sector_t block,blk_opf_t op_flags)244  struct buffer_head *ext4_sb_bread(struct super_block *sb, sector_t block,
245  				   blk_opf_t op_flags)
246  {
247  	gfp_t gfp = mapping_gfp_constraint(sb->s_bdev->bd_mapping,
248  			~__GFP_FS) | __GFP_MOVABLE;
249  
250  	return __ext4_sb_bread_gfp(sb, block, op_flags, gfp);
251  }
252  
ext4_sb_bread_unmovable(struct super_block * sb,sector_t block)253  struct buffer_head *ext4_sb_bread_unmovable(struct super_block *sb,
254  					    sector_t block)
255  {
256  	gfp_t gfp = mapping_gfp_constraint(sb->s_bdev->bd_mapping,
257  			~__GFP_FS);
258  
259  	return __ext4_sb_bread_gfp(sb, block, 0, gfp);
260  }
261  
ext4_sb_breadahead_unmovable(struct super_block * sb,sector_t block)262  void ext4_sb_breadahead_unmovable(struct super_block *sb, sector_t block)
263  {
264  	struct buffer_head *bh = bdev_getblk(sb->s_bdev, block,
265  			sb->s_blocksize, GFP_NOWAIT | __GFP_NOWARN);
266  
267  	if (likely(bh)) {
268  		if (trylock_buffer(bh))
269  			ext4_read_bh_nowait(bh, REQ_RAHEAD, NULL);
270  		brelse(bh);
271  	}
272  }
273  
ext4_verify_csum_type(struct super_block * sb,struct ext4_super_block * es)274  static int ext4_verify_csum_type(struct super_block *sb,
275  				 struct ext4_super_block *es)
276  {
277  	if (!ext4_has_feature_metadata_csum(sb))
278  		return 1;
279  
280  	return es->s_checksum_type == EXT4_CRC32C_CHKSUM;
281  }
282  
ext4_superblock_csum(struct super_block * sb,struct ext4_super_block * es)283  __le32 ext4_superblock_csum(struct super_block *sb,
284  			    struct ext4_super_block *es)
285  {
286  	struct ext4_sb_info *sbi = EXT4_SB(sb);
287  	int offset = offsetof(struct ext4_super_block, s_checksum);
288  	__u32 csum;
289  
290  	csum = ext4_chksum(sbi, ~0, (char *)es, offset);
291  
292  	return cpu_to_le32(csum);
293  }
294  
ext4_superblock_csum_verify(struct super_block * sb,struct ext4_super_block * es)295  static int ext4_superblock_csum_verify(struct super_block *sb,
296  				       struct ext4_super_block *es)
297  {
298  	if (!ext4_has_metadata_csum(sb))
299  		return 1;
300  
301  	return es->s_checksum == ext4_superblock_csum(sb, es);
302  }
303  
ext4_superblock_csum_set(struct super_block * sb)304  void ext4_superblock_csum_set(struct super_block *sb)
305  {
306  	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
307  
308  	if (!ext4_has_metadata_csum(sb))
309  		return;
310  
311  	es->s_checksum = ext4_superblock_csum(sb, es);
312  }
313  
ext4_block_bitmap(struct super_block * sb,struct ext4_group_desc * bg)314  ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
315  			       struct ext4_group_desc *bg)
316  {
317  	return le32_to_cpu(bg->bg_block_bitmap_lo) |
318  		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
319  		 (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0);
320  }
321  
ext4_inode_bitmap(struct super_block * sb,struct ext4_group_desc * bg)322  ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
323  			       struct ext4_group_desc *bg)
324  {
325  	return le32_to_cpu(bg->bg_inode_bitmap_lo) |
326  		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
327  		 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0);
328  }
329  
ext4_inode_table(struct super_block * sb,struct ext4_group_desc * bg)330  ext4_fsblk_t ext4_inode_table(struct super_block *sb,
331  			      struct ext4_group_desc *bg)
332  {
333  	return le32_to_cpu(bg->bg_inode_table_lo) |
334  		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
335  		 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0);
336  }
337  
ext4_free_group_clusters(struct super_block * sb,struct ext4_group_desc * bg)338  __u32 ext4_free_group_clusters(struct super_block *sb,
339  			       struct ext4_group_desc *bg)
340  {
341  	return le16_to_cpu(bg->bg_free_blocks_count_lo) |
342  		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
343  		 (__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0);
344  }
345  
ext4_free_inodes_count(struct super_block * sb,struct ext4_group_desc * bg)346  __u32 ext4_free_inodes_count(struct super_block *sb,
347  			      struct ext4_group_desc *bg)
348  {
349  	return le16_to_cpu(bg->bg_free_inodes_count_lo) |
350  		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
351  		 (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0);
352  }
353  
ext4_used_dirs_count(struct super_block * sb,struct ext4_group_desc * bg)354  __u32 ext4_used_dirs_count(struct super_block *sb,
355  			      struct ext4_group_desc *bg)
356  {
357  	return le16_to_cpu(bg->bg_used_dirs_count_lo) |
358  		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
359  		 (__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0);
360  }
361  
ext4_itable_unused_count(struct super_block * sb,struct ext4_group_desc * bg)362  __u32 ext4_itable_unused_count(struct super_block *sb,
363  			      struct ext4_group_desc *bg)
364  {
365  	return le16_to_cpu(bg->bg_itable_unused_lo) |
366  		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
367  		 (__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0);
368  }
369  
ext4_block_bitmap_set(struct super_block * sb,struct ext4_group_desc * bg,ext4_fsblk_t blk)370  void ext4_block_bitmap_set(struct super_block *sb,
371  			   struct ext4_group_desc *bg, ext4_fsblk_t blk)
372  {
373  	bg->bg_block_bitmap_lo = cpu_to_le32((u32)blk);
374  	if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
375  		bg->bg_block_bitmap_hi = cpu_to_le32(blk >> 32);
376  }
377  
ext4_inode_bitmap_set(struct super_block * sb,struct ext4_group_desc * bg,ext4_fsblk_t blk)378  void ext4_inode_bitmap_set(struct super_block *sb,
379  			   struct ext4_group_desc *bg, ext4_fsblk_t blk)
380  {
381  	bg->bg_inode_bitmap_lo  = cpu_to_le32((u32)blk);
382  	if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
383  		bg->bg_inode_bitmap_hi = cpu_to_le32(blk >> 32);
384  }
385  
ext4_inode_table_set(struct super_block * sb,struct ext4_group_desc * bg,ext4_fsblk_t blk)386  void ext4_inode_table_set(struct super_block *sb,
387  			  struct ext4_group_desc *bg, ext4_fsblk_t blk)
388  {
389  	bg->bg_inode_table_lo = cpu_to_le32((u32)blk);
390  	if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
391  		bg->bg_inode_table_hi = cpu_to_le32(blk >> 32);
392  }
393  
ext4_free_group_clusters_set(struct super_block * sb,struct ext4_group_desc * bg,__u32 count)394  void ext4_free_group_clusters_set(struct super_block *sb,
395  				  struct ext4_group_desc *bg, __u32 count)
396  {
397  	bg->bg_free_blocks_count_lo = cpu_to_le16((__u16)count);
398  	if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
399  		bg->bg_free_blocks_count_hi = cpu_to_le16(count >> 16);
400  }
401  
ext4_free_inodes_set(struct super_block * sb,struct ext4_group_desc * bg,__u32 count)402  void ext4_free_inodes_set(struct super_block *sb,
403  			  struct ext4_group_desc *bg, __u32 count)
404  {
405  	bg->bg_free_inodes_count_lo = cpu_to_le16((__u16)count);
406  	if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
407  		bg->bg_free_inodes_count_hi = cpu_to_le16(count >> 16);
408  }
409  
ext4_used_dirs_set(struct super_block * sb,struct ext4_group_desc * bg,__u32 count)410  void ext4_used_dirs_set(struct super_block *sb,
411  			  struct ext4_group_desc *bg, __u32 count)
412  {
413  	bg->bg_used_dirs_count_lo = cpu_to_le16((__u16)count);
414  	if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
415  		bg->bg_used_dirs_count_hi = cpu_to_le16(count >> 16);
416  }
417  
ext4_itable_unused_set(struct super_block * sb,struct ext4_group_desc * bg,__u32 count)418  void ext4_itable_unused_set(struct super_block *sb,
419  			  struct ext4_group_desc *bg, __u32 count)
420  {
421  	bg->bg_itable_unused_lo = cpu_to_le16((__u16)count);
422  	if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
423  		bg->bg_itable_unused_hi = cpu_to_le16(count >> 16);
424  }
425  
__ext4_update_tstamp(__le32 * lo,__u8 * hi,time64_t now)426  static void __ext4_update_tstamp(__le32 *lo, __u8 *hi, time64_t now)
427  {
428  	now = clamp_val(now, 0, (1ull << 40) - 1);
429  
430  	*lo = cpu_to_le32(lower_32_bits(now));
431  	*hi = upper_32_bits(now);
432  }
433  
__ext4_get_tstamp(__le32 * lo,__u8 * hi)434  static time64_t __ext4_get_tstamp(__le32 *lo, __u8 *hi)
435  {
436  	return ((time64_t)(*hi) << 32) + le32_to_cpu(*lo);
437  }
438  #define ext4_update_tstamp(es, tstamp) \
439  	__ext4_update_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi, \
440  			     ktime_get_real_seconds())
441  #define ext4_get_tstamp(es, tstamp) \
442  	__ext4_get_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi)
443  
444  #define EXT4_SB_REFRESH_INTERVAL_SEC (3600) /* seconds (1 hour) */
445  #define EXT4_SB_REFRESH_INTERVAL_KB (16384) /* kilobytes (16MB) */
446  
447  /*
448   * The ext4_maybe_update_superblock() function checks and updates the
449   * superblock if needed.
450   *
451   * This function is designed to update the on-disk superblock only under
452   * certain conditions to prevent excessive disk writes and unnecessary
453   * waking of the disk from sleep. The superblock will be updated if:
454   * 1. More than an hour has passed since the last superblock update, and
455   * 2. More than 16MB have been written since the last superblock update.
456   *
457   * @sb: The superblock
458   */
ext4_maybe_update_superblock(struct super_block * sb)459  static void ext4_maybe_update_superblock(struct super_block *sb)
460  {
461  	struct ext4_sb_info *sbi = EXT4_SB(sb);
462  	struct ext4_super_block *es = sbi->s_es;
463  	journal_t *journal = sbi->s_journal;
464  	time64_t now;
465  	__u64 last_update;
466  	__u64 lifetime_write_kbytes;
467  	__u64 diff_size;
468  
469  	if (sb_rdonly(sb) || !(sb->s_flags & SB_ACTIVE) ||
470  	    !journal || (journal->j_flags & JBD2_UNMOUNT))
471  		return;
472  
473  	now = ktime_get_real_seconds();
474  	last_update = ext4_get_tstamp(es, s_wtime);
475  
476  	if (likely(now - last_update < EXT4_SB_REFRESH_INTERVAL_SEC))
477  		return;
478  
479  	lifetime_write_kbytes = sbi->s_kbytes_written +
480  		((part_stat_read(sb->s_bdev, sectors[STAT_WRITE]) -
481  		  sbi->s_sectors_written_start) >> 1);
482  
483  	/* Get the number of kilobytes not written to disk to account
484  	 * for statistics and compare with a multiple of 16 MB. This
485  	 * is used to determine when the next superblock commit should
486  	 * occur (i.e. not more often than once per 16MB if there was
487  	 * less written in an hour).
488  	 */
489  	diff_size = lifetime_write_kbytes - le64_to_cpu(es->s_kbytes_written);
490  
491  	if (diff_size > EXT4_SB_REFRESH_INTERVAL_KB)
492  		schedule_work(&EXT4_SB(sb)->s_sb_upd_work);
493  }
494  
ext4_journal_commit_callback(journal_t * journal,transaction_t * txn)495  static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn)
496  {
497  	struct super_block		*sb = journal->j_private;
498  	struct ext4_sb_info		*sbi = EXT4_SB(sb);
499  	int				error = is_journal_aborted(journal);
500  	struct ext4_journal_cb_entry	*jce;
501  
502  	BUG_ON(txn->t_state == T_FINISHED);
503  
504  	ext4_process_freed_data(sb, txn->t_tid);
505  	ext4_maybe_update_superblock(sb);
506  
507  	spin_lock(&sbi->s_md_lock);
508  	while (!list_empty(&txn->t_private_list)) {
509  		jce = list_entry(txn->t_private_list.next,
510  				 struct ext4_journal_cb_entry, jce_list);
511  		list_del_init(&jce->jce_list);
512  		spin_unlock(&sbi->s_md_lock);
513  		jce->jce_func(sb, jce, error);
514  		spin_lock(&sbi->s_md_lock);
515  	}
516  	spin_unlock(&sbi->s_md_lock);
517  }
518  
519  /*
520   * This writepage callback for write_cache_pages()
521   * takes care of a few cases after page cleaning.
522   *
523   * write_cache_pages() already checks for dirty pages
524   * and calls clear_page_dirty_for_io(), which we want,
525   * to write protect the pages.
526   *
527   * However, we may have to redirty a page (see below.)
528   */
ext4_journalled_writepage_callback(struct folio * folio,struct writeback_control * wbc,void * data)529  static int ext4_journalled_writepage_callback(struct folio *folio,
530  					      struct writeback_control *wbc,
531  					      void *data)
532  {
533  	transaction_t *transaction = (transaction_t *) data;
534  	struct buffer_head *bh, *head;
535  	struct journal_head *jh;
536  
537  	bh = head = folio_buffers(folio);
538  	do {
539  		/*
540  		 * We have to redirty a page in these cases:
541  		 * 1) If buffer is dirty, it means the page was dirty because it
542  		 * contains a buffer that needs checkpointing. So the dirty bit
543  		 * needs to be preserved so that checkpointing writes the buffer
544  		 * properly.
545  		 * 2) If buffer is not part of the committing transaction
546  		 * (we may have just accidentally come across this buffer because
547  		 * inode range tracking is not exact) or if the currently running
548  		 * transaction already contains this buffer as well, dirty bit
549  		 * needs to be preserved so that the buffer gets writeprotected
550  		 * properly on running transaction's commit.
551  		 */
552  		jh = bh2jh(bh);
553  		if (buffer_dirty(bh) ||
554  		    (jh && (jh->b_transaction != transaction ||
555  			    jh->b_next_transaction))) {
556  			folio_redirty_for_writepage(wbc, folio);
557  			goto out;
558  		}
559  	} while ((bh = bh->b_this_page) != head);
560  
561  out:
562  	return AOP_WRITEPAGE_ACTIVATE;
563  }
564  
ext4_journalled_submit_inode_data_buffers(struct jbd2_inode * jinode)565  static int ext4_journalled_submit_inode_data_buffers(struct jbd2_inode *jinode)
566  {
567  	struct address_space *mapping = jinode->i_vfs_inode->i_mapping;
568  	struct writeback_control wbc = {
569  		.sync_mode =  WB_SYNC_ALL,
570  		.nr_to_write = LONG_MAX,
571  		.range_start = jinode->i_dirty_start,
572  		.range_end = jinode->i_dirty_end,
573          };
574  
575  	return write_cache_pages(mapping, &wbc,
576  				 ext4_journalled_writepage_callback,
577  				 jinode->i_transaction);
578  }
579  
ext4_journal_submit_inode_data_buffers(struct jbd2_inode * jinode)580  static int ext4_journal_submit_inode_data_buffers(struct jbd2_inode *jinode)
581  {
582  	int ret;
583  
584  	if (ext4_should_journal_data(jinode->i_vfs_inode))
585  		ret = ext4_journalled_submit_inode_data_buffers(jinode);
586  	else
587  		ret = ext4_normal_submit_inode_data_buffers(jinode);
588  	return ret;
589  }
590  
ext4_journal_finish_inode_data_buffers(struct jbd2_inode * jinode)591  static int ext4_journal_finish_inode_data_buffers(struct jbd2_inode *jinode)
592  {
593  	int ret = 0;
594  
595  	if (!ext4_should_journal_data(jinode->i_vfs_inode))
596  		ret = jbd2_journal_finish_inode_data_buffers(jinode);
597  
598  	return ret;
599  }
600  
system_going_down(void)601  static bool system_going_down(void)
602  {
603  	return system_state == SYSTEM_HALT || system_state == SYSTEM_POWER_OFF
604  		|| system_state == SYSTEM_RESTART;
605  }
606  
607  struct ext4_err_translation {
608  	int code;
609  	int errno;
610  };
611  
612  #define EXT4_ERR_TRANSLATE(err) { .code = EXT4_ERR_##err, .errno = err }
613  
614  static struct ext4_err_translation err_translation[] = {
615  	EXT4_ERR_TRANSLATE(EIO),
616  	EXT4_ERR_TRANSLATE(ENOMEM),
617  	EXT4_ERR_TRANSLATE(EFSBADCRC),
618  	EXT4_ERR_TRANSLATE(EFSCORRUPTED),
619  	EXT4_ERR_TRANSLATE(ENOSPC),
620  	EXT4_ERR_TRANSLATE(ENOKEY),
621  	EXT4_ERR_TRANSLATE(EROFS),
622  	EXT4_ERR_TRANSLATE(EFBIG),
623  	EXT4_ERR_TRANSLATE(EEXIST),
624  	EXT4_ERR_TRANSLATE(ERANGE),
625  	EXT4_ERR_TRANSLATE(EOVERFLOW),
626  	EXT4_ERR_TRANSLATE(EBUSY),
627  	EXT4_ERR_TRANSLATE(ENOTDIR),
628  	EXT4_ERR_TRANSLATE(ENOTEMPTY),
629  	EXT4_ERR_TRANSLATE(ESHUTDOWN),
630  	EXT4_ERR_TRANSLATE(EFAULT),
631  };
632  
ext4_errno_to_code(int errno)633  static int ext4_errno_to_code(int errno)
634  {
635  	int i;
636  
637  	for (i = 0; i < ARRAY_SIZE(err_translation); i++)
638  		if (err_translation[i].errno == errno)
639  			return err_translation[i].code;
640  	return EXT4_ERR_UNKNOWN;
641  }
642  
save_error_info(struct super_block * sb,int error,__u32 ino,__u64 block,const char * func,unsigned int line)643  static void save_error_info(struct super_block *sb, int error,
644  			    __u32 ino, __u64 block,
645  			    const char *func, unsigned int line)
646  {
647  	struct ext4_sb_info *sbi = EXT4_SB(sb);
648  
649  	/* We default to EFSCORRUPTED error... */
650  	if (error == 0)
651  		error = EFSCORRUPTED;
652  
653  	spin_lock(&sbi->s_error_lock);
654  	sbi->s_add_error_count++;
655  	sbi->s_last_error_code = error;
656  	sbi->s_last_error_line = line;
657  	sbi->s_last_error_ino = ino;
658  	sbi->s_last_error_block = block;
659  	sbi->s_last_error_func = func;
660  	sbi->s_last_error_time = ktime_get_real_seconds();
661  	if (!sbi->s_first_error_time) {
662  		sbi->s_first_error_code = error;
663  		sbi->s_first_error_line = line;
664  		sbi->s_first_error_ino = ino;
665  		sbi->s_first_error_block = block;
666  		sbi->s_first_error_func = func;
667  		sbi->s_first_error_time = sbi->s_last_error_time;
668  	}
669  	spin_unlock(&sbi->s_error_lock);
670  }
671  
672  /* Deal with the reporting of failure conditions on a filesystem such as
673   * inconsistencies detected or read IO failures.
674   *
675   * On ext2, we can store the error state of the filesystem in the
676   * superblock.  That is not possible on ext4, because we may have other
677   * write ordering constraints on the superblock which prevent us from
678   * writing it out straight away; and given that the journal is about to
679   * be aborted, we can't rely on the current, or future, transactions to
680   * write out the superblock safely.
681   *
682   * We'll just use the jbd2_journal_abort() error code to record an error in
683   * the journal instead.  On recovery, the journal will complain about
684   * that error until we've noted it down and cleared it.
685   *
686   * If force_ro is set, we unconditionally force the filesystem into an
687   * ABORT|READONLY state, unless the error response on the fs has been set to
688   * panic in which case we take the easy way out and panic immediately. This is
689   * used to deal with unrecoverable failures such as journal IO errors or ENOMEM
690   * at a critical moment in log management.
691   */
ext4_handle_error(struct super_block * sb,bool force_ro,int error,__u32 ino,__u64 block,const char * func,unsigned int line)692  static void ext4_handle_error(struct super_block *sb, bool force_ro, int error,
693  			      __u32 ino, __u64 block,
694  			      const char *func, unsigned int line)
695  {
696  	journal_t *journal = EXT4_SB(sb)->s_journal;
697  	bool continue_fs = !force_ro && test_opt(sb, ERRORS_CONT);
698  
699  	EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
700  	if (test_opt(sb, WARN_ON_ERROR))
701  		WARN_ON_ONCE(1);
702  
703  	if (!continue_fs && !sb_rdonly(sb)) {
704  		set_bit(EXT4_FLAGS_SHUTDOWN, &EXT4_SB(sb)->s_ext4_flags);
705  		if (journal)
706  			jbd2_journal_abort(journal, -EIO);
707  	}
708  
709  	if (!bdev_read_only(sb->s_bdev)) {
710  		save_error_info(sb, error, ino, block, func, line);
711  		/*
712  		 * In case the fs should keep running, we need to writeout
713  		 * superblock through the journal. Due to lock ordering
714  		 * constraints, it may not be safe to do it right here so we
715  		 * defer superblock flushing to a workqueue.
716  		 */
717  		if (continue_fs && journal)
718  			schedule_work(&EXT4_SB(sb)->s_sb_upd_work);
719  		else
720  			ext4_commit_super(sb);
721  	}
722  
723  	/*
724  	 * We force ERRORS_RO behavior when system is rebooting. Otherwise we
725  	 * could panic during 'reboot -f' as the underlying device got already
726  	 * disabled.
727  	 */
728  	if (test_opt(sb, ERRORS_PANIC) && !system_going_down()) {
729  		panic("EXT4-fs (device %s): panic forced after error\n",
730  			sb->s_id);
731  	}
732  
733  	if (sb_rdonly(sb) || continue_fs)
734  		return;
735  
736  	ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
737  	/*
738  	 * EXT4_FLAGS_SHUTDOWN was set which stops all filesystem
739  	 * modifications. We don't set SB_RDONLY because that requires
740  	 * sb->s_umount semaphore and setting it without proper remount
741  	 * procedure is confusing code such as freeze_super() leading to
742  	 * deadlocks and other problems.
743  	 */
744  }
745  
update_super_work(struct work_struct * work)746  static void update_super_work(struct work_struct *work)
747  {
748  	struct ext4_sb_info *sbi = container_of(work, struct ext4_sb_info,
749  						s_sb_upd_work);
750  	journal_t *journal = sbi->s_journal;
751  	handle_t *handle;
752  
753  	/*
754  	 * If the journal is still running, we have to write out superblock
755  	 * through the journal to avoid collisions of other journalled sb
756  	 * updates.
757  	 *
758  	 * We use directly jbd2 functions here to avoid recursing back into
759  	 * ext4 error handling code during handling of previous errors.
760  	 */
761  	if (!sb_rdonly(sbi->s_sb) && journal) {
762  		struct buffer_head *sbh = sbi->s_sbh;
763  		bool call_notify_err = false;
764  
765  		handle = jbd2_journal_start(journal, 1);
766  		if (IS_ERR(handle))
767  			goto write_directly;
768  		if (jbd2_journal_get_write_access(handle, sbh)) {
769  			jbd2_journal_stop(handle);
770  			goto write_directly;
771  		}
772  
773  		if (sbi->s_add_error_count > 0)
774  			call_notify_err = true;
775  
776  		ext4_update_super(sbi->s_sb);
777  		if (buffer_write_io_error(sbh) || !buffer_uptodate(sbh)) {
778  			ext4_msg(sbi->s_sb, KERN_ERR, "previous I/O error to "
779  				 "superblock detected");
780  			clear_buffer_write_io_error(sbh);
781  			set_buffer_uptodate(sbh);
782  		}
783  
784  		if (jbd2_journal_dirty_metadata(handle, sbh)) {
785  			jbd2_journal_stop(handle);
786  			goto write_directly;
787  		}
788  		jbd2_journal_stop(handle);
789  
790  		if (call_notify_err)
791  			ext4_notify_error_sysfs(sbi);
792  
793  		return;
794  	}
795  write_directly:
796  	/*
797  	 * Write through journal failed. Write sb directly to get error info
798  	 * out and hope for the best.
799  	 */
800  	ext4_commit_super(sbi->s_sb);
801  	ext4_notify_error_sysfs(sbi);
802  }
803  
804  #define ext4_error_ratelimit(sb)					\
805  		___ratelimit(&(EXT4_SB(sb)->s_err_ratelimit_state),	\
806  			     "EXT4-fs error")
807  
__ext4_error(struct super_block * sb,const char * function,unsigned int line,bool force_ro,int error,__u64 block,const char * fmt,...)808  void __ext4_error(struct super_block *sb, const char *function,
809  		  unsigned int line, bool force_ro, int error, __u64 block,
810  		  const char *fmt, ...)
811  {
812  	struct va_format vaf;
813  	va_list args;
814  
815  	if (unlikely(ext4_forced_shutdown(sb)))
816  		return;
817  
818  	trace_ext4_error(sb, function, line);
819  	if (ext4_error_ratelimit(sb)) {
820  		va_start(args, fmt);
821  		vaf.fmt = fmt;
822  		vaf.va = &args;
823  		printk(KERN_CRIT
824  		       "EXT4-fs error (device %s): %s:%d: comm %s: %pV\n",
825  		       sb->s_id, function, line, current->comm, &vaf);
826  		va_end(args);
827  	}
828  	fsnotify_sb_error(sb, NULL, error ? error : EFSCORRUPTED);
829  
830  	ext4_handle_error(sb, force_ro, error, 0, block, function, line);
831  }
832  
__ext4_error_inode(struct inode * inode,const char * function,unsigned int line,ext4_fsblk_t block,int error,const char * fmt,...)833  void __ext4_error_inode(struct inode *inode, const char *function,
834  			unsigned int line, ext4_fsblk_t block, int error,
835  			const char *fmt, ...)
836  {
837  	va_list args;
838  	struct va_format vaf;
839  
840  	if (unlikely(ext4_forced_shutdown(inode->i_sb)))
841  		return;
842  
843  	trace_ext4_error(inode->i_sb, function, line);
844  	if (ext4_error_ratelimit(inode->i_sb)) {
845  		va_start(args, fmt);
846  		vaf.fmt = fmt;
847  		vaf.va = &args;
848  		if (block)
849  			printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
850  			       "inode #%lu: block %llu: comm %s: %pV\n",
851  			       inode->i_sb->s_id, function, line, inode->i_ino,
852  			       block, current->comm, &vaf);
853  		else
854  			printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
855  			       "inode #%lu: comm %s: %pV\n",
856  			       inode->i_sb->s_id, function, line, inode->i_ino,
857  			       current->comm, &vaf);
858  		va_end(args);
859  	}
860  	fsnotify_sb_error(inode->i_sb, inode, error ? error : EFSCORRUPTED);
861  
862  	ext4_handle_error(inode->i_sb, false, error, inode->i_ino, block,
863  			  function, line);
864  }
865  
__ext4_error_file(struct file * file,const char * function,unsigned int line,ext4_fsblk_t block,const char * fmt,...)866  void __ext4_error_file(struct file *file, const char *function,
867  		       unsigned int line, ext4_fsblk_t block,
868  		       const char *fmt, ...)
869  {
870  	va_list args;
871  	struct va_format vaf;
872  	struct inode *inode = file_inode(file);
873  	char pathname[80], *path;
874  
875  	if (unlikely(ext4_forced_shutdown(inode->i_sb)))
876  		return;
877  
878  	trace_ext4_error(inode->i_sb, function, line);
879  	if (ext4_error_ratelimit(inode->i_sb)) {
880  		path = file_path(file, pathname, sizeof(pathname));
881  		if (IS_ERR(path))
882  			path = "(unknown)";
883  		va_start(args, fmt);
884  		vaf.fmt = fmt;
885  		vaf.va = &args;
886  		if (block)
887  			printk(KERN_CRIT
888  			       "EXT4-fs error (device %s): %s:%d: inode #%lu: "
889  			       "block %llu: comm %s: path %s: %pV\n",
890  			       inode->i_sb->s_id, function, line, inode->i_ino,
891  			       block, current->comm, path, &vaf);
892  		else
893  			printk(KERN_CRIT
894  			       "EXT4-fs error (device %s): %s:%d: inode #%lu: "
895  			       "comm %s: path %s: %pV\n",
896  			       inode->i_sb->s_id, function, line, inode->i_ino,
897  			       current->comm, path, &vaf);
898  		va_end(args);
899  	}
900  	fsnotify_sb_error(inode->i_sb, inode, EFSCORRUPTED);
901  
902  	ext4_handle_error(inode->i_sb, false, EFSCORRUPTED, inode->i_ino, block,
903  			  function, line);
904  }
905  
ext4_decode_error(struct super_block * sb,int errno,char nbuf[16])906  const char *ext4_decode_error(struct super_block *sb, int errno,
907  			      char nbuf[16])
908  {
909  	char *errstr = NULL;
910  
911  	switch (errno) {
912  	case -EFSCORRUPTED:
913  		errstr = "Corrupt filesystem";
914  		break;
915  	case -EFSBADCRC:
916  		errstr = "Filesystem failed CRC";
917  		break;
918  	case -EIO:
919  		errstr = "IO failure";
920  		break;
921  	case -ENOMEM:
922  		errstr = "Out of memory";
923  		break;
924  	case -EROFS:
925  		if (!sb || (EXT4_SB(sb)->s_journal &&
926  			    EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT))
927  			errstr = "Journal has aborted";
928  		else
929  			errstr = "Readonly filesystem";
930  		break;
931  	default:
932  		/* If the caller passed in an extra buffer for unknown
933  		 * errors, textualise them now.  Else we just return
934  		 * NULL. */
935  		if (nbuf) {
936  			/* Check for truncated error codes... */
937  			if (snprintf(nbuf, 16, "error %d", -errno) >= 0)
938  				errstr = nbuf;
939  		}
940  		break;
941  	}
942  
943  	return errstr;
944  }
945  
946  /* __ext4_std_error decodes expected errors from journaling functions
947   * automatically and invokes the appropriate error response.  */
948  
__ext4_std_error(struct super_block * sb,const char * function,unsigned int line,int errno)949  void __ext4_std_error(struct super_block *sb, const char *function,
950  		      unsigned int line, int errno)
951  {
952  	char nbuf[16];
953  	const char *errstr;
954  
955  	if (unlikely(ext4_forced_shutdown(sb)))
956  		return;
957  
958  	/* Special case: if the error is EROFS, and we're not already
959  	 * inside a transaction, then there's really no point in logging
960  	 * an error. */
961  	if (errno == -EROFS && journal_current_handle() == NULL && sb_rdonly(sb))
962  		return;
963  
964  	if (ext4_error_ratelimit(sb)) {
965  		errstr = ext4_decode_error(sb, errno, nbuf);
966  		printk(KERN_CRIT "EXT4-fs error (device %s) in %s:%d: %s\n",
967  		       sb->s_id, function, line, errstr);
968  	}
969  	fsnotify_sb_error(sb, NULL, errno ? errno : EFSCORRUPTED);
970  
971  	ext4_handle_error(sb, false, -errno, 0, 0, function, line);
972  }
973  
__ext4_msg(struct super_block * sb,const char * prefix,const char * fmt,...)974  void __ext4_msg(struct super_block *sb,
975  		const char *prefix, const char *fmt, ...)
976  {
977  	struct va_format vaf;
978  	va_list args;
979  
980  	if (sb) {
981  		atomic_inc(&EXT4_SB(sb)->s_msg_count);
982  		if (!___ratelimit(&(EXT4_SB(sb)->s_msg_ratelimit_state),
983  				  "EXT4-fs"))
984  			return;
985  	}
986  
987  	va_start(args, fmt);
988  	vaf.fmt = fmt;
989  	vaf.va = &args;
990  	if (sb)
991  		printk("%sEXT4-fs (%s): %pV\n", prefix, sb->s_id, &vaf);
992  	else
993  		printk("%sEXT4-fs: %pV\n", prefix, &vaf);
994  	va_end(args);
995  }
996  
ext4_warning_ratelimit(struct super_block * sb)997  static int ext4_warning_ratelimit(struct super_block *sb)
998  {
999  	atomic_inc(&EXT4_SB(sb)->s_warning_count);
1000  	return ___ratelimit(&(EXT4_SB(sb)->s_warning_ratelimit_state),
1001  			    "EXT4-fs warning");
1002  }
1003  
__ext4_warning(struct super_block * sb,const char * function,unsigned int line,const char * fmt,...)1004  void __ext4_warning(struct super_block *sb, const char *function,
1005  		    unsigned int line, const char *fmt, ...)
1006  {
1007  	struct va_format vaf;
1008  	va_list args;
1009  
1010  	if (!ext4_warning_ratelimit(sb))
1011  		return;
1012  
1013  	va_start(args, fmt);
1014  	vaf.fmt = fmt;
1015  	vaf.va = &args;
1016  	printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: %pV\n",
1017  	       sb->s_id, function, line, &vaf);
1018  	va_end(args);
1019  }
1020  
__ext4_warning_inode(const struct inode * inode,const char * function,unsigned int line,const char * fmt,...)1021  void __ext4_warning_inode(const struct inode *inode, const char *function,
1022  			  unsigned int line, const char *fmt, ...)
1023  {
1024  	struct va_format vaf;
1025  	va_list args;
1026  
1027  	if (!ext4_warning_ratelimit(inode->i_sb))
1028  		return;
1029  
1030  	va_start(args, fmt);
1031  	vaf.fmt = fmt;
1032  	vaf.va = &args;
1033  	printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: "
1034  	       "inode #%lu: comm %s: %pV\n", inode->i_sb->s_id,
1035  	       function, line, inode->i_ino, current->comm, &vaf);
1036  	va_end(args);
1037  }
1038  
__ext4_grp_locked_error(const char * function,unsigned int line,struct super_block * sb,ext4_group_t grp,unsigned long ino,ext4_fsblk_t block,const char * fmt,...)1039  void __ext4_grp_locked_error(const char *function, unsigned int line,
1040  			     struct super_block *sb, ext4_group_t grp,
1041  			     unsigned long ino, ext4_fsblk_t block,
1042  			     const char *fmt, ...)
1043  __releases(bitlock)
1044  __acquires(bitlock)
1045  {
1046  	struct va_format vaf;
1047  	va_list args;
1048  
1049  	if (unlikely(ext4_forced_shutdown(sb)))
1050  		return;
1051  
1052  	trace_ext4_error(sb, function, line);
1053  	if (ext4_error_ratelimit(sb)) {
1054  		va_start(args, fmt);
1055  		vaf.fmt = fmt;
1056  		vaf.va = &args;
1057  		printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u, ",
1058  		       sb->s_id, function, line, grp);
1059  		if (ino)
1060  			printk(KERN_CONT "inode %lu: ", ino);
1061  		if (block)
1062  			printk(KERN_CONT "block %llu:",
1063  			       (unsigned long long) block);
1064  		printk(KERN_CONT "%pV\n", &vaf);
1065  		va_end(args);
1066  	}
1067  
1068  	if (test_opt(sb, ERRORS_CONT)) {
1069  		if (test_opt(sb, WARN_ON_ERROR))
1070  			WARN_ON_ONCE(1);
1071  		EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
1072  		if (!bdev_read_only(sb->s_bdev)) {
1073  			save_error_info(sb, EFSCORRUPTED, ino, block, function,
1074  					line);
1075  			schedule_work(&EXT4_SB(sb)->s_sb_upd_work);
1076  		}
1077  		return;
1078  	}
1079  	ext4_unlock_group(sb, grp);
1080  	ext4_handle_error(sb, false, EFSCORRUPTED, ino, block, function, line);
1081  	/*
1082  	 * We only get here in the ERRORS_RO case; relocking the group
1083  	 * may be dangerous, but nothing bad will happen since the
1084  	 * filesystem will have already been marked read/only and the
1085  	 * journal has been aborted.  We return 1 as a hint to callers
1086  	 * who might what to use the return value from
1087  	 * ext4_grp_locked_error() to distinguish between the
1088  	 * ERRORS_CONT and ERRORS_RO case, and perhaps return more
1089  	 * aggressively from the ext4 function in question, with a
1090  	 * more appropriate error code.
1091  	 */
1092  	ext4_lock_group(sb, grp);
1093  	return;
1094  }
1095  
ext4_mark_group_bitmap_corrupted(struct super_block * sb,ext4_group_t group,unsigned int flags)1096  void ext4_mark_group_bitmap_corrupted(struct super_block *sb,
1097  				     ext4_group_t group,
1098  				     unsigned int flags)
1099  {
1100  	struct ext4_sb_info *sbi = EXT4_SB(sb);
1101  	struct ext4_group_info *grp = ext4_get_group_info(sb, group);
1102  	struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL);
1103  	int ret;
1104  
1105  	if (!grp || !gdp)
1106  		return;
1107  	if (flags & EXT4_GROUP_INFO_BBITMAP_CORRUPT) {
1108  		ret = ext4_test_and_set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT,
1109  					    &grp->bb_state);
1110  		if (!ret)
1111  			percpu_counter_sub(&sbi->s_freeclusters_counter,
1112  					   grp->bb_free);
1113  	}
1114  
1115  	if (flags & EXT4_GROUP_INFO_IBITMAP_CORRUPT) {
1116  		ret = ext4_test_and_set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT,
1117  					    &grp->bb_state);
1118  		if (!ret && gdp) {
1119  			int count;
1120  
1121  			count = ext4_free_inodes_count(sb, gdp);
1122  			percpu_counter_sub(&sbi->s_freeinodes_counter,
1123  					   count);
1124  		}
1125  	}
1126  }
1127  
ext4_update_dynamic_rev(struct super_block * sb)1128  void ext4_update_dynamic_rev(struct super_block *sb)
1129  {
1130  	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
1131  
1132  	if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV)
1133  		return;
1134  
1135  	ext4_warning(sb,
1136  		     "updating to rev %d because of new feature flag, "
1137  		     "running e2fsck is recommended",
1138  		     EXT4_DYNAMIC_REV);
1139  
1140  	es->s_first_ino = cpu_to_le32(EXT4_GOOD_OLD_FIRST_INO);
1141  	es->s_inode_size = cpu_to_le16(EXT4_GOOD_OLD_INODE_SIZE);
1142  	es->s_rev_level = cpu_to_le32(EXT4_DYNAMIC_REV);
1143  	/* leave es->s_feature_*compat flags alone */
1144  	/* es->s_uuid will be set by e2fsck if empty */
1145  
1146  	/*
1147  	 * The rest of the superblock fields should be zero, and if not it
1148  	 * means they are likely already in use, so leave them alone.  We
1149  	 * can leave it up to e2fsck to clean up any inconsistencies there.
1150  	 */
1151  }
1152  
orphan_list_entry(struct list_head * l)1153  static inline struct inode *orphan_list_entry(struct list_head *l)
1154  {
1155  	return &list_entry(l, struct ext4_inode_info, i_orphan)->vfs_inode;
1156  }
1157  
dump_orphan_list(struct super_block * sb,struct ext4_sb_info * sbi)1158  static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi)
1159  {
1160  	struct list_head *l;
1161  
1162  	ext4_msg(sb, KERN_ERR, "sb orphan head is %d",
1163  		 le32_to_cpu(sbi->s_es->s_last_orphan));
1164  
1165  	printk(KERN_ERR "sb_info orphan list:\n");
1166  	list_for_each(l, &sbi->s_orphan) {
1167  		struct inode *inode = orphan_list_entry(l);
1168  		printk(KERN_ERR "  "
1169  		       "inode %s:%lu at %p: mode %o, nlink %d, next %d\n",
1170  		       inode->i_sb->s_id, inode->i_ino, inode,
1171  		       inode->i_mode, inode->i_nlink,
1172  		       NEXT_ORPHAN(inode));
1173  	}
1174  }
1175  
1176  #ifdef CONFIG_QUOTA
1177  static int ext4_quota_off(struct super_block *sb, int type);
1178  
ext4_quotas_off(struct super_block * sb,int type)1179  static inline void ext4_quotas_off(struct super_block *sb, int type)
1180  {
1181  	BUG_ON(type > EXT4_MAXQUOTAS);
1182  
1183  	/* Use our quota_off function to clear inode flags etc. */
1184  	for (type--; type >= 0; type--)
1185  		ext4_quota_off(sb, type);
1186  }
1187  
1188  /*
1189   * This is a helper function which is used in the mount/remount
1190   * codepaths (which holds s_umount) to fetch the quota file name.
1191   */
get_qf_name(struct super_block * sb,struct ext4_sb_info * sbi,int type)1192  static inline char *get_qf_name(struct super_block *sb,
1193  				struct ext4_sb_info *sbi,
1194  				int type)
1195  {
1196  	return rcu_dereference_protected(sbi->s_qf_names[type],
1197  					 lockdep_is_held(&sb->s_umount));
1198  }
1199  #else
ext4_quotas_off(struct super_block * sb,int type)1200  static inline void ext4_quotas_off(struct super_block *sb, int type)
1201  {
1202  }
1203  #endif
1204  
ext4_percpu_param_init(struct ext4_sb_info * sbi)1205  static int ext4_percpu_param_init(struct ext4_sb_info *sbi)
1206  {
1207  	ext4_fsblk_t block;
1208  	int err;
1209  
1210  	block = ext4_count_free_clusters(sbi->s_sb);
1211  	ext4_free_blocks_count_set(sbi->s_es, EXT4_C2B(sbi, block));
1212  	err = percpu_counter_init(&sbi->s_freeclusters_counter, block,
1213  				  GFP_KERNEL);
1214  	if (!err) {
1215  		unsigned long freei = ext4_count_free_inodes(sbi->s_sb);
1216  		sbi->s_es->s_free_inodes_count = cpu_to_le32(freei);
1217  		err = percpu_counter_init(&sbi->s_freeinodes_counter, freei,
1218  					  GFP_KERNEL);
1219  	}
1220  	if (!err)
1221  		err = percpu_counter_init(&sbi->s_dirs_counter,
1222  					  ext4_count_dirs(sbi->s_sb), GFP_KERNEL);
1223  	if (!err)
1224  		err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0,
1225  					  GFP_KERNEL);
1226  	if (!err)
1227  		err = percpu_counter_init(&sbi->s_sra_exceeded_retry_limit, 0,
1228  					  GFP_KERNEL);
1229  	if (!err)
1230  		err = percpu_init_rwsem(&sbi->s_writepages_rwsem);
1231  
1232  	if (err)
1233  		ext4_msg(sbi->s_sb, KERN_ERR, "insufficient memory");
1234  
1235  	return err;
1236  }
1237  
ext4_percpu_param_destroy(struct ext4_sb_info * sbi)1238  static void ext4_percpu_param_destroy(struct ext4_sb_info *sbi)
1239  {
1240  	percpu_counter_destroy(&sbi->s_freeclusters_counter);
1241  	percpu_counter_destroy(&sbi->s_freeinodes_counter);
1242  	percpu_counter_destroy(&sbi->s_dirs_counter);
1243  	percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
1244  	percpu_counter_destroy(&sbi->s_sra_exceeded_retry_limit);
1245  	percpu_free_rwsem(&sbi->s_writepages_rwsem);
1246  }
1247  
ext4_group_desc_free(struct ext4_sb_info * sbi)1248  static void ext4_group_desc_free(struct ext4_sb_info *sbi)
1249  {
1250  	struct buffer_head **group_desc;
1251  	int i;
1252  
1253  	rcu_read_lock();
1254  	group_desc = rcu_dereference(sbi->s_group_desc);
1255  	for (i = 0; i < sbi->s_gdb_count; i++)
1256  		brelse(group_desc[i]);
1257  	kvfree(group_desc);
1258  	rcu_read_unlock();
1259  }
1260  
ext4_flex_groups_free(struct ext4_sb_info * sbi)1261  static void ext4_flex_groups_free(struct ext4_sb_info *sbi)
1262  {
1263  	struct flex_groups **flex_groups;
1264  	int i;
1265  
1266  	rcu_read_lock();
1267  	flex_groups = rcu_dereference(sbi->s_flex_groups);
1268  	if (flex_groups) {
1269  		for (i = 0; i < sbi->s_flex_groups_allocated; i++)
1270  			kvfree(flex_groups[i]);
1271  		kvfree(flex_groups);
1272  	}
1273  	rcu_read_unlock();
1274  }
1275  
ext4_put_super(struct super_block * sb)1276  static void ext4_put_super(struct super_block *sb)
1277  {
1278  	struct ext4_sb_info *sbi = EXT4_SB(sb);
1279  	struct ext4_super_block *es = sbi->s_es;
1280  	int aborted = 0;
1281  	int err;
1282  
1283  	/*
1284  	 * Unregister sysfs before destroying jbd2 journal.
1285  	 * Since we could still access attr_journal_task attribute via sysfs
1286  	 * path which could have sbi->s_journal->j_task as NULL
1287  	 * Unregister sysfs before flush sbi->s_sb_upd_work.
1288  	 * Since user may read /proc/fs/ext4/xx/mb_groups during umount, If
1289  	 * read metadata verify failed then will queue error work.
1290  	 * update_super_work will call start_this_handle may trigger
1291  	 * BUG_ON.
1292  	 */
1293  	ext4_unregister_sysfs(sb);
1294  
1295  	if (___ratelimit(&ext4_mount_msg_ratelimit, "EXT4-fs unmount"))
1296  		ext4_msg(sb, KERN_INFO, "unmounting filesystem %pU.",
1297  			 &sb->s_uuid);
1298  
1299  	ext4_unregister_li_request(sb);
1300  	ext4_quotas_off(sb, EXT4_MAXQUOTAS);
1301  
1302  	flush_work(&sbi->s_sb_upd_work);
1303  	destroy_workqueue(sbi->rsv_conversion_wq);
1304  	ext4_release_orphan_info(sb);
1305  
1306  	if (sbi->s_journal) {
1307  		aborted = is_journal_aborted(sbi->s_journal);
1308  		err = jbd2_journal_destroy(sbi->s_journal);
1309  		sbi->s_journal = NULL;
1310  		if ((err < 0) && !aborted) {
1311  			ext4_abort(sb, -err, "Couldn't clean up the journal");
1312  		}
1313  	}
1314  
1315  	ext4_es_unregister_shrinker(sbi);
1316  	timer_shutdown_sync(&sbi->s_err_report);
1317  	ext4_release_system_zone(sb);
1318  	ext4_mb_release(sb);
1319  	ext4_ext_release(sb);
1320  
1321  	if (!sb_rdonly(sb) && !aborted) {
1322  		ext4_clear_feature_journal_needs_recovery(sb);
1323  		ext4_clear_feature_orphan_present(sb);
1324  		es->s_state = cpu_to_le16(sbi->s_mount_state);
1325  	}
1326  	if (!sb_rdonly(sb))
1327  		ext4_commit_super(sb);
1328  
1329  	ext4_group_desc_free(sbi);
1330  	ext4_flex_groups_free(sbi);
1331  
1332  	WARN_ON_ONCE(!(sbi->s_mount_state & EXT4_ERROR_FS) &&
1333  		     percpu_counter_sum(&sbi->s_dirtyclusters_counter));
1334  	ext4_percpu_param_destroy(sbi);
1335  #ifdef CONFIG_QUOTA
1336  	for (int i = 0; i < EXT4_MAXQUOTAS; i++)
1337  		kfree(get_qf_name(sb, sbi, i));
1338  #endif
1339  
1340  	/* Debugging code just in case the in-memory inode orphan list
1341  	 * isn't empty.  The on-disk one can be non-empty if we've
1342  	 * detected an error and taken the fs readonly, but the
1343  	 * in-memory list had better be clean by this point. */
1344  	if (!list_empty(&sbi->s_orphan))
1345  		dump_orphan_list(sb, sbi);
1346  	ASSERT(list_empty(&sbi->s_orphan));
1347  
1348  	sync_blockdev(sb->s_bdev);
1349  	invalidate_bdev(sb->s_bdev);
1350  	if (sbi->s_journal_bdev_file) {
1351  		/*
1352  		 * Invalidate the journal device's buffers.  We don't want them
1353  		 * floating about in memory - the physical journal device may
1354  		 * hotswapped, and it breaks the `ro-after' testing code.
1355  		 */
1356  		sync_blockdev(file_bdev(sbi->s_journal_bdev_file));
1357  		invalidate_bdev(file_bdev(sbi->s_journal_bdev_file));
1358  	}
1359  
1360  	ext4_xattr_destroy_cache(sbi->s_ea_inode_cache);
1361  	sbi->s_ea_inode_cache = NULL;
1362  
1363  	ext4_xattr_destroy_cache(sbi->s_ea_block_cache);
1364  	sbi->s_ea_block_cache = NULL;
1365  
1366  	ext4_stop_mmpd(sbi);
1367  
1368  	brelse(sbi->s_sbh);
1369  	sb->s_fs_info = NULL;
1370  	/*
1371  	 * Now that we are completely done shutting down the
1372  	 * superblock, we need to actually destroy the kobject.
1373  	 */
1374  	kobject_put(&sbi->s_kobj);
1375  	wait_for_completion(&sbi->s_kobj_unregister);
1376  	if (sbi->s_chksum_driver)
1377  		crypto_free_shash(sbi->s_chksum_driver);
1378  	kfree(sbi->s_blockgroup_lock);
1379  	fs_put_dax(sbi->s_daxdev, NULL);
1380  	fscrypt_free_dummy_policy(&sbi->s_dummy_enc_policy);
1381  #if IS_ENABLED(CONFIG_UNICODE)
1382  	utf8_unload(sb->s_encoding);
1383  #endif
1384  	kfree(sbi);
1385  }
1386  
1387  static struct kmem_cache *ext4_inode_cachep;
1388  
1389  /*
1390   * Called inside transaction, so use GFP_NOFS
1391   */
ext4_alloc_inode(struct super_block * sb)1392  static struct inode *ext4_alloc_inode(struct super_block *sb)
1393  {
1394  	struct ext4_inode_info *ei;
1395  
1396  	ei = alloc_inode_sb(sb, ext4_inode_cachep, GFP_NOFS);
1397  	if (!ei)
1398  		return NULL;
1399  
1400  	inode_set_iversion(&ei->vfs_inode, 1);
1401  	ei->i_flags = 0;
1402  	spin_lock_init(&ei->i_raw_lock);
1403  	ei->i_prealloc_node = RB_ROOT;
1404  	atomic_set(&ei->i_prealloc_active, 0);
1405  	rwlock_init(&ei->i_prealloc_lock);
1406  	ext4_es_init_tree(&ei->i_es_tree);
1407  	rwlock_init(&ei->i_es_lock);
1408  	INIT_LIST_HEAD(&ei->i_es_list);
1409  	ei->i_es_all_nr = 0;
1410  	ei->i_es_shk_nr = 0;
1411  	ei->i_es_shrink_lblk = 0;
1412  	ei->i_reserved_data_blocks = 0;
1413  	spin_lock_init(&(ei->i_block_reservation_lock));
1414  	ext4_init_pending_tree(&ei->i_pending_tree);
1415  #ifdef CONFIG_QUOTA
1416  	ei->i_reserved_quota = 0;
1417  	memset(&ei->i_dquot, 0, sizeof(ei->i_dquot));
1418  #endif
1419  	ei->jinode = NULL;
1420  	INIT_LIST_HEAD(&ei->i_rsv_conversion_list);
1421  	spin_lock_init(&ei->i_completed_io_lock);
1422  	ei->i_sync_tid = 0;
1423  	ei->i_datasync_tid = 0;
1424  	atomic_set(&ei->i_unwritten, 0);
1425  	INIT_WORK(&ei->i_rsv_conversion_work, ext4_end_io_rsv_work);
1426  	ext4_fc_init_inode(&ei->vfs_inode);
1427  	mutex_init(&ei->i_fc_lock);
1428  	return &ei->vfs_inode;
1429  }
1430  
ext4_drop_inode(struct inode * inode)1431  static int ext4_drop_inode(struct inode *inode)
1432  {
1433  	int drop = generic_drop_inode(inode);
1434  
1435  	if (!drop)
1436  		drop = fscrypt_drop_inode(inode);
1437  
1438  	trace_ext4_drop_inode(inode, drop);
1439  	return drop;
1440  }
1441  
ext4_free_in_core_inode(struct inode * inode)1442  static void ext4_free_in_core_inode(struct inode *inode)
1443  {
1444  	fscrypt_free_inode(inode);
1445  	if (!list_empty(&(EXT4_I(inode)->i_fc_list))) {
1446  		pr_warn("%s: inode %ld still in fc list",
1447  			__func__, inode->i_ino);
1448  	}
1449  	kmem_cache_free(ext4_inode_cachep, EXT4_I(inode));
1450  }
1451  
ext4_destroy_inode(struct inode * inode)1452  static void ext4_destroy_inode(struct inode *inode)
1453  {
1454  	if (!list_empty(&(EXT4_I(inode)->i_orphan))) {
1455  		ext4_msg(inode->i_sb, KERN_ERR,
1456  			 "Inode %lu (%p): orphan list check failed!",
1457  			 inode->i_ino, EXT4_I(inode));
1458  		print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4,
1459  				EXT4_I(inode), sizeof(struct ext4_inode_info),
1460  				true);
1461  		dump_stack();
1462  	}
1463  
1464  	if (!(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ERROR_FS) &&
1465  	    WARN_ON_ONCE(EXT4_I(inode)->i_reserved_data_blocks))
1466  		ext4_msg(inode->i_sb, KERN_ERR,
1467  			 "Inode %lu (%p): i_reserved_data_blocks (%u) not cleared!",
1468  			 inode->i_ino, EXT4_I(inode),
1469  			 EXT4_I(inode)->i_reserved_data_blocks);
1470  }
1471  
ext4_shutdown(struct super_block * sb)1472  static void ext4_shutdown(struct super_block *sb)
1473  {
1474         ext4_force_shutdown(sb, EXT4_GOING_FLAGS_NOLOGFLUSH);
1475  }
1476  
init_once(void * foo)1477  static void init_once(void *foo)
1478  {
1479  	struct ext4_inode_info *ei = foo;
1480  
1481  	INIT_LIST_HEAD(&ei->i_orphan);
1482  	init_rwsem(&ei->xattr_sem);
1483  	init_rwsem(&ei->i_data_sem);
1484  	inode_init_once(&ei->vfs_inode);
1485  	ext4_fc_init_inode(&ei->vfs_inode);
1486  }
1487  
init_inodecache(void)1488  static int __init init_inodecache(void)
1489  {
1490  	ext4_inode_cachep = kmem_cache_create_usercopy("ext4_inode_cache",
1491  				sizeof(struct ext4_inode_info), 0,
1492  				SLAB_RECLAIM_ACCOUNT | SLAB_ACCOUNT,
1493  				offsetof(struct ext4_inode_info, i_data),
1494  				sizeof_field(struct ext4_inode_info, i_data),
1495  				init_once);
1496  	if (ext4_inode_cachep == NULL)
1497  		return -ENOMEM;
1498  	return 0;
1499  }
1500  
destroy_inodecache(void)1501  static void destroy_inodecache(void)
1502  {
1503  	/*
1504  	 * Make sure all delayed rcu free inodes are flushed before we
1505  	 * destroy cache.
1506  	 */
1507  	rcu_barrier();
1508  	kmem_cache_destroy(ext4_inode_cachep);
1509  }
1510  
ext4_clear_inode(struct inode * inode)1511  void ext4_clear_inode(struct inode *inode)
1512  {
1513  	ext4_fc_del(inode);
1514  	invalidate_inode_buffers(inode);
1515  	clear_inode(inode);
1516  	ext4_discard_preallocations(inode);
1517  	ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
1518  	dquot_drop(inode);
1519  	if (EXT4_I(inode)->jinode) {
1520  		jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode),
1521  					       EXT4_I(inode)->jinode);
1522  		jbd2_free_inode(EXT4_I(inode)->jinode);
1523  		EXT4_I(inode)->jinode = NULL;
1524  	}
1525  	fscrypt_put_encryption_info(inode);
1526  	fsverity_cleanup_inode(inode);
1527  }
1528  
ext4_nfs_get_inode(struct super_block * sb,u64 ino,u32 generation)1529  static struct inode *ext4_nfs_get_inode(struct super_block *sb,
1530  					u64 ino, u32 generation)
1531  {
1532  	struct inode *inode;
1533  
1534  	/*
1535  	 * Currently we don't know the generation for parent directory, so
1536  	 * a generation of 0 means "accept any"
1537  	 */
1538  	inode = ext4_iget(sb, ino, EXT4_IGET_HANDLE);
1539  	if (IS_ERR(inode))
1540  		return ERR_CAST(inode);
1541  	if (generation && inode->i_generation != generation) {
1542  		iput(inode);
1543  		return ERR_PTR(-ESTALE);
1544  	}
1545  
1546  	return inode;
1547  }
1548  
1549  static struct dentry *ext4_fh_to_dentry(struct super_block *sb, struct fid *fid,
1550  					int fh_len, int fh_type)
1551  {
1552  	return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
1553  				    ext4_nfs_get_inode);
1554  }
1555  
1556  static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid,
1557  					int fh_len, int fh_type)
1558  {
1559  	return generic_fh_to_parent(sb, fid, fh_len, fh_type,
1560  				    ext4_nfs_get_inode);
1561  }
1562  
1563  static int ext4_nfs_commit_metadata(struct inode *inode)
1564  {
1565  	struct writeback_control wbc = {
1566  		.sync_mode = WB_SYNC_ALL
1567  	};
1568  
1569  	trace_ext4_nfs_commit_metadata(inode);
1570  	return ext4_write_inode(inode, &wbc);
1571  }
1572  
1573  #ifdef CONFIG_QUOTA
1574  static const char * const quotatypes[] = INITQFNAMES;
1575  #define QTYPE2NAME(t) (quotatypes[t])
1576  
1577  static int ext4_write_dquot(struct dquot *dquot);
1578  static int ext4_acquire_dquot(struct dquot *dquot);
1579  static int ext4_release_dquot(struct dquot *dquot);
1580  static int ext4_mark_dquot_dirty(struct dquot *dquot);
1581  static int ext4_write_info(struct super_block *sb, int type);
1582  static int ext4_quota_on(struct super_block *sb, int type, int format_id,
1583  			 const struct path *path);
1584  static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
1585  			       size_t len, loff_t off);
1586  static ssize_t ext4_quota_write(struct super_block *sb, int type,
1587  				const char *data, size_t len, loff_t off);
1588  static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
1589  			     unsigned int flags);
1590  
1591  static struct dquot __rcu **ext4_get_dquots(struct inode *inode)
1592  {
1593  	return EXT4_I(inode)->i_dquot;
1594  }
1595  
1596  static const struct dquot_operations ext4_quota_operations = {
1597  	.get_reserved_space	= ext4_get_reserved_space,
1598  	.write_dquot		= ext4_write_dquot,
1599  	.acquire_dquot		= ext4_acquire_dquot,
1600  	.release_dquot		= ext4_release_dquot,
1601  	.mark_dirty		= ext4_mark_dquot_dirty,
1602  	.write_info		= ext4_write_info,
1603  	.alloc_dquot		= dquot_alloc,
1604  	.destroy_dquot		= dquot_destroy,
1605  	.get_projid		= ext4_get_projid,
1606  	.get_inode_usage	= ext4_get_inode_usage,
1607  	.get_next_id		= dquot_get_next_id,
1608  };
1609  
1610  static const struct quotactl_ops ext4_qctl_operations = {
1611  	.quota_on	= ext4_quota_on,
1612  	.quota_off	= ext4_quota_off,
1613  	.quota_sync	= dquot_quota_sync,
1614  	.get_state	= dquot_get_state,
1615  	.set_info	= dquot_set_dqinfo,
1616  	.get_dqblk	= dquot_get_dqblk,
1617  	.set_dqblk	= dquot_set_dqblk,
1618  	.get_nextdqblk	= dquot_get_next_dqblk,
1619  };
1620  #endif
1621  
1622  static const struct super_operations ext4_sops = {
1623  	.alloc_inode	= ext4_alloc_inode,
1624  	.free_inode	= ext4_free_in_core_inode,
1625  	.destroy_inode	= ext4_destroy_inode,
1626  	.write_inode	= ext4_write_inode,
1627  	.dirty_inode	= ext4_dirty_inode,
1628  	.drop_inode	= ext4_drop_inode,
1629  	.evict_inode	= ext4_evict_inode,
1630  	.put_super	= ext4_put_super,
1631  	.sync_fs	= ext4_sync_fs,
1632  	.freeze_fs	= ext4_freeze,
1633  	.unfreeze_fs	= ext4_unfreeze,
1634  	.statfs		= ext4_statfs,
1635  	.show_options	= ext4_show_options,
1636  	.shutdown	= ext4_shutdown,
1637  #ifdef CONFIG_QUOTA
1638  	.quota_read	= ext4_quota_read,
1639  	.quota_write	= ext4_quota_write,
1640  	.get_dquots	= ext4_get_dquots,
1641  #endif
1642  };
1643  
1644  static const struct export_operations ext4_export_ops = {
1645  	.encode_fh = generic_encode_ino32_fh,
1646  	.fh_to_dentry = ext4_fh_to_dentry,
1647  	.fh_to_parent = ext4_fh_to_parent,
1648  	.get_parent = ext4_get_parent,
1649  	.commit_metadata = ext4_nfs_commit_metadata,
1650  };
1651  
1652  enum {
1653  	Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid,
1654  	Opt_resgid, Opt_resuid, Opt_sb,
1655  	Opt_nouid32, Opt_debug, Opt_removed,
1656  	Opt_user_xattr, Opt_acl,
1657  	Opt_auto_da_alloc, Opt_noauto_da_alloc, Opt_noload,
1658  	Opt_commit, Opt_min_batch_time, Opt_max_batch_time, Opt_journal_dev,
1659  	Opt_journal_path, Opt_journal_checksum, Opt_journal_async_commit,
1660  	Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
1661  	Opt_data_err_abort, Opt_data_err_ignore, Opt_test_dummy_encryption,
1662  	Opt_inlinecrypt,
1663  	Opt_usrjquota, Opt_grpjquota, Opt_quota,
1664  	Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err,
1665  	Opt_usrquota, Opt_grpquota, Opt_prjquota,
1666  	Opt_dax, Opt_dax_always, Opt_dax_inode, Opt_dax_never,
1667  	Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_warn_on_error,
1668  	Opt_nowarn_on_error, Opt_mblk_io_submit, Opt_debug_want_extra_isize,
1669  	Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity,
1670  	Opt_inode_readahead_blks, Opt_journal_ioprio,
1671  	Opt_dioread_nolock, Opt_dioread_lock,
1672  	Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
1673  	Opt_max_dir_size_kb, Opt_nojournal_checksum, Opt_nombcache,
1674  	Opt_no_prefetch_block_bitmaps, Opt_mb_optimize_scan,
1675  	Opt_errors, Opt_data, Opt_data_err, Opt_jqfmt, Opt_dax_type,
1676  #ifdef CONFIG_EXT4_DEBUG
1677  	Opt_fc_debug_max_replay, Opt_fc_debug_force
1678  #endif
1679  };
1680  
1681  static const struct constant_table ext4_param_errors[] = {
1682  	{"continue",	EXT4_MOUNT_ERRORS_CONT},
1683  	{"panic",	EXT4_MOUNT_ERRORS_PANIC},
1684  	{"remount-ro",	EXT4_MOUNT_ERRORS_RO},
1685  	{}
1686  };
1687  
1688  static const struct constant_table ext4_param_data[] = {
1689  	{"journal",	EXT4_MOUNT_JOURNAL_DATA},
1690  	{"ordered",	EXT4_MOUNT_ORDERED_DATA},
1691  	{"writeback",	EXT4_MOUNT_WRITEBACK_DATA},
1692  	{}
1693  };
1694  
1695  static const struct constant_table ext4_param_data_err[] = {
1696  	{"abort",	Opt_data_err_abort},
1697  	{"ignore",	Opt_data_err_ignore},
1698  	{}
1699  };
1700  
1701  static const struct constant_table ext4_param_jqfmt[] = {
1702  	{"vfsold",	QFMT_VFS_OLD},
1703  	{"vfsv0",	QFMT_VFS_V0},
1704  	{"vfsv1",	QFMT_VFS_V1},
1705  	{}
1706  };
1707  
1708  static const struct constant_table ext4_param_dax[] = {
1709  	{"always",	Opt_dax_always},
1710  	{"inode",	Opt_dax_inode},
1711  	{"never",	Opt_dax_never},
1712  	{}
1713  };
1714  
1715  /*
1716   * Mount option specification
1717   * We don't use fsparam_flag_no because of the way we set the
1718   * options and the way we show them in _ext4_show_options(). To
1719   * keep the changes to a minimum, let's keep the negative options
1720   * separate for now.
1721   */
1722  static const struct fs_parameter_spec ext4_param_specs[] = {
1723  	fsparam_flag	("bsddf",		Opt_bsd_df),
1724  	fsparam_flag	("minixdf",		Opt_minix_df),
1725  	fsparam_flag	("grpid",		Opt_grpid),
1726  	fsparam_flag	("bsdgroups",		Opt_grpid),
1727  	fsparam_flag	("nogrpid",		Opt_nogrpid),
1728  	fsparam_flag	("sysvgroups",		Opt_nogrpid),
1729  	fsparam_gid	("resgid",		Opt_resgid),
1730  	fsparam_uid	("resuid",		Opt_resuid),
1731  	fsparam_u32	("sb",			Opt_sb),
1732  	fsparam_enum	("errors",		Opt_errors, ext4_param_errors),
1733  	fsparam_flag	("nouid32",		Opt_nouid32),
1734  	fsparam_flag	("debug",		Opt_debug),
1735  	fsparam_flag	("oldalloc",		Opt_removed),
1736  	fsparam_flag	("orlov",		Opt_removed),
1737  	fsparam_flag	("user_xattr",		Opt_user_xattr),
1738  	fsparam_flag	("acl",			Opt_acl),
1739  	fsparam_flag	("norecovery",		Opt_noload),
1740  	fsparam_flag	("noload",		Opt_noload),
1741  	fsparam_flag	("bh",			Opt_removed),
1742  	fsparam_flag	("nobh",		Opt_removed),
1743  	fsparam_u32	("commit",		Opt_commit),
1744  	fsparam_u32	("min_batch_time",	Opt_min_batch_time),
1745  	fsparam_u32	("max_batch_time",	Opt_max_batch_time),
1746  	fsparam_u32	("journal_dev",		Opt_journal_dev),
1747  	fsparam_bdev	("journal_path",	Opt_journal_path),
1748  	fsparam_flag	("journal_checksum",	Opt_journal_checksum),
1749  	fsparam_flag	("nojournal_checksum",	Opt_nojournal_checksum),
1750  	fsparam_flag	("journal_async_commit",Opt_journal_async_commit),
1751  	fsparam_flag	("abort",		Opt_abort),
1752  	fsparam_enum	("data",		Opt_data, ext4_param_data),
1753  	fsparam_enum	("data_err",		Opt_data_err,
1754  						ext4_param_data_err),
1755  	fsparam_string_empty
1756  			("usrjquota",		Opt_usrjquota),
1757  	fsparam_string_empty
1758  			("grpjquota",		Opt_grpjquota),
1759  	fsparam_enum	("jqfmt",		Opt_jqfmt, ext4_param_jqfmt),
1760  	fsparam_flag	("grpquota",		Opt_grpquota),
1761  	fsparam_flag	("quota",		Opt_quota),
1762  	fsparam_flag	("noquota",		Opt_noquota),
1763  	fsparam_flag	("usrquota",		Opt_usrquota),
1764  	fsparam_flag	("prjquota",		Opt_prjquota),
1765  	fsparam_flag	("barrier",		Opt_barrier),
1766  	fsparam_u32	("barrier",		Opt_barrier),
1767  	fsparam_flag	("nobarrier",		Opt_nobarrier),
1768  	fsparam_flag	("i_version",		Opt_removed),
1769  	fsparam_flag	("dax",			Opt_dax),
1770  	fsparam_enum	("dax",			Opt_dax_type, ext4_param_dax),
1771  	fsparam_u32	("stripe",		Opt_stripe),
1772  	fsparam_flag	("delalloc",		Opt_delalloc),
1773  	fsparam_flag	("nodelalloc",		Opt_nodelalloc),
1774  	fsparam_flag	("warn_on_error",	Opt_warn_on_error),
1775  	fsparam_flag	("nowarn_on_error",	Opt_nowarn_on_error),
1776  	fsparam_u32	("debug_want_extra_isize",
1777  						Opt_debug_want_extra_isize),
1778  	fsparam_flag	("mblk_io_submit",	Opt_removed),
1779  	fsparam_flag	("nomblk_io_submit",	Opt_removed),
1780  	fsparam_flag	("block_validity",	Opt_block_validity),
1781  	fsparam_flag	("noblock_validity",	Opt_noblock_validity),
1782  	fsparam_u32	("inode_readahead_blks",
1783  						Opt_inode_readahead_blks),
1784  	fsparam_u32	("journal_ioprio",	Opt_journal_ioprio),
1785  	fsparam_u32	("auto_da_alloc",	Opt_auto_da_alloc),
1786  	fsparam_flag	("auto_da_alloc",	Opt_auto_da_alloc),
1787  	fsparam_flag	("noauto_da_alloc",	Opt_noauto_da_alloc),
1788  	fsparam_flag	("dioread_nolock",	Opt_dioread_nolock),
1789  	fsparam_flag	("nodioread_nolock",	Opt_dioread_lock),
1790  	fsparam_flag	("dioread_lock",	Opt_dioread_lock),
1791  	fsparam_flag	("discard",		Opt_discard),
1792  	fsparam_flag	("nodiscard",		Opt_nodiscard),
1793  	fsparam_u32	("init_itable",		Opt_init_itable),
1794  	fsparam_flag	("init_itable",		Opt_init_itable),
1795  	fsparam_flag	("noinit_itable",	Opt_noinit_itable),
1796  #ifdef CONFIG_EXT4_DEBUG
1797  	fsparam_flag	("fc_debug_force",	Opt_fc_debug_force),
1798  	fsparam_u32	("fc_debug_max_replay",	Opt_fc_debug_max_replay),
1799  #endif
1800  	fsparam_u32	("max_dir_size_kb",	Opt_max_dir_size_kb),
1801  	fsparam_flag	("test_dummy_encryption",
1802  						Opt_test_dummy_encryption),
1803  	fsparam_string	("test_dummy_encryption",
1804  						Opt_test_dummy_encryption),
1805  	fsparam_flag	("inlinecrypt",		Opt_inlinecrypt),
1806  	fsparam_flag	("nombcache",		Opt_nombcache),
1807  	fsparam_flag	("no_mbcache",		Opt_nombcache),	/* for backward compatibility */
1808  	fsparam_flag	("prefetch_block_bitmaps",
1809  						Opt_removed),
1810  	fsparam_flag	("no_prefetch_block_bitmaps",
1811  						Opt_no_prefetch_block_bitmaps),
1812  	fsparam_s32	("mb_optimize_scan",	Opt_mb_optimize_scan),
1813  	fsparam_string	("check",		Opt_removed),	/* mount option from ext2/3 */
1814  	fsparam_flag	("nocheck",		Opt_removed),	/* mount option from ext2/3 */
1815  	fsparam_flag	("reservation",		Opt_removed),	/* mount option from ext2/3 */
1816  	fsparam_flag	("noreservation",	Opt_removed),	/* mount option from ext2/3 */
1817  	fsparam_u32	("journal",		Opt_removed),	/* mount option from ext2/3 */
1818  	{}
1819  };
1820  
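/*
 * Default I/O priority for journal commits: best-effort class, level 3.
 * The journal_ioprio= mount option accepts levels 0-7 in the same class
 * (see the Opt_journal_ioprio case in ext4_parse_param()).
 */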
1821  #define DEFAULT_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3))
1822  
1823  #define MOPT_SET	0x0001
1824  #define MOPT_CLEAR	0x0002
1825  #define MOPT_NOSUPPORT	0x0004
1826  #define MOPT_EXPLICIT	0x0008
1827  #ifdef CONFIG_QUOTA
1828  #define MOPT_Q		0
1829  #define MOPT_QFMT	0x0010
1830  #else
1831  #define MOPT_Q		MOPT_NOSUPPORT
1832  #define MOPT_QFMT	MOPT_NOSUPPORT
1833  #endif
1834  #define MOPT_NO_EXT2	0x0020
1835  #define MOPT_NO_EXT3	0x0040
1836  #define MOPT_EXT4_ONLY	(MOPT_NO_EXT2 | MOPT_NO_EXT3)
1837  #define MOPT_SKIP	0x0080
1838  #define	MOPT_2		0x0100
1839  
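/*
 * For illustration, the entry
 *	{Opt_dioread_nolock, EXT4_MOUNT_DIOREAD_NOLOCK,
 *	 MOPT_EXT4_ONLY | MOPT_SET}
 * below means that "dioread_nolock" sets the EXT4_MOUNT_DIOREAD_NOLOCK
 * bit and is rejected when the filesystem is mounted as ext2 or ext3.
 */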
1840  static const struct mount_opts {
1841  	int	token;
1842  	int	mount_opt;
1843  	int	flags;
1844  } ext4_mount_opts[] = {
1845  	{Opt_minix_df, EXT4_MOUNT_MINIX_DF, MOPT_SET},
1846  	{Opt_bsd_df, EXT4_MOUNT_MINIX_DF, MOPT_CLEAR},
1847  	{Opt_grpid, EXT4_MOUNT_GRPID, MOPT_SET},
1848  	{Opt_nogrpid, EXT4_MOUNT_GRPID, MOPT_CLEAR},
1849  	{Opt_block_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_SET},
1850  	{Opt_noblock_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_CLEAR},
1851  	{Opt_dioread_nolock, EXT4_MOUNT_DIOREAD_NOLOCK,
1852  	 MOPT_EXT4_ONLY | MOPT_SET},
1853  	{Opt_dioread_lock, EXT4_MOUNT_DIOREAD_NOLOCK,
1854  	 MOPT_EXT4_ONLY | MOPT_CLEAR},
1855  	{Opt_discard, EXT4_MOUNT_DISCARD, MOPT_SET},
1856  	{Opt_nodiscard, EXT4_MOUNT_DISCARD, MOPT_CLEAR},
1857  	{Opt_delalloc, EXT4_MOUNT_DELALLOC,
1858  	 MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT},
1859  	{Opt_nodelalloc, EXT4_MOUNT_DELALLOC,
1860  	 MOPT_EXT4_ONLY | MOPT_CLEAR},
1861  	{Opt_warn_on_error, EXT4_MOUNT_WARN_ON_ERROR, MOPT_SET},
1862  	{Opt_nowarn_on_error, EXT4_MOUNT_WARN_ON_ERROR, MOPT_CLEAR},
1863  	{Opt_commit, 0, MOPT_NO_EXT2},
1864  	{Opt_nojournal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM,
1865  	 MOPT_EXT4_ONLY | MOPT_CLEAR},
1866  	{Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM,
1867  	 MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT},
1868  	{Opt_journal_async_commit, (EXT4_MOUNT_JOURNAL_ASYNC_COMMIT |
1869  				    EXT4_MOUNT_JOURNAL_CHECKSUM),
1870  	 MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT},
1871  	{Opt_noload, EXT4_MOUNT_NOLOAD, MOPT_NO_EXT2 | MOPT_SET},
1872  	{Opt_data_err, EXT4_MOUNT_DATA_ERR_ABORT, MOPT_NO_EXT2},
1873  	{Opt_barrier, EXT4_MOUNT_BARRIER, MOPT_SET},
1874  	{Opt_nobarrier, EXT4_MOUNT_BARRIER, MOPT_CLEAR},
1875  	{Opt_noauto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_SET},
1876  	{Opt_auto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_CLEAR},
1877  	{Opt_noinit_itable, EXT4_MOUNT_INIT_INODE_TABLE, MOPT_CLEAR},
1878  	{Opt_dax_type, 0, MOPT_EXT4_ONLY},
1879  	{Opt_journal_dev, 0, MOPT_NO_EXT2},
1880  	{Opt_journal_path, 0, MOPT_NO_EXT2},
1881  	{Opt_journal_ioprio, 0, MOPT_NO_EXT2},
1882  	{Opt_data, 0, MOPT_NO_EXT2},
1883  	{Opt_user_xattr, EXT4_MOUNT_XATTR_USER, MOPT_SET},
1884  #ifdef CONFIG_EXT4_FS_POSIX_ACL
1885  	{Opt_acl, EXT4_MOUNT_POSIX_ACL, MOPT_SET},
1886  #else
1887  	{Opt_acl, 0, MOPT_NOSUPPORT},
1888  #endif
1889  	{Opt_nouid32, EXT4_MOUNT_NO_UID32, MOPT_SET},
1890  	{Opt_debug, EXT4_MOUNT_DEBUG, MOPT_SET},
1891  	{Opt_quota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA, MOPT_SET | MOPT_Q},
1892  	{Opt_usrquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA,
1893  							MOPT_SET | MOPT_Q},
1894  	{Opt_grpquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_GRPQUOTA,
1895  							MOPT_SET | MOPT_Q},
1896  	{Opt_prjquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_PRJQUOTA,
1897  							MOPT_SET | MOPT_Q},
1898  	{Opt_noquota, (EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA |
1899  		       EXT4_MOUNT_GRPQUOTA | EXT4_MOUNT_PRJQUOTA),
1900  							MOPT_CLEAR | MOPT_Q},
1901  	{Opt_usrjquota, 0, MOPT_Q},
1902  	{Opt_grpjquota, 0, MOPT_Q},
1903  	{Opt_jqfmt, 0, MOPT_QFMT},
1904  	{Opt_nombcache, EXT4_MOUNT_NO_MBCACHE, MOPT_SET},
1905  	{Opt_no_prefetch_block_bitmaps, EXT4_MOUNT_NO_PREFETCH_BLOCK_BITMAPS,
1906  	 MOPT_SET},
1907  #ifdef CONFIG_EXT4_DEBUG
1908  	{Opt_fc_debug_force, EXT4_MOUNT2_JOURNAL_FAST_COMMIT,
1909  	 MOPT_SET | MOPT_2 | MOPT_EXT4_ONLY},
1910  #endif
1911  	{Opt_abort, EXT4_MOUNT2_ABORT, MOPT_SET | MOPT_2},
1912  	{Opt_err, 0, 0}
1913  };
1914  
1915  #if IS_ENABLED(CONFIG_UNICODE)
1916  static const struct ext4_sb_encodings {
1917  	__u16 magic;
1918  	char *name;
1919  	unsigned int version;
1920  } ext4_sb_encoding_map[] = {
1921  	{EXT4_ENC_UTF8_12_1, "utf8", UNICODE_AGE(12, 1, 0)},
1922  };
1923  
1924  static const struct ext4_sb_encodings *
1925  ext4_sb_read_encoding(const struct ext4_super_block *es)
1926  {
1927  	__u16 magic = le16_to_cpu(es->s_encoding);
1928  	int i;
1929  
1930  	for (i = 0; i < ARRAY_SIZE(ext4_sb_encoding_map); i++)
1931  		if (magic == ext4_sb_encoding_map[i].magic)
1932  			return &ext4_sb_encoding_map[i];
1933  
1934  	return NULL;
1935  }
1936  #endif
1937  
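/*
 * Each EXT4_SPEC_* bit records that the corresponding option was given
 * explicitly, so that ext4_apply_options() only overwrites the fields
 * the user actually set.  Bit 6 is currently unused.
 */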
1938  #define EXT4_SPEC_JQUOTA			(1 <<  0)
1939  #define EXT4_SPEC_JQFMT				(1 <<  1)
1940  #define EXT4_SPEC_DATAJ				(1 <<  2)
1941  #define EXT4_SPEC_SB_BLOCK			(1 <<  3)
1942  #define EXT4_SPEC_JOURNAL_DEV			(1 <<  4)
1943  #define EXT4_SPEC_JOURNAL_IOPRIO		(1 <<  5)
1944  #define EXT4_SPEC_s_want_extra_isize		(1 <<  7)
1945  #define EXT4_SPEC_s_max_batch_time		(1 <<  8)
1946  #define EXT4_SPEC_s_min_batch_time		(1 <<  9)
1947  #define EXT4_SPEC_s_inode_readahead_blks	(1 << 10)
1948  #define EXT4_SPEC_s_li_wait_mult		(1 << 11)
1949  #define EXT4_SPEC_s_max_dir_size_kb		(1 << 12)
1950  #define EXT4_SPEC_s_stripe			(1 << 13)
1951  #define EXT4_SPEC_s_resuid			(1 << 14)
1952  #define EXT4_SPEC_s_resgid			(1 << 15)
1953  #define EXT4_SPEC_s_commit_interval		(1 << 16)
1954  #define EXT4_SPEC_s_fc_debug_max_replay		(1 << 17)
1955  #define EXT4_SPEC_s_sb_block			(1 << 18)
1956  #define EXT4_SPEC_mb_optimize_scan		(1 << 19)
1957  
1958  struct ext4_fs_context {
1959  	char		*s_qf_names[EXT4_MAXQUOTAS];
1960  	struct fscrypt_dummy_policy dummy_enc_policy;
1961  	int		s_jquota_fmt;	/* Format of quota to use */
1962  #ifdef CONFIG_EXT4_DEBUG
1963  	int s_fc_debug_max_replay;
1964  #endif
1965  	unsigned short	qname_spec;
1966  	unsigned long	vals_s_flags;	/* Bits to set in s_flags */
1967  	unsigned long	mask_s_flags;	/* Bits changed in s_flags */
1968  	unsigned long	journal_devnum;
1969  	unsigned long	s_commit_interval;
1970  	unsigned long	s_stripe;
1971  	unsigned int	s_inode_readahead_blks;
1972  	unsigned int	s_want_extra_isize;
1973  	unsigned int	s_li_wait_mult;
1974  	unsigned int	s_max_dir_size_kb;
1975  	unsigned int	journal_ioprio;
1976  	unsigned int	vals_s_mount_opt;
1977  	unsigned int	mask_s_mount_opt;
1978  	unsigned int	vals_s_mount_opt2;
1979  	unsigned int	mask_s_mount_opt2;
1980  	unsigned int	opt_flags;	/* MOPT flags */
1981  	unsigned int	spec;
1982  	u32		s_max_batch_time;
1983  	u32		s_min_batch_time;
1984  	kuid_t		s_resuid;
1985  	kgid_t		s_resgid;
1986  	ext4_fsblk_t	s_sb_block;
1987  };
1988  
1989  static void ext4_fc_free(struct fs_context *fc)
1990  {
1991  	struct ext4_fs_context *ctx = fc->fs_private;
1992  	int i;
1993  
1994  	if (!ctx)
1995  		return;
1996  
1997  	for (i = 0; i < EXT4_MAXQUOTAS; i++)
1998  		kfree(ctx->s_qf_names[i]);
1999  
2000  	fscrypt_free_dummy_policy(&ctx->dummy_enc_policy);
2001  	kfree(ctx);
2002  }
2003  
2004  int ext4_init_fs_context(struct fs_context *fc)
2005  {
2006  	struct ext4_fs_context *ctx;
2007  
2008  	ctx = kzalloc(sizeof(struct ext4_fs_context), GFP_KERNEL);
2009  	if (!ctx)
2010  		return -ENOMEM;
2011  
2012  	fc->fs_private = ctx;
2013  	fc->ops = &ext4_context_ops;
2014  
2015  	return 0;
2016  }
2017  
2018  #ifdef CONFIG_QUOTA
2019  /*
2020   * Note the name of the specified quota file.
2021   */
2022  static int note_qf_name(struct fs_context *fc, int qtype,
2023  		       struct fs_parameter *param)
2024  {
2025  	struct ext4_fs_context *ctx = fc->fs_private;
2026  	char *qname;
2027  
2028  	if (param->size < 1) {
2029  		ext4_msg(NULL, KERN_ERR, "Missing quota name");
2030  		return -EINVAL;
2031  	}
2032  	if (strchr(param->string, '/')) {
2033  		ext4_msg(NULL, KERN_ERR,
2034  			 "quotafile must be on filesystem root");
2035  		return -EINVAL;
2036  	}
2037  	if (ctx->s_qf_names[qtype]) {
2038  		if (strcmp(ctx->s_qf_names[qtype], param->string) != 0) {
2039  			ext4_msg(NULL, KERN_ERR,
2040  				 "%s quota file already specified",
2041  				 QTYPE2NAME(qtype));
2042  			return -EINVAL;
2043  		}
2044  		return 0;
2045  	}
2046  
2047  	qname = kmemdup_nul(param->string, param->size, GFP_KERNEL);
2048  	if (!qname) {
2049  		ext4_msg(NULL, KERN_ERR,
2050  			 "Not enough memory for storing quotafile name");
2051  		return -ENOMEM;
2052  	}
2053  	ctx->s_qf_names[qtype] = qname;
2054  	ctx->qname_spec |= 1 << qtype;
2055  	ctx->spec |= EXT4_SPEC_JQUOTA;
2056  	return 0;
2057  }
2058  
2059  /*
2060   * Clear the name of the specified quota file.
2061   */
2062  static int unnote_qf_name(struct fs_context *fc, int qtype)
2063  {
2064  	struct ext4_fs_context *ctx = fc->fs_private;
2065  
2066  	kfree(ctx->s_qf_names[qtype]);
2067  
2068  	ctx->s_qf_names[qtype] = NULL;
2069  	ctx->qname_spec |= 1 << qtype;
2070  	ctx->spec |= EXT4_SPEC_JQUOTA;
2071  	return 0;
2072  }
2073  #endif
2074  
2075  static int ext4_parse_test_dummy_encryption(const struct fs_parameter *param,
2076  					    struct ext4_fs_context *ctx)
2077  {
2078  	int err;
2079  
2080  	if (!IS_ENABLED(CONFIG_FS_ENCRYPTION)) {
2081  		ext4_msg(NULL, KERN_WARNING,
2082  			 "test_dummy_encryption option not supported");
2083  		return -EINVAL;
2084  	}
2085  	err = fscrypt_parse_test_dummy_encryption(param,
2086  						  &ctx->dummy_enc_policy);
2087  	if (err == -EINVAL) {
2088  		ext4_msg(NULL, KERN_WARNING,
2089  			 "Value of option \"%s\" is unrecognized", param->key);
2090  	} else if (err == -EEXIST) {
2091  		ext4_msg(NULL, KERN_WARNING,
2092  			 "Conflicting test_dummy_encryption options");
2093  		return -EINVAL;
2094  	}
2095  	return err;
2096  }
2097  
2098  #define EXT4_SET_CTX(name)						\
2099  static inline void ctx_set_##name(struct ext4_fs_context *ctx,		\
2100  				  unsigned long flag)			\
2101  {									\
2102  	ctx->mask_s_##name |= flag;					\
2103  	ctx->vals_s_##name |= flag;					\
2104  }
2105  
2106  #define EXT4_CLEAR_CTX(name)						\
2107  static inline void ctx_clear_##name(struct ext4_fs_context *ctx,	\
2108  				    unsigned long flag)			\
2109  {									\
2110  	ctx->mask_s_##name |= flag;					\
2111  	ctx->vals_s_##name &= ~flag;					\
2112  }
2113  
2114  #define EXT4_TEST_CTX(name)						\
2115  static inline unsigned long						\
2116  ctx_test_##name(struct ext4_fs_context *ctx, unsigned long flag)	\
2117  {									\
2118  	return (ctx->vals_s_##name & flag);				\
2119  }
2120  
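/*
 * As a sketch of what the macros above generate, EXT4_SET_CTX(mount_opt)
 * expands (roughly) to:
 *
 *	static inline void ctx_set_mount_opt(struct ext4_fs_context *ctx,
 *					     unsigned long flag)
 *	{
 *		ctx->mask_s_mount_opt |= flag;
 *		ctx->vals_s_mount_opt |= flag;
 *	}
 *
 * mask_* records which bits were touched and vals_* their new values,
 * so ext4_apply_options() can merge only the touched bits into the sb.
 */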
2121  EXT4_SET_CTX(flags); /* set only */
2122  EXT4_SET_CTX(mount_opt);
2123  EXT4_CLEAR_CTX(mount_opt);
2124  EXT4_TEST_CTX(mount_opt);
2125  EXT4_SET_CTX(mount_opt2);
2126  EXT4_CLEAR_CTX(mount_opt2);
2127  EXT4_TEST_CTX(mount_opt2);
2128  
2129  static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param)
2130  {
2131  	struct ext4_fs_context *ctx = fc->fs_private;
2132  	struct fs_parse_result result;
2133  	const struct mount_opts *m;
2134  	int is_remount;
2135  	int token;
2136  
2137  	token = fs_parse(fc, ext4_param_specs, param, &result);
2138  	if (token < 0)
2139  		return token;
2140  	is_remount = fc->purpose == FS_CONTEXT_FOR_RECONFIGURE;
2141  
2142  	for (m = ext4_mount_opts; m->token != Opt_err; m++)
2143  		if (token == m->token)
2144  			break;
2145  
2146  	ctx->opt_flags |= m->flags;
2147  
2148  	if (m->flags & MOPT_EXPLICIT) {
2149  		if (m->mount_opt & EXT4_MOUNT_DELALLOC) {
2150  			ctx_set_mount_opt2(ctx, EXT4_MOUNT2_EXPLICIT_DELALLOC);
2151  		} else if (m->mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) {
2152  			ctx_set_mount_opt2(ctx,
2153  				       EXT4_MOUNT2_EXPLICIT_JOURNAL_CHECKSUM);
2154  		} else
2155  			return -EINVAL;
2156  	}
2157  
2158  	if (m->flags & MOPT_NOSUPPORT) {
2159  		ext4_msg(NULL, KERN_ERR, "%s option not supported",
2160  			 param->key);
2161  		return 0;
2162  	}
2163  
2164  	switch (token) {
2165  #ifdef CONFIG_QUOTA
2166  	case Opt_usrjquota:
2167  		if (!*param->string)
2168  			return unnote_qf_name(fc, USRQUOTA);
2169  		else
2170  			return note_qf_name(fc, USRQUOTA, param);
2171  	case Opt_grpjquota:
2172  		if (!*param->string)
2173  			return unnote_qf_name(fc, GRPQUOTA);
2174  		else
2175  			return note_qf_name(fc, GRPQUOTA, param);
2176  #endif
2177  	case Opt_sb:
2178  		if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) {
2179  			ext4_msg(NULL, KERN_WARNING,
2180  				 "Ignoring %s option on remount", param->key);
2181  		} else {
2182  			ctx->s_sb_block = result.uint_32;
2183  			ctx->spec |= EXT4_SPEC_s_sb_block;
2184  		}
2185  		return 0;
2186  	case Opt_removed:
2187  		ext4_msg(NULL, KERN_WARNING, "Ignoring removed %s option",
2188  			 param->key);
2189  		return 0;
2190  	case Opt_inlinecrypt:
2191  #ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT
2192  		ctx_set_flags(ctx, SB_INLINECRYPT);
2193  #else
2194  		ext4_msg(NULL, KERN_ERR, "inline encryption not supported");
2195  #endif
2196  		return 0;
2197  	case Opt_errors:
2198  		ctx_clear_mount_opt(ctx, EXT4_MOUNT_ERRORS_MASK);
2199  		ctx_set_mount_opt(ctx, result.uint_32);
2200  		return 0;
2201  #ifdef CONFIG_QUOTA
2202  	case Opt_jqfmt:
2203  		ctx->s_jquota_fmt = result.uint_32;
2204  		ctx->spec |= EXT4_SPEC_JQFMT;
2205  		return 0;
2206  #endif
2207  	case Opt_data:
2208  		ctx_clear_mount_opt(ctx, EXT4_MOUNT_DATA_FLAGS);
2209  		ctx_set_mount_opt(ctx, result.uint_32);
2210  		ctx->spec |= EXT4_SPEC_DATAJ;
2211  		return 0;
2212  	case Opt_commit:
2213  		if (result.uint_32 == 0)
2214  			result.uint_32 = JBD2_DEFAULT_MAX_COMMIT_AGE;
2215  		else if (result.uint_32 > INT_MAX / HZ) {
2216  			ext4_msg(NULL, KERN_ERR,
2217  				 "Invalid commit interval %d, "
2218  				 "must be smaller than %d",
2219  				 result.uint_32, INT_MAX / HZ);
2220  			return -EINVAL;
2221  		}
2222  		ctx->s_commit_interval = HZ * result.uint_32;
2223  		ctx->spec |= EXT4_SPEC_s_commit_interval;
2224  		return 0;
2225  	case Opt_debug_want_extra_isize:
2226  		if ((result.uint_32 & 1) || (result.uint_32 < 4)) {
2227  			ext4_msg(NULL, KERN_ERR,
2228  				 "Invalid want_extra_isize %d", result.uint_32);
2229  			return -EINVAL;
2230  		}
2231  		ctx->s_want_extra_isize = result.uint_32;
2232  		ctx->spec |= EXT4_SPEC_s_want_extra_isize;
2233  		return 0;
2234  	case Opt_max_batch_time:
2235  		ctx->s_max_batch_time = result.uint_32;
2236  		ctx->spec |= EXT4_SPEC_s_max_batch_time;
2237  		return 0;
2238  	case Opt_min_batch_time:
2239  		ctx->s_min_batch_time = result.uint_32;
2240  		ctx->spec |= EXT4_SPEC_s_min_batch_time;
2241  		return 0;
2242  	case Opt_inode_readahead_blks:
2243  		if (result.uint_32 &&
2244  		    (result.uint_32 > (1 << 30) ||
2245  		     !is_power_of_2(result.uint_32))) {
2246  			ext4_msg(NULL, KERN_ERR,
2247  				 "EXT4-fs: inode_readahead_blks must be "
2248  				 "0 or a power of 2 smaller than 2^31");
2249  			return -EINVAL;
2250  		}
2251  		ctx->s_inode_readahead_blks = result.uint_32;
2252  		ctx->spec |= EXT4_SPEC_s_inode_readahead_blks;
2253  		return 0;
2254  	case Opt_init_itable:
2255  		ctx_set_mount_opt(ctx, EXT4_MOUNT_INIT_INODE_TABLE);
2256  		ctx->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT;
2257  		if (param->type == fs_value_is_string)
2258  			ctx->s_li_wait_mult = result.uint_32;
2259  		ctx->spec |= EXT4_SPEC_s_li_wait_mult;
2260  		return 0;
2261  	case Opt_max_dir_size_kb:
2262  		ctx->s_max_dir_size_kb = result.uint_32;
2263  		ctx->spec |= EXT4_SPEC_s_max_dir_size_kb;
2264  		return 0;
2265  #ifdef CONFIG_EXT4_DEBUG
2266  	case Opt_fc_debug_max_replay:
2267  		ctx->s_fc_debug_max_replay = result.uint_32;
2268  		ctx->spec |= EXT4_SPEC_s_fc_debug_max_replay;
2269  		return 0;
2270  #endif
2271  	case Opt_stripe:
2272  		ctx->s_stripe = result.uint_32;
2273  		ctx->spec |= EXT4_SPEC_s_stripe;
2274  		return 0;
2275  	case Opt_resuid:
2276  		ctx->s_resuid = result.uid;
2277  		ctx->spec |= EXT4_SPEC_s_resuid;
2278  		return 0;
2279  	case Opt_resgid:
2280  		ctx->s_resgid = result.gid;
2281  		ctx->spec |= EXT4_SPEC_s_resgid;
2282  		return 0;
2283  	case Opt_journal_dev:
2284  		if (is_remount) {
2285  			ext4_msg(NULL, KERN_ERR,
2286  				 "Cannot specify journal on remount");
2287  			return -EINVAL;
2288  		}
2289  		ctx->journal_devnum = result.uint_32;
2290  		ctx->spec |= EXT4_SPEC_JOURNAL_DEV;
2291  		return 0;
2292  	case Opt_journal_path:
2293  	{
2294  		struct inode *journal_inode;
2295  		struct path path;
2296  		int error;
2297  
2298  		if (is_remount) {
2299  			ext4_msg(NULL, KERN_ERR,
2300  				 "Cannot specify journal on remount");
2301  			return -EINVAL;
2302  		}
2303  
2304  		error = fs_lookup_param(fc, param, 1, LOOKUP_FOLLOW, &path);
2305  		if (error) {
2306  			ext4_msg(NULL, KERN_ERR, "error: could not find "
2307  				 "journal device path");
2308  			return -EINVAL;
2309  		}
2310  
2311  		journal_inode = d_inode(path.dentry);
2312  		ctx->journal_devnum = new_encode_dev(journal_inode->i_rdev);
2313  		ctx->spec |= EXT4_SPEC_JOURNAL_DEV;
2314  		path_put(&path);
2315  		return 0;
2316  	}
2317  	case Opt_journal_ioprio:
2318  		if (result.uint_32 > 7) {
2319  			ext4_msg(NULL, KERN_ERR, "Invalid journal IO priority"
2320  				 " (must be 0-7)");
2321  			return -EINVAL;
2322  		}
2323  		ctx->journal_ioprio =
2324  			IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, result.uint_32);
2325  		ctx->spec |= EXT4_SPEC_JOURNAL_IOPRIO;
2326  		return 0;
2327  	case Opt_test_dummy_encryption:
2328  		return ext4_parse_test_dummy_encryption(param, ctx);
2329  	case Opt_dax:
2330  	case Opt_dax_type:
2331  #ifdef CONFIG_FS_DAX
2332  	{
2333  		int type = (token == Opt_dax) ?
2334  			   Opt_dax : result.uint_32;
2335  
2336  		switch (type) {
2337  		case Opt_dax:
2338  		case Opt_dax_always:
2339  			ctx_set_mount_opt(ctx, EXT4_MOUNT_DAX_ALWAYS);
2340  			ctx_clear_mount_opt2(ctx, EXT4_MOUNT2_DAX_NEVER);
2341  			break;
2342  		case Opt_dax_never:
2343  			ctx_set_mount_opt2(ctx, EXT4_MOUNT2_DAX_NEVER);
2344  			ctx_clear_mount_opt(ctx, EXT4_MOUNT_DAX_ALWAYS);
2345  			break;
2346  		case Opt_dax_inode:
2347  			ctx_clear_mount_opt(ctx, EXT4_MOUNT_DAX_ALWAYS);
2348  			ctx_clear_mount_opt2(ctx, EXT4_MOUNT2_DAX_NEVER);
2349  			/* Strictly for printing options */
2350  			ctx_set_mount_opt2(ctx, EXT4_MOUNT2_DAX_INODE);
2351  			break;
2352  		}
2353  		return 0;
2354  	}
2355  #else
2356  		ext4_msg(NULL, KERN_INFO, "dax option not supported");
2357  		return -EINVAL;
2358  #endif
2359  	case Opt_data_err:
2360  		if (result.uint_32 == Opt_data_err_abort)
2361  			ctx_set_mount_opt(ctx, m->mount_opt);
2362  		else if (result.uint_32 == Opt_data_err_ignore)
2363  			ctx_clear_mount_opt(ctx, m->mount_opt);
2364  		return 0;
2365  	case Opt_mb_optimize_scan:
2366  		if (result.int_32 == 1) {
2367  			ctx_set_mount_opt2(ctx, EXT4_MOUNT2_MB_OPTIMIZE_SCAN);
2368  			ctx->spec |= EXT4_SPEC_mb_optimize_scan;
2369  		} else if (result.int_32 == 0) {
2370  			ctx_clear_mount_opt2(ctx, EXT4_MOUNT2_MB_OPTIMIZE_SCAN);
2371  			ctx->spec |= EXT4_SPEC_mb_optimize_scan;
2372  		} else {
2373  			ext4_msg(NULL, KERN_WARNING,
2374  				 "mb_optimize_scan should be set to 0 or 1.");
2375  			return -EINVAL;
2376  		}
2377  		return 0;
2378  	}
2379  
2380  	/*
2381  	 * At this point we should only be getting options requiring MOPT_SET
2382  	 * or MOPT_CLEAR. Anything else is a bug.
2383  	 */
2384  	if (m->token == Opt_err) {
2385  		ext4_msg(NULL, KERN_WARNING, "buggy handling of option %s",
2386  			 param->key);
2387  		WARN_ON(1);
2388  		return -EINVAL;
2389  	} else {
2392  		unsigned int set = 0;
2393  
2394  		if ((param->type == fs_value_is_flag) ||
2395  		    result.uint_32 > 0)
2396  			set = 1;
2397  
2398  		if (m->flags & MOPT_CLEAR)
2399  			set = !set;
2400  		else if (unlikely(!(m->flags & MOPT_SET))) {
2401  			ext4_msg(NULL, KERN_WARNING,
2402  				 "buggy handling of option %s",
2403  				 param->key);
2404  			WARN_ON(1);
2405  			return -EINVAL;
2406  		}
2407  		if (m->flags & MOPT_2) {
2408  			if (set != 0)
2409  				ctx_set_mount_opt2(ctx, m->mount_opt);
2410  			else
2411  				ctx_clear_mount_opt2(ctx, m->mount_opt);
2412  		} else {
2413  			if (set != 0)
2414  				ctx_set_mount_opt(ctx, m->mount_opt);
2415  			else
2416  				ctx_clear_mount_opt(ctx, m->mount_opt);
2417  		}
2418  	}
2419  
2420  	return 0;
2421  }
2422  
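/*
 * Split a legacy comma-separated option string into individual
 * fs_parameters (a flag for a bare key, a string for key=value) and
 * feed each one to ext4_parse_param().
 */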
2423  static int parse_options(struct fs_context *fc, char *options)
2424  {
2425  	struct fs_parameter param;
2426  	int ret;
2427  	char *key;
2428  
2429  	if (!options)
2430  		return 0;
2431  
2432  	while ((key = strsep(&options, ",")) != NULL) {
2433  		if (*key) {
2434  			size_t v_len = 0;
2435  			char *value = strchr(key, '=');
2436  
2437  			param.type = fs_value_is_flag;
2438  			param.string = NULL;
2439  
2440  			if (value) {
2441  				if (value == key)
2442  					continue;
2443  
2444  				*value++ = 0;
2445  				v_len = strlen(value);
2446  				param.string = kmemdup_nul(value, v_len,
2447  							   GFP_KERNEL);
2448  				if (!param.string)
2449  					return -ENOMEM;
2450  				param.type = fs_value_is_string;
2451  			}
2452  
2453  			param.key = key;
2454  			param.size = v_len;
2455  
2456  			ret = ext4_parse_param(fc, &param);
2457  			kfree(param.string);
2458  			if (ret < 0)
2459  				return ret;
2460  		}
2461  	}
2462  
2463  	ret = ext4_validate_options(fc);
2464  	if (ret < 0)
2465  		return ret;
2466  
2467  	return 0;
2468  }
2469  
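/*
 * Parse and apply the mount options stored in the on-disk superblock's
 * s_mount_opts field, using a throwaway fs_context.  A parse or
 * consistency failure here only warns; the mount carries on without
 * the superblock options.
 */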
2470  static int parse_apply_sb_mount_options(struct super_block *sb,
2471  					struct ext4_fs_context *m_ctx)
2472  {
2473  	struct ext4_sb_info *sbi = EXT4_SB(sb);
2474  	char *s_mount_opts = NULL;
2475  	struct ext4_fs_context *s_ctx = NULL;
2476  	struct fs_context *fc = NULL;
2477  	int ret = -ENOMEM;
2478  
2479  	if (!sbi->s_es->s_mount_opts[0])
2480  		return 0;
2481  
2482  	s_mount_opts = kstrndup(sbi->s_es->s_mount_opts,
2483  				sizeof(sbi->s_es->s_mount_opts),
2484  				GFP_KERNEL);
2485  	if (!s_mount_opts)
2486  		return ret;
2487  
2488  	fc = kzalloc(sizeof(struct fs_context), GFP_KERNEL);
2489  	if (!fc)
2490  		goto out_free;
2491  
2492  	s_ctx = kzalloc(sizeof(struct ext4_fs_context), GFP_KERNEL);
2493  	if (!s_ctx)
2494  		goto out_free;
2495  
2496  	fc->fs_private = s_ctx;
2497  	fc->s_fs_info = sbi;
2498  
2499  	ret = parse_options(fc, s_mount_opts);
2500  	if (ret < 0)
2501  		goto parse_failed;
2502  
2503  	ret = ext4_check_opt_consistency(fc, sb);
2504  	if (ret < 0) {
2505  parse_failed:
2506  		ext4_msg(sb, KERN_WARNING,
2507  			 "failed to parse options in superblock: %s",
2508  			 s_mount_opts);
2509  		ret = 0;
2510  		goto out_free;
2511  	}
2512  
2513  	if (s_ctx->spec & EXT4_SPEC_JOURNAL_DEV)
2514  		m_ctx->journal_devnum = s_ctx->journal_devnum;
2515  	if (s_ctx->spec & EXT4_SPEC_JOURNAL_IOPRIO)
2516  		m_ctx->journal_ioprio = s_ctx->journal_ioprio;
2517  
2518  	ext4_apply_options(fc, sb);
2519  	ret = 0;
2520  
2521  out_free:
2522  	if (fc) {
2523  		ext4_fc_free(fc);
2524  		kfree(fc);
2525  	}
2526  	kfree(s_mount_opts);
2527  	return ret;
2528  }
2529  
2530  static void ext4_apply_quota_options(struct fs_context *fc,
2531  				     struct super_block *sb)
2532  {
2533  #ifdef CONFIG_QUOTA
2534  	bool quota_feature = ext4_has_feature_quota(sb);
2535  	struct ext4_fs_context *ctx = fc->fs_private;
2536  	struct ext4_sb_info *sbi = EXT4_SB(sb);
2537  	char *qname;
2538  	int i;
2539  
2540  	if (quota_feature)
2541  		return;
2542  
2543  	if (ctx->spec & EXT4_SPEC_JQUOTA) {
2544  		for (i = 0; i < EXT4_MAXQUOTAS; i++) {
2545  			if (!(ctx->qname_spec & (1 << i)))
2546  				continue;
2547  
2548  			qname = ctx->s_qf_names[i]; /* May be NULL */
2549  			if (qname)
2550  				set_opt(sb, QUOTA);
2551  			ctx->s_qf_names[i] = NULL;
2552  			qname = rcu_replace_pointer(sbi->s_qf_names[i], qname,
2553  						lockdep_is_held(&sb->s_umount));
2554  			if (qname)
2555  				kfree_rcu_mightsleep(qname);
2556  		}
2557  	}
2558  
2559  	if (ctx->spec & EXT4_SPEC_JQFMT)
2560  		sbi->s_jquota_fmt = ctx->s_jquota_fmt;
2561  #endif
2562  }
2563  
2564  /*
2565   * Check quota settings consistency.
2566   */
2567  static int ext4_check_quota_consistency(struct fs_context *fc,
2568  					struct super_block *sb)
2569  {
2570  #ifdef CONFIG_QUOTA
2571  	struct ext4_fs_context *ctx = fc->fs_private;
2572  	struct ext4_sb_info *sbi = EXT4_SB(sb);
2573  	bool quota_feature = ext4_has_feature_quota(sb);
2574  	bool quota_loaded = sb_any_quota_loaded(sb);
2575  	bool usr_qf_name, grp_qf_name, usrquota, grpquota;
2576  	int quota_flags, i;
2577  
2578  	/*
2579  	 * We do the test below only for project quotas. 'usrquota' and
2580  	 * 'grpquota' mount options are allowed even without quota feature
2581  	 * to support legacy quotas in quota files.
2582  	 */
2583  	if (ctx_test_mount_opt(ctx, EXT4_MOUNT_PRJQUOTA) &&
2584  	    !ext4_has_feature_project(sb)) {
2585  		ext4_msg(NULL, KERN_ERR, "Project quota feature not enabled. "
2586  			 "Cannot enable project quota enforcement.");
2587  		return -EINVAL;
2588  	}
2589  
2590  	quota_flags = EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA |
2591  		      EXT4_MOUNT_GRPQUOTA | EXT4_MOUNT_PRJQUOTA;
2592  	if (quota_loaded &&
2593  	    ctx->mask_s_mount_opt & quota_flags &&
2594  	    !ctx_test_mount_opt(ctx, quota_flags))
2595  		goto err_quota_change;
2596  
2597  	if (ctx->spec & EXT4_SPEC_JQUOTA) {
2598  
2599  		for (i = 0; i < EXT4_MAXQUOTAS; i++) {
2600  			if (!(ctx->qname_spec & (1 << i)))
2601  				continue;
2602  
2603  			if (quota_loaded &&
2604  			    !!sbi->s_qf_names[i] != !!ctx->s_qf_names[i])
2605  				goto err_jquota_change;
2606  
2607  			if (sbi->s_qf_names[i] && ctx->s_qf_names[i] &&
2608  			    strcmp(get_qf_name(sb, sbi, i),
2609  				   ctx->s_qf_names[i]) != 0)
2610  				goto err_jquota_specified;
2611  		}
2612  
2613  		if (quota_feature) {
2614  			ext4_msg(NULL, KERN_INFO,
2615  				 "Journaled quota options ignored when "
2616  				 "QUOTA feature is enabled");
2617  			return 0;
2618  		}
2619  	}
2620  
2621  	if (ctx->spec & EXT4_SPEC_JQFMT) {
2622  		if (sbi->s_jquota_fmt != ctx->s_jquota_fmt && quota_loaded)
2623  			goto err_jquota_change;
2624  		if (quota_feature) {
2625  			ext4_msg(NULL, KERN_INFO, "Quota format mount options "
2626  				 "ignored when QUOTA feature is enabled");
2627  			return 0;
2628  		}
2629  	}
2630  
2631  	/* Make sure we don't mix old and new quota format */
2632  	usr_qf_name = (get_qf_name(sb, sbi, USRQUOTA) ||
2633  		       ctx->s_qf_names[USRQUOTA]);
2634  	grp_qf_name = (get_qf_name(sb, sbi, GRPQUOTA) ||
2635  		       ctx->s_qf_names[GRPQUOTA]);
2636  
2637  	usrquota = (ctx_test_mount_opt(ctx, EXT4_MOUNT_USRQUOTA) ||
2638  		    test_opt(sb, USRQUOTA));
2639  
2640  	grpquota = (ctx_test_mount_opt(ctx, EXT4_MOUNT_GRPQUOTA) ||
2641  		    test_opt(sb, GRPQUOTA));
2642  
2643  	if (usr_qf_name) {
2644  		ctx_clear_mount_opt(ctx, EXT4_MOUNT_USRQUOTA);
2645  		usrquota = false;
2646  	}
2647  	if (grp_qf_name) {
2648  		ctx_clear_mount_opt(ctx, EXT4_MOUNT_GRPQUOTA);
2649  		grpquota = false;
2650  	}
2651  
2652  	if (usr_qf_name || grp_qf_name) {
2653  		if (usrquota || grpquota) {
2654  			ext4_msg(NULL, KERN_ERR, "old and new quota "
2655  				 "format mixing");
2656  			return -EINVAL;
2657  		}
2658  
2659  		if (!(ctx->spec & EXT4_SPEC_JQFMT || sbi->s_jquota_fmt)) {
2660  			ext4_msg(NULL, KERN_ERR, "journaled quota format "
2661  				 "not specified");
2662  			return -EINVAL;
2663  		}
2664  	}
2665  
2666  	return 0;
2667  
2668  err_quota_change:
2669  	ext4_msg(NULL, KERN_ERR,
2670  		 "Cannot change quota options when quota turned on");
2671  	return -EINVAL;
2672  err_jquota_change:
2673  	ext4_msg(NULL, KERN_ERR, "Cannot change journaled quota "
2674  		 "options when quota turned on");
2675  	return -EINVAL;
2676  err_jquota_specified:
2677  	ext4_msg(NULL, KERN_ERR, "%s quota file already specified",
2678  		 QTYPE2NAME(i));
2679  	return -EINVAL;
2680  #else
2681  	return 0;
2682  #endif
2683  }
2684  
2685  static int ext4_check_test_dummy_encryption(const struct fs_context *fc,
2686  					    struct super_block *sb)
2687  {
2688  	const struct ext4_fs_context *ctx = fc->fs_private;
2689  	const struct ext4_sb_info *sbi = EXT4_SB(sb);
2690  
2691  	if (!fscrypt_is_dummy_policy_set(&ctx->dummy_enc_policy))
2692  		return 0;
2693  
2694  	if (!ext4_has_feature_encrypt(sb)) {
2695  		ext4_msg(NULL, KERN_WARNING,
2696  			 "test_dummy_encryption requires encrypt feature");
2697  		return -EINVAL;
2698  	}
2699  	/*
2700  	 * This mount option is just for testing, and it's not worthwhile to
2701  	 * implement the extra complexity (e.g. RCU protection) that would be
2702  	 * needed to allow it to be set or changed during remount.  We do allow
2703  	 * it to be specified during remount, but only if there is no change.
2704  	 */
2705  	if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) {
2706  		if (fscrypt_dummy_policies_equal(&sbi->s_dummy_enc_policy,
2707  						 &ctx->dummy_enc_policy))
2708  			return 0;
2709  		ext4_msg(NULL, KERN_WARNING,
2710  			 "Can't set or change test_dummy_encryption on remount");
2711  		return -EINVAL;
2712  	}
2713  	/* Also make sure s_mount_opts didn't contain a conflicting value. */
2714  	if (fscrypt_is_dummy_policy_set(&sbi->s_dummy_enc_policy)) {
2715  		if (fscrypt_dummy_policies_equal(&sbi->s_dummy_enc_policy,
2716  						 &ctx->dummy_enc_policy))
2717  			return 0;
2718  		ext4_msg(NULL, KERN_WARNING,
2719  			 "Conflicting test_dummy_encryption options");
2720  		return -EINVAL;
2721  	}
2722  	return 0;
2723  }
2724  
2725  static void ext4_apply_test_dummy_encryption(struct ext4_fs_context *ctx,
2726  					     struct super_block *sb)
2727  {
2728  	if (!fscrypt_is_dummy_policy_set(&ctx->dummy_enc_policy) ||
2729  	    /* if already set, it was already verified to be the same */
2730  	    fscrypt_is_dummy_policy_set(&EXT4_SB(sb)->s_dummy_enc_policy))
2731  		return;
2732  	EXT4_SB(sb)->s_dummy_enc_policy = ctx->dummy_enc_policy;
2733  	memset(&ctx->dummy_enc_policy, 0, sizeof(ctx->dummy_enc_policy));
2734  	ext4_msg(sb, KERN_WARNING, "Test dummy encryption mode enabled");
2735  }
2736  
2737  static int ext4_check_opt_consistency(struct fs_context *fc,
2738  				      struct super_block *sb)
2739  {
2740  	struct ext4_fs_context *ctx = fc->fs_private;
2741  	struct ext4_sb_info *sbi = fc->s_fs_info;
2742  	int is_remount = fc->purpose == FS_CONTEXT_FOR_RECONFIGURE;
2743  	int err;
2744  
2745  	if ((ctx->opt_flags & MOPT_NO_EXT2) && IS_EXT2_SB(sb)) {
2746  		ext4_msg(NULL, KERN_ERR,
2747  			 "Mount option(s) incompatible with ext2");
2748  		return -EINVAL;
2749  	}
2750  	if ((ctx->opt_flags & MOPT_NO_EXT3) && IS_EXT3_SB(sb)) {
2751  		ext4_msg(NULL, KERN_ERR,
2752  			 "Mount option(s) incompatible with ext3");
2753  		return -EINVAL;
2754  	}
2755  
2756  	if (ctx->s_want_extra_isize >
2757  	    (sbi->s_inode_size - EXT4_GOOD_OLD_INODE_SIZE)) {
2758  		ext4_msg(NULL, KERN_ERR,
2759  			 "Invalid want_extra_isize %d",
2760  			 ctx->s_want_extra_isize);
2761  		return -EINVAL;
2762  	}
2763  
2764  	err = ext4_check_test_dummy_encryption(fc, sb);
2765  	if (err)
2766  		return err;
2767  
2768  	if ((ctx->spec & EXT4_SPEC_DATAJ) && is_remount) {
2769  		if (!sbi->s_journal) {
2770  			ext4_msg(NULL, KERN_WARNING,
2771  				 "Remounting file system with no journal "
2772  				 "so ignoring journalled data option");
2773  			ctx_clear_mount_opt(ctx, EXT4_MOUNT_DATA_FLAGS);
2774  		} else if (ctx_test_mount_opt(ctx, EXT4_MOUNT_DATA_FLAGS) !=
2775  			   test_opt(sb, DATA_FLAGS)) {
2776  			ext4_msg(NULL, KERN_ERR, "Cannot change data mode "
2777  				 "on remount");
2778  			return -EINVAL;
2779  		}
2780  	}
2781  
2782  	if (is_remount) {
2783  		if (ctx_test_mount_opt(ctx, EXT4_MOUNT_DAX_ALWAYS) &&
2784  		    (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)) {
2785  			ext4_msg(NULL, KERN_ERR, "can't mount with "
2786  				 "both data=journal and dax");
2787  			return -EINVAL;
2788  		}
2789  
2790  		if (ctx_test_mount_opt(ctx, EXT4_MOUNT_DAX_ALWAYS) &&
2791  		    (!(sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) ||
2792  		     (sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_NEVER))) {
2793  fail_dax_change_remount:
2794  			ext4_msg(NULL, KERN_ERR, "can't change "
2795  				 "dax mount option while remounting");
2796  			return -EINVAL;
2797  		} else if (ctx_test_mount_opt2(ctx, EXT4_MOUNT2_DAX_NEVER) &&
2798  			 (!(sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_NEVER) ||
2799  			  (sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS))) {
2800  			goto fail_dax_change_remount;
2801  		} else if (ctx_test_mount_opt2(ctx, EXT4_MOUNT2_DAX_INODE) &&
2802  			   ((sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) ||
2803  			    (sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_NEVER) ||
2804  			    !(sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_INODE))) {
2805  			goto fail_dax_change_remount;
2806  		}
2807  	}
2808  
2809  	return ext4_check_quota_consistency(fc, sb);
2810  }
2811  
2812  static void ext4_apply_options(struct fs_context *fc, struct super_block *sb)
2813  {
2814  	struct ext4_fs_context *ctx = fc->fs_private;
2815  	struct ext4_sb_info *sbi = fc->s_fs_info;
2816  
2817  	sbi->s_mount_opt &= ~ctx->mask_s_mount_opt;
2818  	sbi->s_mount_opt |= ctx->vals_s_mount_opt;
2819  	sbi->s_mount_opt2 &= ~ctx->mask_s_mount_opt2;
2820  	sbi->s_mount_opt2 |= ctx->vals_s_mount_opt2;
2821  	sb->s_flags &= ~ctx->mask_s_flags;
2822  	sb->s_flags |= ctx->vals_s_flags;
2823  
2824  #define APPLY(X) ({ if (ctx->spec & EXT4_SPEC_##X) sbi->X = ctx->X; })
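	/*
	 * For example, APPLY(s_stripe) expands (roughly) to:
	 *	if (ctx->spec & EXT4_SPEC_s_stripe)
	 *		sbi->s_stripe = ctx->s_stripe;
	 */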
2825  	APPLY(s_commit_interval);
2826  	APPLY(s_stripe);
2827  	APPLY(s_max_batch_time);
2828  	APPLY(s_min_batch_time);
2829  	APPLY(s_want_extra_isize);
2830  	APPLY(s_inode_readahead_blks);
2831  	APPLY(s_max_dir_size_kb);
2832  	APPLY(s_li_wait_mult);
2833  	APPLY(s_resgid);
2834  	APPLY(s_resuid);
2835  
2836  #ifdef CONFIG_EXT4_DEBUG
2837  	APPLY(s_fc_debug_max_replay);
2838  #endif
2839  
2840  	ext4_apply_quota_options(fc, sb);
2841  	ext4_apply_test_dummy_encryption(ctx, sb);
2842  }
2843  
2845  static int ext4_validate_options(struct fs_context *fc)
2846  {
2847  #ifdef CONFIG_QUOTA
2848  	struct ext4_fs_context *ctx = fc->fs_private;
2849  	char *usr_qf_name, *grp_qf_name;
2850  
2851  	usr_qf_name = ctx->s_qf_names[USRQUOTA];
2852  	grp_qf_name = ctx->s_qf_names[GRPQUOTA];
2853  
2854  	if (usr_qf_name || grp_qf_name) {
2855  		if (ctx_test_mount_opt(ctx, EXT4_MOUNT_USRQUOTA) && usr_qf_name)
2856  			ctx_clear_mount_opt(ctx, EXT4_MOUNT_USRQUOTA);
2857  
2858  		if (ctx_test_mount_opt(ctx, EXT4_MOUNT_GRPQUOTA) && grp_qf_name)
2859  			ctx_clear_mount_opt(ctx, EXT4_MOUNT_GRPQUOTA);
2860  
2861  		if (ctx_test_mount_opt(ctx, EXT4_MOUNT_USRQUOTA) ||
2862  		    ctx_test_mount_opt(ctx, EXT4_MOUNT_GRPQUOTA)) {
2863  			ext4_msg(NULL, KERN_ERR, "old and new quota "
2864  				 "format mixing");
2865  			return -EINVAL;
2866  		}
2867  	}
2868  #endif
2869  	return 1;
2870  }
2871  
2872  static inline void ext4_show_quota_options(struct seq_file *seq,
2873  					   struct super_block *sb)
2874  {
2875  #if defined(CONFIG_QUOTA)
2876  	struct ext4_sb_info *sbi = EXT4_SB(sb);
2877  	char *usr_qf_name, *grp_qf_name;
2878  
2879  	if (sbi->s_jquota_fmt) {
2880  		char *fmtname = "";
2881  
2882  		switch (sbi->s_jquota_fmt) {
2883  		case QFMT_VFS_OLD:
2884  			fmtname = "vfsold";
2885  			break;
2886  		case QFMT_VFS_V0:
2887  			fmtname = "vfsv0";
2888  			break;
2889  		case QFMT_VFS_V1:
2890  			fmtname = "vfsv1";
2891  			break;
2892  		}
2893  		seq_printf(seq, ",jqfmt=%s", fmtname);
2894  	}
2895  
2896  	rcu_read_lock();
2897  	usr_qf_name = rcu_dereference(sbi->s_qf_names[USRQUOTA]);
2898  	grp_qf_name = rcu_dereference(sbi->s_qf_names[GRPQUOTA]);
2899  	if (usr_qf_name)
2900  		seq_show_option(seq, "usrjquota", usr_qf_name);
2901  	if (grp_qf_name)
2902  		seq_show_option(seq, "grpjquota", grp_qf_name);
2903  	rcu_read_unlock();
2904  #endif
2905  }
2906  
2907  static const char *token2str(int token)
2908  {
2909  	const struct fs_parameter_spec *spec;
2910  
2911  	for (spec = ext4_param_specs; spec->name != NULL; spec++)
2912  		if (spec->opt == token && !spec->type)
2913  			break;
2914  	return spec->name;
2915  }
2916  
2917  /*
2918   * Show an option if
2919   *  - it's set to a non-default value OR
2920   *  - if the per-sb default is different from the global default
2921   */
2922  static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
2923  			      int nodefs)
2924  {
2925  	struct ext4_sb_info *sbi = EXT4_SB(sb);
2926  	struct ext4_super_block *es = sbi->s_es;
2927  	int def_errors;
2928  	const struct mount_opts *m;
2929  	char sep = nodefs ? '\n' : ',';
2930  
2931  #define SEQ_OPTS_PUTS(str) seq_printf(seq, "%c" str, sep)
2932  #define SEQ_OPTS_PRINT(str, arg) seq_printf(seq, "%c" str, sep, arg)
2933  
2934  	if (sbi->s_sb_block != 1)
2935  		SEQ_OPTS_PRINT("sb=%llu", sbi->s_sb_block);
2936  
2937  	for (m = ext4_mount_opts; m->token != Opt_err; m++) {
2938  		int want_set = m->flags & MOPT_SET;
2939  		int opt_2 = m->flags & MOPT_2;
2940  		unsigned int mount_opt, def_mount_opt;
2941  
2942  		if (((m->flags & (MOPT_SET|MOPT_CLEAR)) == 0) ||
2943  		    m->flags & MOPT_SKIP)
2944  			continue;
2945  
2946  		if (opt_2) {
2947  			mount_opt = sbi->s_mount_opt2;
2948  			def_mount_opt = sbi->s_def_mount_opt2;
2949  		} else {
2950  			mount_opt = sbi->s_mount_opt;
2951  			def_mount_opt = sbi->s_def_mount_opt;
2952  		}
2953  		/* skip if same as the default: mount_opt ^ def_mount_opt holds the changed bits */
2954  		if (!nodefs && !(m->mount_opt & (mount_opt ^ def_mount_opt)))
2955  			continue;
2956  		/* select Opt_noFoo vs Opt_Foo: show MOPT_SET only if all its bits are set, MOPT_CLEAR only if all are clear */
2957  		if ((want_set &&
2958  		     (mount_opt & m->mount_opt) != m->mount_opt) ||
2959  		    (!want_set && (mount_opt & m->mount_opt)))
2960  			continue;
2961  		SEQ_OPTS_PRINT("%s", token2str(m->token));
2962  	}
2963  
2964  	if (nodefs || !uid_eq(sbi->s_resuid, make_kuid(&init_user_ns, EXT4_DEF_RESUID)) ||
2965  	    le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID)
2966  		SEQ_OPTS_PRINT("resuid=%u",
2967  				from_kuid_munged(&init_user_ns, sbi->s_resuid));
2968  	if (nodefs || !gid_eq(sbi->s_resgid, make_kgid(&init_user_ns, EXT4_DEF_RESGID)) ||
2969  	    le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID)
2970  		SEQ_OPTS_PRINT("resgid=%u",
2971  				from_kgid_munged(&init_user_ns, sbi->s_resgid));
2972  	def_errors = nodefs ? -1 : le16_to_cpu(es->s_errors);
2973  	if (test_opt(sb, ERRORS_RO) && def_errors != EXT4_ERRORS_RO)
2974  		SEQ_OPTS_PUTS("errors=remount-ro");
2975  	if (test_opt(sb, ERRORS_CONT) && def_errors != EXT4_ERRORS_CONTINUE)
2976  		SEQ_OPTS_PUTS("errors=continue");
2977  	if (test_opt(sb, ERRORS_PANIC) && def_errors != EXT4_ERRORS_PANIC)
2978  		SEQ_OPTS_PUTS("errors=panic");
2979  	if (nodefs || sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ)
2980  		SEQ_OPTS_PRINT("commit=%lu", sbi->s_commit_interval / HZ);
2981  	if (nodefs || sbi->s_min_batch_time != EXT4_DEF_MIN_BATCH_TIME)
2982  		SEQ_OPTS_PRINT("min_batch_time=%u", sbi->s_min_batch_time);
2983  	if (nodefs || sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME)
2984  		SEQ_OPTS_PRINT("max_batch_time=%u", sbi->s_max_batch_time);
2985  	if (nodefs || sbi->s_stripe)
2986  		SEQ_OPTS_PRINT("stripe=%lu", sbi->s_stripe);
2987  	if (nodefs || EXT4_MOUNT_DATA_FLAGS &
2988  			(sbi->s_mount_opt ^ sbi->s_def_mount_opt)) {
2989  		if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
2990  			SEQ_OPTS_PUTS("data=journal");
2991  		else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
2992  			SEQ_OPTS_PUTS("data=ordered");
2993  		else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
2994  			SEQ_OPTS_PUTS("data=writeback");
2995  	}
2996  	if (nodefs ||
2997  	    sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS)
2998  		SEQ_OPTS_PRINT("inode_readahead_blks=%u",
2999  			       sbi->s_inode_readahead_blks);
3000  
3001  	if (test_opt(sb, INIT_INODE_TABLE) && (nodefs ||
3002  		       (sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT)))
3003  		SEQ_OPTS_PRINT("init_itable=%u", sbi->s_li_wait_mult);
3004  	if (nodefs || sbi->s_max_dir_size_kb)
3005  		SEQ_OPTS_PRINT("max_dir_size_kb=%u", sbi->s_max_dir_size_kb);
3006  	if (test_opt(sb, DATA_ERR_ABORT))
3007  		SEQ_OPTS_PUTS("data_err=abort");
3008  
3009  	fscrypt_show_test_dummy_encryption(seq, sep, sb);
3010  
3011  	if (sb->s_flags & SB_INLINECRYPT)
3012  		SEQ_OPTS_PUTS("inlinecrypt");
3013  
3014  	if (test_opt(sb, DAX_ALWAYS)) {
3015  		if (IS_EXT2_SB(sb))
3016  			SEQ_OPTS_PUTS("dax");
3017  		else
3018  			SEQ_OPTS_PUTS("dax=always");
3019  	} else if (test_opt2(sb, DAX_NEVER)) {
3020  		SEQ_OPTS_PUTS("dax=never");
3021  	} else if (test_opt2(sb, DAX_INODE)) {
3022  		SEQ_OPTS_PUTS("dax=inode");
3023  	}
3024  
3025  	if (sbi->s_groups_count >= MB_DEFAULT_LINEAR_SCAN_THRESHOLD &&
3026  			!test_opt2(sb, MB_OPTIMIZE_SCAN)) {
3027  		SEQ_OPTS_PUTS("mb_optimize_scan=0");
3028  	} else if (sbi->s_groups_count < MB_DEFAULT_LINEAR_SCAN_THRESHOLD &&
3029  			test_opt2(sb, MB_OPTIMIZE_SCAN)) {
3030  		SEQ_OPTS_PUTS("mb_optimize_scan=1");
3031  	}
3032  
3033  	ext4_show_quota_options(seq, sb);
3034  	return 0;
3035  }
3036  
3037  static int ext4_show_options(struct seq_file *seq, struct dentry *root)
3038  {
3039  	return _ext4_show_options(seq, root->d_sb, 0);
3040  }
3041  
3042  int ext4_seq_options_show(struct seq_file *seq, void *offset)
3043  {
3044  	struct super_block *sb = seq->private;
3045  	int rc;
3046  
3047  	seq_puts(seq, sb_rdonly(sb) ? "ro" : "rw");
3048  	rc = _ext4_show_options(seq, sb, 1);
3049  	seq_putc(seq, '\n');
3050  	return rc;
3051  }
3052  
3053  static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
3054  			    int read_only)
3055  {
3056  	struct ext4_sb_info *sbi = EXT4_SB(sb);
3057  	int err = 0;
3058  
3059  	if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) {
3060  		ext4_msg(sb, KERN_ERR, "revision level too high, "
3061  			 "forcing read-only mode");
3062  		err = -EROFS;
3063  		goto done;
3064  	}
3065  	if (read_only)
3066  		goto done;
3067  	if (!(sbi->s_mount_state & EXT4_VALID_FS))
3068  		ext4_msg(sb, KERN_WARNING, "warning: mounting unchecked fs, "
3069  			 "running e2fsck is recommended");
3070  	else if (sbi->s_mount_state & EXT4_ERROR_FS)
3071  		ext4_msg(sb, KERN_WARNING,
3072  			 "warning: mounting fs with errors, "
3073  			 "running e2fsck is recommended");
3074  	else if ((__s16) le16_to_cpu(es->s_max_mnt_count) > 0 &&
3075  		 le16_to_cpu(es->s_mnt_count) >=
3076  		 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count))
3077  		ext4_msg(sb, KERN_WARNING,
3078  			 "warning: maximal mount count reached, "
3079  			 "running e2fsck is recommended");
3080  	else if (le32_to_cpu(es->s_checkinterval) &&
3081  		 (ext4_get_tstamp(es, s_lastcheck) +
3082  		  le32_to_cpu(es->s_checkinterval) <= ktime_get_real_seconds()))
3083  		ext4_msg(sb, KERN_WARNING,
3084  			 "warning: checktime reached, "
3085  			 "running e2fsck is recommended");
3086  	if (!sbi->s_journal)
3087  		es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
3088  	if (!(__s16) le16_to_cpu(es->s_max_mnt_count))
3089  		es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT);
3090  	le16_add_cpu(&es->s_mnt_count, 1);
3091  	ext4_update_tstamp(es, s_mtime);
3092  	if (sbi->s_journal) {
3093  		ext4_set_feature_journal_needs_recovery(sb);
3094  		if (ext4_has_feature_orphan_file(sb))
3095  			ext4_set_feature_orphan_present(sb);
3096  	}
3097  
3098  	err = ext4_commit_super(sb);
3099  done:
3100  	if (test_opt(sb, DEBUG))
3101  		printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, "
3102  				"bpg=%lu, ipg=%lu, mo=%04x, mo2=%04x]\n",
3103  			sb->s_blocksize,
3104  			sbi->s_groups_count,
3105  			EXT4_BLOCKS_PER_GROUP(sb),
3106  			EXT4_INODES_PER_GROUP(sb),
3107  			sbi->s_mount_opt, sbi->s_mount_opt2);
3108  	return err;
3109  }
3110  
3111  int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup)
3112  {
3113  	struct ext4_sb_info *sbi = EXT4_SB(sb);
3114  	struct flex_groups **old_groups, **new_groups;
3115  	int size, i, j;
3116  
3117  	if (!sbi->s_log_groups_per_flex)
3118  		return 0;
3119  
3120  	size = ext4_flex_group(sbi, ngroup - 1) + 1;
3121  	if (size <= sbi->s_flex_groups_allocated)
3122  		return 0;
3123  
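	/*
	 * Grow the s_flex_groups pointer array: allocate a larger array,
	 * copy the old pointers under rcu_read_lock(), publish the new
	 * array with rcu_assign_pointer(), and free the old one via RCU
	 * so lockless readers never observe a torn update.
	 */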
3124  	new_groups = kvzalloc(roundup_pow_of_two(size *
3125  			      sizeof(*sbi->s_flex_groups)), GFP_KERNEL);
3126  	if (!new_groups) {
3127  		ext4_msg(sb, KERN_ERR,
3128  			 "not enough memory for %d flex group pointers", size);
3129  		return -ENOMEM;
3130  	}
3131  	for (i = sbi->s_flex_groups_allocated; i < size; i++) {
3132  		new_groups[i] = kvzalloc(roundup_pow_of_two(
3133  					 sizeof(struct flex_groups)),
3134  					 GFP_KERNEL);
3135  		if (!new_groups[i]) {
3136  			for (j = sbi->s_flex_groups_allocated; j < i; j++)
3137  				kvfree(new_groups[j]);
3138  			kvfree(new_groups);
3139  			ext4_msg(sb, KERN_ERR,
3140  				 "not enough memory for %d flex groups", size);
3141  			return -ENOMEM;
3142  		}
3143  	}
3144  	rcu_read_lock();
3145  	old_groups = rcu_dereference(sbi->s_flex_groups);
3146  	if (old_groups)
3147  		memcpy(new_groups, old_groups,
3148  		       (sbi->s_flex_groups_allocated *
3149  			sizeof(struct flex_groups *)));
3150  	rcu_read_unlock();
3151  	rcu_assign_pointer(sbi->s_flex_groups, new_groups);
3152  	sbi->s_flex_groups_allocated = size;
3153  	if (old_groups)
3154  		ext4_kvfree_array_rcu(old_groups);
3155  	return 0;
3156  }
3157  
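/*
 * Seed the per-flex-group counters (free inodes, free clusters, used
 * directories) from the on-disk group descriptors.  Returns 1 on
 * success and 0 on allocation failure (note the inverted convention).
 */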
3158  static int ext4_fill_flex_info(struct super_block *sb)
3159  {
3160  	struct ext4_sb_info *sbi = EXT4_SB(sb);
3161  	struct ext4_group_desc *gdp = NULL;
3162  	struct flex_groups *fg;
3163  	ext4_group_t flex_group;
3164  	int i, err;
3165  
3166  	sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
3167  	if (sbi->s_log_groups_per_flex < 1 || sbi->s_log_groups_per_flex > 31) {
3168  		sbi->s_log_groups_per_flex = 0;
3169  		return 1;
3170  	}
3171  
3172  	err = ext4_alloc_flex_bg_array(sb, sbi->s_groups_count);
3173  	if (err)
3174  		goto failed;
3175  
3176  	for (i = 0; i < sbi->s_groups_count; i++) {
3177  		gdp = ext4_get_group_desc(sb, i, NULL);
3178  
3179  		flex_group = ext4_flex_group(sbi, i);
3180  		fg = sbi_array_rcu_deref(sbi, s_flex_groups, flex_group);
3181  		atomic_add(ext4_free_inodes_count(sb, gdp), &fg->free_inodes);
3182  		atomic64_add(ext4_free_group_clusters(sb, gdp),
3183  			     &fg->free_clusters);
3184  		atomic_add(ext4_used_dirs_count(sb, gdp), &fg->used_dirs);
3185  	}
3186  
3187  	return 1;
3188  failed:
3189  	return 0;
3190  }
3191  
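/*
 * Checksum a block group descriptor.  With metadata_csum, crc32c is
 * seeded with s_csum_seed, folded over the group number and the
 * descriptor (with bg_checksum itself treated as zero), and truncated
 * to 16 bits.  Otherwise, with gdt_csum, an old-style crc16 over the
 * fs UUID, the group number and the descriptor is used.
 */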
3192  static __le16 ext4_group_desc_csum(struct super_block *sb, __u32 block_group,
3193  				   struct ext4_group_desc *gdp)
3194  {
3195  	int offset = offsetof(struct ext4_group_desc, bg_checksum);
3196  	__u16 crc = 0;
3197  	__le32 le_group = cpu_to_le32(block_group);
3198  	struct ext4_sb_info *sbi = EXT4_SB(sb);
3199  
3200  	if (ext4_has_metadata_csum(sbi->s_sb)) {
3201  		/* Use new metadata_csum algorithm */
3202  		__u32 csum32;
3203  		__u16 dummy_csum = 0;
3204  
3205  		csum32 = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&le_group,
3206  				     sizeof(le_group));
3207  		csum32 = ext4_chksum(sbi, csum32, (__u8 *)gdp, offset);
3208  		csum32 = ext4_chksum(sbi, csum32, (__u8 *)&dummy_csum,
3209  				     sizeof(dummy_csum));
3210  		offset += sizeof(dummy_csum);
3211  		if (offset < sbi->s_desc_size)
3212  			csum32 = ext4_chksum(sbi, csum32, (__u8 *)gdp + offset,
3213  					     sbi->s_desc_size - offset);
3214  
3215  		crc = csum32 & 0xFFFF;
3216  		goto out;
3217  	}
3218  
3219  	/* old crc16 code */
3220  	if (!ext4_has_feature_gdt_csum(sb))
3221  		return 0;
3222  
3223  	crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid));
3224  	crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group));
3225  	crc = crc16(crc, (__u8 *)gdp, offset);
3226  	offset += sizeof(gdp->bg_checksum); /* skip checksum */
3227  	/* for checksum of struct ext4_group_desc do the rest... */
3228  	if (ext4_has_feature_64bit(sb) && offset < sbi->s_desc_size)
3229  		crc = crc16(crc, (__u8 *)gdp + offset,
3230  			    sbi->s_desc_size - offset);
3231  
3232  out:
3233  	return cpu_to_le16(crc);
3234  }
3235  
3236  int ext4_group_desc_csum_verify(struct super_block *sb, __u32 block_group,
3237  				struct ext4_group_desc *gdp)
3238  {
3239  	if (ext4_has_group_desc_csum(sb) &&
3240  	    (gdp->bg_checksum != ext4_group_desc_csum(sb, block_group, gdp)))
3241  		return 0;
3242  
3243  	return 1;
3244  }
3245  
3246  void ext4_group_desc_csum_set(struct super_block *sb, __u32 block_group,
3247  			      struct ext4_group_desc *gdp)
3248  {
3249  	if (!ext4_has_group_desc_csum(sb))
3250  		return;
3251  	gdp->bg_checksum = ext4_group_desc_csum(sb, block_group, gdp);
3252  }
3253  
3254  /* Called at mount-time, super-block is locked */
3255  static int ext4_check_descriptors(struct super_block *sb,
3256  				  ext4_fsblk_t sb_block,
3257  				  ext4_group_t *first_not_zeroed)
3258  {
3259  	struct ext4_sb_info *sbi = EXT4_SB(sb);
3260  	ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block);
3261  	ext4_fsblk_t last_block;
3262  	ext4_fsblk_t last_bg_block = sb_block + ext4_bg_num_gdb(sb, 0);
3263  	ext4_fsblk_t block_bitmap;
3264  	ext4_fsblk_t inode_bitmap;
3265  	ext4_fsblk_t inode_table;
3266  	int flexbg_flag = 0;
3267  	ext4_group_t i, grp = sbi->s_groups_count;
3268  
3269  	if (ext4_has_feature_flex_bg(sb))
3270  		flexbg_flag = 1;
3271  
3272  	ext4_debug("Checking group descriptors");
3273  
3274  	for (i = 0; i < sbi->s_groups_count; i++) {
3275  		struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL);
3276  
3277  		if (i == sbi->s_groups_count - 1 || flexbg_flag)
3278  			last_block = ext4_blocks_count(sbi->s_es) - 1;
3279  		else
3280  			last_block = first_block +
3281  				(EXT4_BLOCKS_PER_GROUP(sb) - 1);
3282  
3283  		if ((grp == sbi->s_groups_count) &&
3284  		   !(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
3285  			grp = i;
3286  
3287  		block_bitmap = ext4_block_bitmap(sb, gdp);
3288  		if (block_bitmap == sb_block) {
3289  			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3290  				 "Block bitmap for group %u overlaps "
3291  				 "superblock", i);
3292  			if (!sb_rdonly(sb))
3293  				return 0;
3294  		}
3295  		if (block_bitmap >= sb_block + 1 &&
3296  		    block_bitmap <= last_bg_block) {
3297  			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3298  				 "Block bitmap for group %u overlaps "
3299  				 "block group descriptors", i);
3300  			if (!sb_rdonly(sb))
3301  				return 0;
3302  		}
3303  		if (block_bitmap < first_block || block_bitmap > last_block) {
3304  			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3305  			       "Block bitmap for group %u not in group "
3306  			       "(block %llu)!", i, block_bitmap);
3307  			return 0;
3308  		}
3309  		inode_bitmap = ext4_inode_bitmap(sb, gdp);
3310  		if (inode_bitmap == sb_block) {
3311  			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3312  				 "Inode bitmap for group %u overlaps "
3313  				 "superblock", i);
3314  			if (!sb_rdonly(sb))
3315  				return 0;
3316  		}
3317  		if (inode_bitmap >= sb_block + 1 &&
3318  		    inode_bitmap <= last_bg_block) {
3319  			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3320  				 "Inode bitmap for group %u overlaps "
3321  				 "block group descriptors", i);
3322  			if (!sb_rdonly(sb))
3323  				return 0;
3324  		}
3325  		if (inode_bitmap < first_block || inode_bitmap > last_block) {
3326  			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3327  			       "Inode bitmap for group %u not in group "
3328  			       "(block %llu)!", i, inode_bitmap);
3329  			return 0;
3330  		}
3331  		inode_table = ext4_inode_table(sb, gdp);
3332  		if (inode_table == sb_block) {
3333  			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3334  				 "Inode table for group %u overlaps "
3335  				 "superblock", i);
3336  			if (!sb_rdonly(sb))
3337  				return 0;
3338  		}
3339  		if (inode_table >= sb_block + 1 &&
3340  		    inode_table <= last_bg_block) {
3341  			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3342  				 "Inode table for group %u overlaps "
3343  				 "block group descriptors", i);
3344  			if (!sb_rdonly(sb))
3345  				return 0;
3346  		}
3347  		if (inode_table < first_block ||
3348  		    inode_table + sbi->s_itb_per_group - 1 > last_block) {
3349  			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3350  			       "Inode table for group %u not in group "
3351  			       "(block %llu)!", i, inode_table);
3352  			return 0;
3353  		}
3354  		ext4_lock_group(sb, i);
3355  		if (!ext4_group_desc_csum_verify(sb, i, gdp)) {
3356  			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3357  				 "Checksum for group %u failed (%u!=%u)",
3358  				 i, le16_to_cpu(ext4_group_desc_csum(sb, i,
3359  				     gdp)), le16_to_cpu(gdp->bg_checksum));
3360  			if (!sb_rdonly(sb)) {
3361  				ext4_unlock_group(sb, i);
3362  				return 0;
3363  			}
3364  		}
3365  		ext4_unlock_group(sb, i);
3366  		if (!flexbg_flag)
3367  			first_block += EXT4_BLOCKS_PER_GROUP(sb);
3368  	}
3369  	if (NULL != first_not_zeroed)
3370  		*first_not_zeroed = grp;
3371  	return 1;
3372  }
3373  
3374  /*
3375   * Maximal extent format file size.
3376   * Resulting logical blkno at s_maxbytes must fit in our on-disk
3377   * extent format containers, within a sector_t, and within i_blocks
3378   * in the vfs.  ext4 inode has 48 bits of i_block in fsblock units,
3379   * so that won't be a limiting factor.
3380   *
3381   * However, there is another limiting factor. We store extents as a
3382   * starting block plus a length, so the length of the extent covering
3383   * the maximum file size must also fit into the on-disk format
3384   * containers. Since that length is one unit bigger than the highest
3385   * block it covers (block 0 counts too), we lower s_maxbytes by one fs block.
3386   *
3387   * Note, this does *not* consider any metadata overhead for vfs i_blocks.
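 *
 * Illustrative numbers: with 4 KiB blocks the cap works out to
 * (2^32 - 1) * 4 KiB, i.e. just under 16 TiB, the familiar size limit
 * for extent-mapped ext4 files.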
3388   */
3389  static loff_t ext4_max_size(int blkbits, int has_huge_files)
3390  {
3391  	loff_t res;
3392  	loff_t upper_limit = MAX_LFS_FILESIZE;
3393  
3394  	BUILD_BUG_ON(sizeof(blkcnt_t) < sizeof(u64));
3395  
3396  	if (!has_huge_files) {
3397  		upper_limit = (1LL << 32) - 1;
3398  
3399  		/* total blocks in file system block size */
3400  		upper_limit >>= (blkbits - 9);
3401  		upper_limit <<= blkbits;
3402  	}
3403  
3404  	/*
3405  	 * 32-bit extent-start container, ee_block. We lower the maxbytes
3406  	 * by one fs block, so ee_len can cover the extent of the maximum
3407  	 * file size.
3408  	 */
3409  	res = (1LL << 32) - 1;
3410  	res <<= blkbits;
3411  
3412  	/* Sanity check against vm- & vfs- imposed limits */
3413  	if (res > upper_limit)
3414  		res = upper_limit;
3415  
3416  	return res;
3417  }
3418  
3419  /*
3420   * Maximal bitmap file size.  There is a direct, and {,double-,triple-}indirect
3421   * block limit, and also a limit of (2^48 - 1) 512-byte sectors in i_blocks.
3422   * We need to be 1 filesystem block less than the 2^48 sector limit.
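 *
 * Illustrative numbers: with 4 KiB blocks (so 1024 block pointers per
 * indirect block) and !has_huge_files, the 2^32 - 1 sector cap in
 * i_blocks dominates, leaving a little under 2 TiB once the indirect
 * metadata blocks are subtracted.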
3423   */
3424  static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
3425  {
3426  	loff_t upper_limit, res = EXT4_NDIR_BLOCKS;
3427  	int meta_blocks;
3428  	unsigned int ppb = 1 << (bits - 2);
3429  
3430  	/*
3431  	 * This is calculated to be the largest file size for a dense, block
3432  	 * mapped file such that the file's total number of 512-byte sectors,
3433  	 * including data and all indirect blocks, does not exceed (2^48 - 1).
3434  	 *
3435   * __u32 i_blocks_lo and __u16 i_blocks_high represent the total
3436  	 * number of 512-byte sectors of the file.
3437  	 */
3438  	if (!has_huge_files) {
3439  		/*
3440  		 * !has_huge_files implies that the inode i_blocks field
3441  		 * holds the file's total number of 512-byte sectors and
3442  		 * is therefore capped at 2^32 - 1
3443  		 */
3444  		upper_limit = (1LL << 32) - 1;
3445  
3446  		/* total blocks in file system block size */
3447  		upper_limit >>= (bits - 9);
3448  
3449  	} else {
3450  		/*
3451  		 * We use 48 bit ext4_inode i_blocks
3452  		 * With EXT4_HUGE_FILE_FL set the i_blocks
3453  		 * represent total number of blocks in
3454  		 * file system block size
3455  		 */
3456  		upper_limit = (1LL << 48) - 1;
3457  
3458  	}
3459  
3460  	/* Compute how many blocks we can address by block tree */
3461  	res += ppb;
3462  	res += ppb * ppb;
3463  	res += ((loff_t)ppb) * ppb * ppb;
3464  	/* Compute how many metadata blocks are needed */
3465  	meta_blocks = 1;
3466  	meta_blocks += 1 + ppb;
3467  	meta_blocks += 1 + ppb + ppb * ppb;
3468  	/* Does block tree limit file size? */
3469  	if (res + meta_blocks <= upper_limit)
3470  		goto check_lfs;
3471  
3472  	res = upper_limit;
3473  	/* How many metadata blocks are needed for addressing upper_limit? */
3474  	upper_limit -= EXT4_NDIR_BLOCKS;
3475  	/* indirect blocks */
3476  	meta_blocks = 1;
3477  	upper_limit -= ppb;
3478  	/* double indirect blocks */
3479  	if (upper_limit < ppb * ppb) {
3480  		meta_blocks += 1 + DIV_ROUND_UP_ULL(upper_limit, ppb);
3481  		res -= meta_blocks;
3482  		goto check_lfs;
3483  	}
3484  	meta_blocks += 1 + ppb;
3485  	upper_limit -= ppb * ppb;
3486  	/* triple indirect blocks for the rest */
3487  	meta_blocks += 1 + DIV_ROUND_UP_ULL(upper_limit, ppb) +
3488  		DIV_ROUND_UP_ULL(upper_limit, ppb*ppb);
3489  	res -= meta_blocks;
3490  check_lfs:
3491  	res <<= bits;
3492  	if (res > MAX_LFS_FILESIZE)
3493  		res = MAX_LFS_FILESIZE;
3494  
3495  	return res;
3496  }
3497  
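/*
 * Map the nr'th group descriptor block of the backup at logical_sb_block
 * to its physical location.  Without meta_bg (or for blocks below
 * s_first_meta_bg) the GDT simply follows the backup superblock; with
 * meta_bg each descriptor block lives in its own group, right after
 * that group's backup superblock if it has one.
 */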
3498  static ext4_fsblk_t descriptor_loc(struct super_block *sb,
3499  				   ext4_fsblk_t logical_sb_block, int nr)
3500  {
3501  	struct ext4_sb_info *sbi = EXT4_SB(sb);
3502  	ext4_group_t bg, first_meta_bg;
3503  	int has_super = 0;
3504  
3505  	first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg);
3506  
3507  	if (!ext4_has_feature_meta_bg(sb) || nr < first_meta_bg)
3508  		return logical_sb_block + nr + 1;
3509  	bg = sbi->s_desc_per_block * nr;
3510  	if (ext4_bg_has_super(sb, bg))
3511  		has_super = 1;
3512  
3513  	/*
3514  	 * If we have a meta_bg fs with 1k blocks, group 0's GDT is at
3515  	 * block 2, not 1.  If s_first_data_block == 0 (bigalloc is enabled
3516  	 * on modern mke2fs or blksize > 1k on older mke2fs) then we must
3517  	 * compensate.
3518  	 */
3519  	if (sb->s_blocksize == 1024 && nr == 0 &&
3520  	    le32_to_cpu(sbi->s_es->s_first_data_block) == 0)
3521  		has_super++;
3522  
3523  	return (has_super + ext4_group_first_block_no(sb, bg));
3524  }
3525  
3526  /**
3527   * ext4_get_stripe_size: Get the stripe size.
3528   * @sbi: In memory super block info
3529   *
3530   * If the stripe size was specified via a mount option and fits within
3531   * the blocks per group, use that value. Otherwise fall back to the
3532   * superblock stripe width, then to the RAID stride. If none of these
3533   * is smaller than the blocks per group, return 0, since the
3534   * allocator needs the stripe size to be less than blocks per group.
3535   *
3536   */
3537  static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi)
3538  {
3539  	unsigned long stride = le16_to_cpu(sbi->s_es->s_raid_stride);
3540  	unsigned long stripe_width =
3541  			le32_to_cpu(sbi->s_es->s_raid_stripe_width);
3542  	int ret;
3543  
3544  	if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group)
3545  		ret = sbi->s_stripe;
3546  	else if (stripe_width && stripe_width <= sbi->s_blocks_per_group)
3547  		ret = stripe_width;
3548  	else if (stride && stride <= sbi->s_blocks_per_group)
3549  		ret = stride;
3550  	else
3551  		ret = 0;
3552  
3553  	/*
3554  	 * If the resulting stripe size is 1, striping makes no sense and
3555  	 * we set it to 0 to turn off the stripe handling code.
3556  	 */
3557  	if (ret <= 1)
3558  		ret = 0;
3559  
3560  	return ret;
3561  }
3562  
3563  /*
3564   * Check whether this filesystem can be mounted based on
3565   * the features present and the RDONLY/RDWR mount requested.
3566   * Returns 1 if this filesystem can be mounted as requested,
3567   * 0 if it cannot be.
3568   */
3569  int ext4_feature_set_ok(struct super_block *sb, int readonly)
3570  {
3571  	if (ext4_has_unknown_ext4_incompat_features(sb)) {
3572  		ext4_msg(sb, KERN_ERR,
3573  			"Couldn't mount because of "
3574  			"unsupported optional features (%x)",
3575  			(le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) &
3576  			~EXT4_FEATURE_INCOMPAT_SUPP));
3577  		return 0;
3578  	}
3579  
3580  	if (!IS_ENABLED(CONFIG_UNICODE) && ext4_has_feature_casefold(sb)) {
3581  		ext4_msg(sb, KERN_ERR,
3582  			 "Filesystem with casefold feature cannot be "
3583  			 "mounted without CONFIG_UNICODE");
3584  		return 0;
3585  	}
3586  
3587  	if (readonly)
3588  		return 1;
3589  
3590  	if (ext4_has_feature_readonly(sb)) {
3591  		ext4_msg(sb, KERN_INFO, "filesystem is read-only");
3592  		sb->s_flags |= SB_RDONLY;
3593  		return 1;
3594  	}
3595  
3596  	/* Check that feature set is OK for a read-write mount */
3597  	if (ext4_has_unknown_ext4_ro_compat_features(sb)) {
3598  		ext4_msg(sb, KERN_ERR, "couldn't mount RDWR because of "
3599  			 "unsupported optional features (%x)",
3600  			 (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) &
3601  				~EXT4_FEATURE_RO_COMPAT_SUPP));
3602  		return 0;
3603  	}
3604  	if (ext4_has_feature_bigalloc(sb) && !ext4_has_feature_extents(sb)) {
3605  		ext4_msg(sb, KERN_ERR,
3606  			 "Can't support bigalloc feature without "
3607  			 "extents feature");
3608  		return 0;
3609  	}
3610  
3611  #if !IS_ENABLED(CONFIG_QUOTA) || !IS_ENABLED(CONFIG_QFMT_V2)
3612  	if (!readonly && (ext4_has_feature_quota(sb) ||
3613  			  ext4_has_feature_project(sb))) {
3614  		ext4_msg(sb, KERN_ERR,
3615  			 "The kernel was not built with CONFIG_QUOTA and CONFIG_QFMT_V2");
3616  		return 0;
3617  	}
3618  #endif  /* !CONFIG_QUOTA || !CONFIG_QFMT_V2 */
3619  	return 1;
3620  }
3621  
3622  /*
3623   * This function is called once a day if we have errors logged
3624   * on the file system
3625   */
3626  static void print_daily_error_info(struct timer_list *t)
3627  {
3628  	struct ext4_sb_info *sbi = from_timer(sbi, t, s_err_report);
3629  	struct super_block *sb = sbi->s_sb;
3630  	struct ext4_super_block *es = sbi->s_es;
3631  
3632  	if (es->s_error_count)
3633  		/* fsck newer than v1.41.13 is needed to clean this condition. */
3634  		ext4_msg(sb, KERN_NOTICE, "error count since last fsck: %u",
3635  			 le32_to_cpu(es->s_error_count));
3636  	if (es->s_first_error_time) {
3637  		printk(KERN_NOTICE "EXT4-fs (%s): initial error at time %llu: %.*s:%d",
3638  		       sb->s_id,
3639  		       ext4_get_tstamp(es, s_first_error_time),
3640  		       (int) sizeof(es->s_first_error_func),
3641  		       es->s_first_error_func,
3642  		       le32_to_cpu(es->s_first_error_line));
3643  		if (es->s_first_error_ino)
3644  			printk(KERN_CONT ": inode %u",
3645  			       le32_to_cpu(es->s_first_error_ino));
3646  		if (es->s_first_error_block)
3647  			printk(KERN_CONT ": block %llu", (unsigned long long)
3648  			       le64_to_cpu(es->s_first_error_block));
3649  		printk(KERN_CONT "\n");
3650  	}
3651  	if (es->s_last_error_time) {
3652  		printk(KERN_NOTICE "EXT4-fs (%s): last error at time %llu: %.*s:%d",
3653  		       sb->s_id,
3654  		       ext4_get_tstamp(es, s_last_error_time),
3655  		       (int) sizeof(es->s_last_error_func),
3656  		       es->s_last_error_func,
3657  		       le32_to_cpu(es->s_last_error_line));
3658  		if (es->s_last_error_ino)
3659  			printk(KERN_CONT ": inode %u",
3660  			       le32_to_cpu(es->s_last_error_ino));
3661  		if (es->s_last_error_block)
3662  			printk(KERN_CONT ": block %llu", (unsigned long long)
3663  			       le64_to_cpu(es->s_last_error_block));
3664  		printk(KERN_CONT "\n");
3665  	}
3666  	mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ);  /* Once a day */
3667  }
3668  
3669  /* Find next suitable group and run ext4_init_inode_table */
3670  static int ext4_run_li_request(struct ext4_li_request *elr)
3671  {
3672  	struct ext4_group_desc *gdp = NULL;
3673  	struct super_block *sb = elr->lr_super;
3674  	ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
3675  	ext4_group_t group = elr->lr_next_group;
3676  	unsigned int prefetch_ios = 0;
3677  	int ret = 0;
3678  	int nr = EXT4_SB(sb)->s_mb_prefetch;
3679  	u64 start_time;
3680  
3681  	if (elr->lr_mode == EXT4_LI_MODE_PREFETCH_BBITMAP) {
3682  		elr->lr_next_group = ext4_mb_prefetch(sb, group, nr, &prefetch_ios);
3683  		ext4_mb_prefetch_fini(sb, elr->lr_next_group, nr);
3684  		trace_ext4_prefetch_bitmaps(sb, group, elr->lr_next_group, nr);
3685  		if (group >= elr->lr_next_group) {
3686  			ret = 1;
3687  			if (elr->lr_first_not_zeroed != ngroups &&
3688  			    !sb_rdonly(sb) && test_opt(sb, INIT_INODE_TABLE)) {
3689  				elr->lr_next_group = elr->lr_first_not_zeroed;
3690  				elr->lr_mode = EXT4_LI_MODE_ITABLE;
3691  				ret = 0;
3692  			}
3693  		}
3694  		return ret;
3695  	}
3696  
3697  	for (; group < ngroups; group++) {
3698  		gdp = ext4_get_group_desc(sb, group, NULL);
3699  		if (!gdp) {
3700  			ret = 1;
3701  			break;
3702  		}
3703  
3704  		if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
3705  			break;
3706  	}
3707  
3708  	if (group >= ngroups)
3709  		ret = 1;
3710  
3711  	if (!ret) {
3712  		start_time = ktime_get_real_ns();
3713  		ret = ext4_init_inode_table(sb, group,
3714  					    elr->lr_timeout ? 0 : 1);
3715  		trace_ext4_lazy_itable_init(sb, group);
3716  		if (elr->lr_timeout == 0) {
3717  			elr->lr_timeout = nsecs_to_jiffies((ktime_get_real_ns() - start_time) *
3718  				EXT4_SB(elr->lr_super)->s_li_wait_mult);
3719  		}
3720  		elr->lr_next_sched = jiffies + elr->lr_timeout;
3721  		elr->lr_next_group = group + 1;
3722  	}
3723  	return ret;
3724  }
3725  
3726  /*
3727   * Remove lr_request from the list_request and free the
3728   * request structure. Should be called with li_list_mtx held
3729   */
3730  static void ext4_remove_li_request(struct ext4_li_request *elr)
3731  {
3732  	if (!elr)
3733  		return;
3734  
3735  	list_del(&elr->lr_request);
3736  	EXT4_SB(elr->lr_super)->s_li_request = NULL;
3737  	kfree(elr);
3738  }
3739  
3740  static void ext4_unregister_li_request(struct super_block *sb)
3741  {
3742  	mutex_lock(&ext4_li_mtx);
3743  	if (!ext4_li_info) {
3744  		mutex_unlock(&ext4_li_mtx);
3745  		return;
3746  	}
3747  
3748  	mutex_lock(&ext4_li_info->li_list_mtx);
3749  	ext4_remove_li_request(EXT4_SB(sb)->s_li_request);
3750  	mutex_unlock(&ext4_li_info->li_list_mtx);
3751  	mutex_unlock(&ext4_li_mtx);
3752  }
3753  
3754  static struct task_struct *ext4_lazyinit_task;
3755  
3756  /*
3757   * This is the function where the ext4lazyinit thread lives. It walks
3758   * through the request list searching for the next scheduled filesystem.
3759   * When one is found, it runs the lazy initialization request
3760   * (ext4_run_li_request) and keeps track of the time spent in this
3761   * function. Based on that time we compute the next schedule time of
3762   * the request. When the walk through the list is complete, it computes
3763   * the next wake-up time and goes to sleep.
3764   */
3765  static int ext4_lazyinit_thread(void *arg)
3766  {
3767  	struct ext4_lazy_init *eli = arg;
3768  	struct list_head *pos, *n;
3769  	struct ext4_li_request *elr;
3770  	unsigned long next_wakeup, cur;
3771  
3772  	BUG_ON(NULL == eli);
3773  	set_freezable();
3774  
3775  cont_thread:
3776  	while (true) {
3777  		next_wakeup = MAX_JIFFY_OFFSET;
3778  
3779  		mutex_lock(&eli->li_list_mtx);
3780  		if (list_empty(&eli->li_request_list)) {
3781  			mutex_unlock(&eli->li_list_mtx);
3782  			goto exit_thread;
3783  		}
3784  		list_for_each_safe(pos, n, &eli->li_request_list) {
3785  			int err = 0;
3786  			int progress = 0;
3787  			elr = list_entry(pos, struct ext4_li_request,
3788  					 lr_request);
3789  
3790  			if (time_before(jiffies, elr->lr_next_sched)) {
3791  				if (time_before(elr->lr_next_sched, next_wakeup))
3792  					next_wakeup = elr->lr_next_sched;
3793  				continue;
3794  			}
3795  			if (down_read_trylock(&elr->lr_super->s_umount)) {
3796  				if (sb_start_write_trylock(elr->lr_super)) {
3797  					progress = 1;
3798  					/*
3799  					 * We hold sb->s_umount, sb can not
3800  					 * be removed from the list, it is
3801  					 * now safe to drop li_list_mtx
3802  					 */
3803  					mutex_unlock(&eli->li_list_mtx);
3804  					err = ext4_run_li_request(elr);
3805  					sb_end_write(elr->lr_super);
3806  					mutex_lock(&eli->li_list_mtx);
3807  					n = pos->next;
3808  				}
3809  				up_read(&elr->lr_super->s_umount);
3810  			}
3811  			/* error, remove the lazy_init job */
3812  			if (err) {
3813  				ext4_remove_li_request(elr);
3814  				continue;
3815  			}
3816  			if (!progress) {
3817  				elr->lr_next_sched = jiffies +
3818  					get_random_u32_below(EXT4_DEF_LI_MAX_START_DELAY * HZ);
3819  			}
3820  			if (time_before(elr->lr_next_sched, next_wakeup))
3821  				next_wakeup = elr->lr_next_sched;
3822  		}
3823  		mutex_unlock(&eli->li_list_mtx);
3824  
3825  		try_to_freeze();
3826  
3827  		cur = jiffies;
3828  		if ((time_after_eq(cur, next_wakeup)) ||
3829  		    (MAX_JIFFY_OFFSET == next_wakeup)) {
3830  			cond_resched();
3831  			continue;
3832  		}
3833  
3834  		schedule_timeout_interruptible(next_wakeup - cur);
3835  
3836  		if (kthread_should_stop()) {
3837  			ext4_clear_request_list();
3838  			goto exit_thread;
3839  		}
3840  	}
3841  
3842  exit_thread:
3843  	/*
3844  	 * It looks like the request list is empty, but we need
3845  	 * to check it under the li_list_mtx lock, to prevent any
3846  	 * additions into it, and of course we should lock ext4_li_mtx
3847  	 * to atomically free the list and ext4_li_info, because at
3848  	 * this point another ext4 filesystem could be registering
3849  	 * a new one.
3850  	 */
3851  	mutex_lock(&ext4_li_mtx);
3852  	mutex_lock(&eli->li_list_mtx);
3853  	if (!list_empty(&eli->li_request_list)) {
3854  		mutex_unlock(&eli->li_list_mtx);
3855  		mutex_unlock(&ext4_li_mtx);
3856  		goto cont_thread;
3857  	}
3858  	mutex_unlock(&eli->li_list_mtx);
3859  	kfree(ext4_li_info);
3860  	ext4_li_info = NULL;
3861  	mutex_unlock(&ext4_li_mtx);
3862  
3863  	return 0;
3864  }
3865  
3866  static void ext4_clear_request_list(void)
3867  {
3868  	struct list_head *pos, *n;
3869  	struct ext4_li_request *elr;
3870  
3871  	mutex_lock(&ext4_li_info->li_list_mtx);
3872  	list_for_each_safe(pos, n, &ext4_li_info->li_request_list) {
3873  		elr = list_entry(pos, struct ext4_li_request,
3874  				 lr_request);
3875  		ext4_remove_li_request(elr);
3876  	}
3877  	mutex_unlock(&ext4_li_info->li_list_mtx);
3878  }
3879  
3880  static int ext4_run_lazyinit_thread(void)
3881  {
3882  	ext4_lazyinit_task = kthread_run(ext4_lazyinit_thread,
3883  					 ext4_li_info, "ext4lazyinit");
3884  	if (IS_ERR(ext4_lazyinit_task)) {
3885  		int err = PTR_ERR(ext4_lazyinit_task);
3886  		ext4_clear_request_list();
3887  		kfree(ext4_li_info);
3888  		ext4_li_info = NULL;
3889  		printk(KERN_CRIT "EXT4-fs: error %d creating inode table "
3890  				 "initialization thread\n",
3891  				 err);
3892  		return err;
3893  	}
3894  	ext4_li_info->li_state |= EXT4_LAZYINIT_RUNNING;
3895  	return 0;
3896  }
3897  
3898  /*
3899   * Check whether it makes sense to run the itable init thread or not.
3900   * If there is at least one uninitialized inode table, return the
3901   * corresponding group number; otherwise the loop goes through all
3902   * groups and returns the total number of groups.
3903   */
3904  static ext4_group_t ext4_has_uninit_itable(struct super_block *sb)
3905  {
3906  	ext4_group_t group, ngroups = EXT4_SB(sb)->s_groups_count;
3907  	struct ext4_group_desc *gdp = NULL;
3908  
3909  	if (!ext4_has_group_desc_csum(sb))
3910  		return ngroups;
3911  
3912  	for (group = 0; group < ngroups; group++) {
3913  		gdp = ext4_get_group_desc(sb, group, NULL);
3914  		if (!gdp)
3915  			continue;
3916  
3917  		if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
3918  			break;
3919  	}
3920  
3921  	return group;
3922  }
3923  
3924  static int ext4_li_info_new(void)
3925  {
3926  	struct ext4_lazy_init *eli = NULL;
3927  
3928  	eli = kzalloc(sizeof(*eli), GFP_KERNEL);
3929  	if (!eli)
3930  		return -ENOMEM;
3931  
3932  	INIT_LIST_HEAD(&eli->li_request_list);
3933  	mutex_init(&eli->li_list_mtx);
3934  
3935  	eli->li_state |= EXT4_LAZYINIT_QUIT;
3936  
3937  	ext4_li_info = eli;
3938  
3939  	return 0;
3940  }
3941  
3942  static struct ext4_li_request *ext4_li_request_new(struct super_block *sb,
3943  					    ext4_group_t start)
3944  {
3945  	struct ext4_li_request *elr;
3946  
3947  	elr = kzalloc(sizeof(*elr), GFP_KERNEL);
3948  	if (!elr)
3949  		return NULL;
3950  
3951  	elr->lr_super = sb;
3952  	elr->lr_first_not_zeroed = start;
3953  	if (test_opt(sb, NO_PREFETCH_BLOCK_BITMAPS)) {
3954  		elr->lr_mode = EXT4_LI_MODE_ITABLE;
3955  		elr->lr_next_group = start;
3956  	} else {
3957  		elr->lr_mode = EXT4_LI_MODE_PREFETCH_BBITMAP;
3958  	}
3959  
3960  	/*
3961  	 * Randomize first schedule time of the request to
3962  	 * spread the inode table initialization requests
3963  	 * better.
3964  	 */
3965  	elr->lr_next_sched = jiffies + get_random_u32_below(EXT4_DEF_LI_MAX_START_DELAY * HZ);
3966  	return elr;
3967  }
3968  
3969  int ext4_register_li_request(struct super_block *sb,
3970  			     ext4_group_t first_not_zeroed)
3971  {
3972  	struct ext4_sb_info *sbi = EXT4_SB(sb);
3973  	struct ext4_li_request *elr = NULL;
3974  	ext4_group_t ngroups = sbi->s_groups_count;
3975  	int ret = 0;
3976  
3977  	mutex_lock(&ext4_li_mtx);
3978  	if (sbi->s_li_request != NULL) {
3979  		/*
3980  		 * Reset timeout so it can be computed again, because
3981  		 * s_li_wait_mult might have changed.
3982  		 */
3983  		sbi->s_li_request->lr_timeout = 0;
3984  		goto out;
3985  	}
3986  
3987  	if (sb_rdonly(sb) ||
3988  	    (test_opt(sb, NO_PREFETCH_BLOCK_BITMAPS) &&
3989  	     (first_not_zeroed == ngroups || !test_opt(sb, INIT_INODE_TABLE))))
3990  		goto out;
3991  
3992  	elr = ext4_li_request_new(sb, first_not_zeroed);
3993  	if (!elr) {
3994  		ret = -ENOMEM;
3995  		goto out;
3996  	}
3997  
3998  	if (NULL == ext4_li_info) {
3999  		ret = ext4_li_info_new();
4000  		if (ret)
4001  			goto out;
4002  	}
4003  
4004  	mutex_lock(&ext4_li_info->li_list_mtx);
4005  	list_add(&elr->lr_request, &ext4_li_info->li_request_list);
4006  	mutex_unlock(&ext4_li_info->li_list_mtx);
4007  
4008  	sbi->s_li_request = elr;
4009  	/*
4010  	 * set elr to NULL here since it has been inserted into
4011  	 * the request_list; its removal and freeing are
4012  	 * handled by ext4_clear_request_list from now on.
4013  	 */
4014  	elr = NULL;
4015  
4016  	if (!(ext4_li_info->li_state & EXT4_LAZYINIT_RUNNING)) {
4017  		ret = ext4_run_lazyinit_thread();
4018  		if (ret)
4019  			goto out;
4020  	}
4021  out:
4022  	mutex_unlock(&ext4_li_mtx);
4023  	if (ret)
4024  		kfree(elr);
4025  	return ret;
4026  }
4027  
4028  /*
4029   * We do not need to lock anything since this is called on
4030   * module unload.
4031   */
4032  static void ext4_destroy_lazyinit_thread(void)
4033  {
4034  	/*
4035  	 * If thread exited earlier
4036  	 * there's nothing to be done.
4037  	 */
4038  	if (!ext4_li_info || !ext4_lazyinit_task)
4039  		return;
4040  
4041  	kthread_stop(ext4_lazyinit_task);
4042  }
4043  
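/*
 * Choose the jbd2 checksum features matching this filesystem: csum v3
 * when the fs uses metadata_csum, otherwise the old compat checksum,
 * gated by the journal_checksum/journal_async_commit mount options.
 * Stale checksum and async-commit feature bits are cleared first.
 */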
4044  static int set_journal_csum_feature_set(struct super_block *sb)
4045  {
4046  	int ret = 1;
4047  	int compat, incompat;
4048  	struct ext4_sb_info *sbi = EXT4_SB(sb);
4049  
4050  	if (ext4_has_metadata_csum(sb)) {
4051  		/* journal checksum v3 */
4052  		compat = 0;
4053  		incompat = JBD2_FEATURE_INCOMPAT_CSUM_V3;
4054  	} else {
4055  		/* journal checksum v1 */
4056  		compat = JBD2_FEATURE_COMPAT_CHECKSUM;
4057  		incompat = 0;
4058  	}
4059  
4060  	jbd2_journal_clear_features(sbi->s_journal,
4061  			JBD2_FEATURE_COMPAT_CHECKSUM, 0,
4062  			JBD2_FEATURE_INCOMPAT_CSUM_V3 |
4063  			JBD2_FEATURE_INCOMPAT_CSUM_V2);
4064  	if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
4065  		ret = jbd2_journal_set_features(sbi->s_journal,
4066  				compat, 0,
4067  				JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT |
4068  				incompat);
4069  	} else if (test_opt(sb, JOURNAL_CHECKSUM)) {
4070  		ret = jbd2_journal_set_features(sbi->s_journal,
4071  				compat, 0,
4072  				incompat);
4073  		jbd2_journal_clear_features(sbi->s_journal, 0, 0,
4074  				JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
4075  	} else {
4076  		jbd2_journal_clear_features(sbi->s_journal, 0, 0,
4077  				JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
4078  	}
4079  
4080  	return ret;
4081  }
4082  
4083  /*
4084   * Note: calculating the overhead so we can be compatible with
4085   * historical BSD practice is quite difficult in the face of
4086   * clusters/bigalloc.  This is because multiple metadata blocks from
4087   * different block groups can end up in the same allocation cluster.
4088   * Calculating the exact overhead in the face of clustered allocation
4089   * requires either O(all block bitmaps) in memory or O(number of block
4090   * groups**2) in time.  We will still calculate the overhead for
4091   * older file systems --- and if we come across a bigalloc file
4092   * system with zero in s_overhead_clusters the estimate will be close to
4093   * correct especially for very large cluster sizes --- but for newer
4094   * file systems, it's better to calculate this figure once at mkfs
4095   * time, and store it in the superblock.  If the superblock value is
4096   * present (even for non-bigalloc file systems), we will use it.
4097   */
4098  static int count_overhead(struct super_block *sb, ext4_group_t grp,
4099  			  char *buf)
4100  {
4101  	struct ext4_sb_info	*sbi = EXT4_SB(sb);
4102  	struct ext4_group_desc	*gdp;
4103  	ext4_fsblk_t		first_block, last_block, b;
4104  	ext4_group_t		i, ngroups = ext4_get_groups_count(sb);
4105  	int			s, j, count = 0;
4106  	int			has_super = ext4_bg_has_super(sb, grp);
4107  
4108  	if (!ext4_has_feature_bigalloc(sb))
4109  		return (has_super + ext4_bg_num_gdb(sb, grp) +
4110  			(has_super ? le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) : 0) +
4111  			sbi->s_itb_per_group + 2);
4112  
4113  	first_block = le32_to_cpu(sbi->s_es->s_first_data_block) +
4114  		(grp * EXT4_BLOCKS_PER_GROUP(sb));
4115  	last_block = first_block + EXT4_BLOCKS_PER_GROUP(sb) - 1;
4116  	for (i = 0; i < ngroups; i++) {
4117  		gdp = ext4_get_group_desc(sb, i, NULL);
4118  		b = ext4_block_bitmap(sb, gdp);
4119  		if (b >= first_block && b <= last_block) {
4120  			ext4_set_bit(EXT4_B2C(sbi, b - first_block), buf);
4121  			count++;
4122  		}
4123  		b = ext4_inode_bitmap(sb, gdp);
4124  		if (b >= first_block && b <= last_block) {
4125  			ext4_set_bit(EXT4_B2C(sbi, b - first_block), buf);
4126  			count++;
4127  		}
4128  		b = ext4_inode_table(sb, gdp);
4129  		if (b >= first_block && b + sbi->s_itb_per_group <= last_block)
4130  			for (j = 0; j < sbi->s_itb_per_group; j++, b++) {
4131  				int c = EXT4_B2C(sbi, b - first_block);
4132  				ext4_set_bit(c, buf);
4133  				count++;
4134  			}
4135  		if (i != grp)
4136  			continue;
4137  		s = 0;
4138  		if (ext4_bg_has_super(sb, grp)) {
4139  			ext4_set_bit(s++, buf);
4140  			count++;
4141  		}
4142  		j = ext4_bg_num_gdb(sb, grp);
4143  		if (s + j > EXT4_BLOCKS_PER_GROUP(sb)) {
4144  			ext4_error(sb, "Invalid number of block group "
4145  				   "descriptor blocks: %d", j);
4146  			j = EXT4_BLOCKS_PER_GROUP(sb) - s;
4147  		}
4148  		count += j;
4149  		for (; j > 0; j--)
4150  			ext4_set_bit(EXT4_B2C(sbi, s++), buf);
4151  	}
4152  	if (!count)
4153  		return 0;
4154  	return EXT4_CLUSTERS_PER_GROUP(sb) -
4155  		ext4_count_free(buf, EXT4_CLUSTERS_PER_GROUP(sb) / 8);
4156  }
4157  
4158  /*
4159   * Compute the overhead and stash it in sbi->s_overhead
4160   */
4161  int ext4_calculate_overhead(struct super_block *sb)
4162  {
4163  	struct ext4_sb_info *sbi = EXT4_SB(sb);
4164  	struct ext4_super_block *es = sbi->s_es;
4165  	struct inode *j_inode;
4166  	unsigned int j_blocks, j_inum = le32_to_cpu(es->s_journal_inum);
4167  	ext4_group_t i, ngroups = ext4_get_groups_count(sb);
4168  	ext4_fsblk_t overhead = 0;
4169  	char *buf = (char *) get_zeroed_page(GFP_NOFS);
4170  
4171  	if (!buf)
4172  		return -ENOMEM;
4173  
4174  	/*
4175  	 * Compute the overhead (FS structures).  This is constant
4176  	 * for a given filesystem unless the number of block groups
4177  	 * changes so we cache the previous value until it does.
4178  	 */
4179  
4180  	/*
4181  	 * All of the blocks before first_data_block are overhead
4182  	 */
4183  	overhead = EXT4_B2C(sbi, le32_to_cpu(es->s_first_data_block));
4184  
4185  	/*
4186  	 * Add the overhead found in each block group
4187  	 */
4188  	for (i = 0; i < ngroups; i++) {
4189  		int blks;
4190  
4191  		blks = count_overhead(sb, i, buf);
4192  		overhead += blks;
4193  		if (blks)
4194  			memset(buf, 0, PAGE_SIZE);
4195  		cond_resched();
4196  	}
4197  
4198  	/*
4199  	 * Add the internal journal blocks whether the journal has been
4200  	 * loaded or not
4201  	 */
4202  	if (sbi->s_journal && !sbi->s_journal_bdev_file)
4203  		overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_total_len);
4204  	else if (ext4_has_feature_journal(sb) && !sbi->s_journal && j_inum) {
4205  		/* j_inum for internal journal is non-zero */
4206  		j_inode = ext4_get_journal_inode(sb, j_inum);
4207  		if (!IS_ERR(j_inode)) {
4208  			j_blocks = j_inode->i_size >> sb->s_blocksize_bits;
4209  			overhead += EXT4_NUM_B2C(sbi, j_blocks);
4210  			iput(j_inode);
4211  		} else {
4212  			ext4_msg(sb, KERN_ERR, "can't get journal size");
4213  		}
4214  	}
4215  	sbi->s_overhead = overhead;
4216  	smp_wmb();
4217  	free_page((unsigned long) buf);
4218  	return 0;
4219  }
4220  
4221  static void ext4_set_resv_clusters(struct super_block *sb)
4222  {
4223  	ext4_fsblk_t resv_clusters;
4224  	struct ext4_sb_info *sbi = EXT4_SB(sb);
4225  
4226  	/*
4227  	 * There's no need to reserve anything when we aren't using extents.
4228  	 * The space estimates are exact, there are no unwritten extents,
4229  	 * hole punching doesn't need new metadata... This is needed especially
4230  	 * to keep ext2/3 backward compatibility.
4231  	 */
4232  	if (!ext4_has_feature_extents(sb))
4233  		return;
4234  	/*
4235  	 * By default we reserve 2% or 4096 clusters, whichever is smaller.
4236  	 * This should cover the situations where we cannot afford to run
4237  	 * out of space, such as punching a hole or converting
4238  	 * unwritten extents in the delalloc path. In most cases such an
4239  	 * allocation requires only 1 or 2 blocks; higher numbers are
4240  	 * very rare.
4241  	 */
4242  	resv_clusters = (ext4_blocks_count(sbi->s_es) >>
4243  			 sbi->s_cluster_bits);
4244  
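	/* One fiftieth of the clusters, i.e. 2%; capped at 4096 just below. */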
4245  	do_div(resv_clusters, 50);
4246  	resv_clusters = min_t(ext4_fsblk_t, resv_clusters, 4096);
4247  
4248  	atomic64_set(&sbi->s_resv_clusters, resv_clusters);
4249  }
4250  
4251  static const char *ext4_quota_mode(struct super_block *sb)
4252  {
4253  #ifdef CONFIG_QUOTA
4254  	if (!ext4_quota_capable(sb))
4255  		return "none";
4256  
4257  	if (EXT4_SB(sb)->s_journal && ext4_is_quota_journalled(sb))
4258  		return "journalled";
4259  	else
4260  		return "writeback";
4261  #else
4262  	return "disabled";
4263  #endif
4264  }
4265  
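/*
 * Initialize the journal trigger for the given metadata type; jbd2
 * invokes t_frozen on the frozen buffer copy just before it is written
 * to the journal, letting ext4 recompute checksums at that point.
 */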
4266  static void ext4_setup_csum_trigger(struct super_block *sb,
4267  				    enum ext4_journal_trigger_type type,
4268  				    void (*trigger)(
4269  					struct jbd2_buffer_trigger_type *type,
4270  					struct buffer_head *bh,
4271  					void *mapped_data,
4272  					size_t size))
4273  {
4274  	struct ext4_sb_info *sbi = EXT4_SB(sb);
4275  
4276  	sbi->s_journal_triggers[type].sb = sb;
4277  	sbi->s_journal_triggers[type].tr_triggers.t_frozen = trigger;
4278  }
4279  
4280  static void ext4_free_sbi(struct ext4_sb_info *sbi)
4281  {
4282  	if (!sbi)
4283  		return;
4284  
4285  	kfree(sbi->s_blockgroup_lock);
4286  	fs_put_dax(sbi->s_daxdev, NULL);
4287  	kfree(sbi);
4288  }
4289  
4290  static struct ext4_sb_info *ext4_alloc_sbi(struct super_block *sb)
4291  {
4292  	struct ext4_sb_info *sbi;
4293  
4294  	sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
4295  	if (!sbi)
4296  		return NULL;
4297  
4298  	sbi->s_daxdev = fs_dax_get_by_bdev(sb->s_bdev, &sbi->s_dax_part_off,
4299  					   NULL, NULL);
4300  
4301  	sbi->s_blockgroup_lock =
4302  		kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
4303  
4304  	if (!sbi->s_blockgroup_lock)
4305  		goto err_out;
4306  
4307  	sb->s_fs_info = sbi;
4308  	sbi->s_sb = sb;
4309  	return sbi;
4310  err_out:
4311  	fs_put_dax(sbi->s_daxdev, NULL);
4312  	kfree(sbi);
4313  	return NULL;
4314  }
4315  
4316  static void ext4_set_def_opts(struct super_block *sb,
4317  			      struct ext4_super_block *es)
4318  {
4319  	unsigned long def_mount_opts;
4320  
4321  	/* Set defaults before we parse the mount options */
4322  	def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
4323  	set_opt(sb, INIT_INODE_TABLE);
4324  	if (def_mount_opts & EXT4_DEFM_DEBUG)
4325  		set_opt(sb, DEBUG);
4326  	if (def_mount_opts & EXT4_DEFM_BSDGROUPS)
4327  		set_opt(sb, GRPID);
4328  	if (def_mount_opts & EXT4_DEFM_UID16)
4329  		set_opt(sb, NO_UID32);
4330  	/* xattr user namespace & acls are now defaulted on */
4331  	set_opt(sb, XATTR_USER);
4332  #ifdef CONFIG_EXT4_FS_POSIX_ACL
4333  	set_opt(sb, POSIX_ACL);
4334  #endif
4335  	if (ext4_has_feature_fast_commit(sb))
4336  		set_opt2(sb, JOURNAL_FAST_COMMIT);
4337  	/* don't forget to enable journal_csum when metadata_csum is enabled. */
4338  	if (ext4_has_metadata_csum(sb))
4339  		set_opt(sb, JOURNAL_CHECKSUM);
4340  
4341  	if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA)
4342  		set_opt(sb, JOURNAL_DATA);
4343  	else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED)
4344  		set_opt(sb, ORDERED_DATA);
4345  	else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK)
4346  		set_opt(sb, WRITEBACK_DATA);
4347  
4348  	if (le16_to_cpu(es->s_errors) == EXT4_ERRORS_PANIC)
4349  		set_opt(sb, ERRORS_PANIC);
4350  	else if (le16_to_cpu(es->s_errors) == EXT4_ERRORS_CONTINUE)
4351  		set_opt(sb, ERRORS_CONT);
4352  	else
4353  		set_opt(sb, ERRORS_RO);
4354  	/* block_validity enabled by default; disable with noblock_validity */
4355  	set_opt(sb, BLOCK_VALIDITY);
4356  	if (def_mount_opts & EXT4_DEFM_DISCARD)
4357  		set_opt(sb, DISCARD);
4358  
4359  	if ((def_mount_opts & EXT4_DEFM_NOBARRIER) == 0)
4360  		set_opt(sb, BARRIER);
4361  
4362  	/*
4363  	 * Enable delayed allocation by default.
4364  	 * Use -o nodelalloc to turn it off.
4365  	 */
4366  	if (!IS_EXT3_SB(sb) && !IS_EXT2_SB(sb) &&
4367  	    ((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0))
4368  		set_opt(sb, DELALLOC);
4369  
4370  	if (sb->s_blocksize <= PAGE_SIZE)
4371  		set_opt(sb, DIOREAD_NOLOCK);
4372  }
4373  
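/*
 * Validate the cluster size against the block size and derive
 * s_cluster_bits and s_cluster_ratio.  Without bigalloc, the on-disk
 * "cluster" (fragment) size must equal the block size exactly.
 */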
4374  static int ext4_handle_clustersize(struct super_block *sb)
4375  {
4376  	struct ext4_sb_info *sbi = EXT4_SB(sb);
4377  	struct ext4_super_block *es = sbi->s_es;
4378  	int clustersize;
4379  
4380  	/* Handle clustersize */
4381  	clustersize = BLOCK_SIZE << le32_to_cpu(es->s_log_cluster_size);
4382  	if (ext4_has_feature_bigalloc(sb)) {
4383  		if (clustersize < sb->s_blocksize) {
4384  			ext4_msg(sb, KERN_ERR,
4385  				 "cluster size (%d) smaller than "
4386  				 "block size (%lu)", clustersize, sb->s_blocksize);
4387  			return -EINVAL;
4388  		}
4389  		sbi->s_cluster_bits = le32_to_cpu(es->s_log_cluster_size) -
4390  			le32_to_cpu(es->s_log_block_size);
4391  	} else {
4392  		if (clustersize != sb->s_blocksize) {
4393  			ext4_msg(sb, KERN_ERR,
4394  				 "fragment/cluster size (%d) != "
4395  				 "block size (%lu)", clustersize, sb->s_blocksize);
4396  			return -EINVAL;
4397  		}
4398  		if (sbi->s_blocks_per_group > sb->s_blocksize * 8) {
4399  			ext4_msg(sb, KERN_ERR,
4400  				 "#blocks per group too big: %lu",
4401  				 sbi->s_blocks_per_group);
4402  			return -EINVAL;
4403  		}
4404  		sbi->s_cluster_bits = 0;
4405  	}
4406  	sbi->s_clusters_per_group = le32_to_cpu(es->s_clusters_per_group);
4407  	if (sbi->s_clusters_per_group > sb->s_blocksize * 8) {
4408  		ext4_msg(sb, KERN_ERR, "#clusters per group too big: %lu",
4409  			 sbi->s_clusters_per_group);
4410  		return -EINVAL;
4411  	}
4412  	if (sbi->s_blocks_per_group !=
4413  	    (sbi->s_clusters_per_group * (clustersize / sb->s_blocksize))) {
4414  		ext4_msg(sb, KERN_ERR,
4415  			 "blocks per group (%lu) and clusters per group (%lu) inconsistent",
4416  			 sbi->s_blocks_per_group, sbi->s_clusters_per_group);
4417  		return -EINVAL;
4418  	}
4419  	sbi->s_cluster_ratio = clustersize / sb->s_blocksize;
4420  
4421  	/* Do we have standard group size of clustersize * 8 blocks? */
4422  	if (sbi->s_blocks_per_group == clustersize << 3)
4423  		set_opt2(sb, STD_GROUP_SIZE);
4424  
4425  	return 0;
4426  }
4427  
4428  static void ext4_fast_commit_init(struct super_block *sb)
4429  {
4430  	struct ext4_sb_info *sbi = EXT4_SB(sb);
4431  
4432  	/* Initialize fast commit stuff */
4433  	atomic_set(&sbi->s_fc_subtid, 0);
4434  	INIT_LIST_HEAD(&sbi->s_fc_q[FC_Q_MAIN]);
4435  	INIT_LIST_HEAD(&sbi->s_fc_q[FC_Q_STAGING]);
4436  	INIT_LIST_HEAD(&sbi->s_fc_dentry_q[FC_Q_MAIN]);
4437  	INIT_LIST_HEAD(&sbi->s_fc_dentry_q[FC_Q_STAGING]);
4438  	sbi->s_fc_bytes = 0;
4439  	ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
4440  	sbi->s_fc_ineligible_tid = 0;
4441  	spin_lock_init(&sbi->s_fc_lock);
4442  	memset(&sbi->s_fc_stats, 0, sizeof(sbi->s_fc_stats));
4443  	sbi->s_fc_replay_state.fc_regions = NULL;
4444  	sbi->s_fc_replay_state.fc_regions_size = 0;
4445  	sbi->s_fc_replay_state.fc_regions_used = 0;
4446  	sbi->s_fc_replay_state.fc_regions_valid = 0;
4447  	sbi->s_fc_replay_state.fc_modified_inodes = NULL;
4448  	sbi->s_fc_replay_state.fc_modified_inodes_size = 0;
4449  	sbi->s_fc_replay_state.fc_modified_inodes_used = 0;
4450  }
4451  
4452  static int ext4_inode_info_init(struct super_block *sb,
4453  				struct ext4_super_block *es)
4454  {
4455  	struct ext4_sb_info *sbi = EXT4_SB(sb);
4456  
4457  	if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) {
4458  		sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE;
4459  		sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO;
4460  	} else {
4461  		sbi->s_inode_size = le16_to_cpu(es->s_inode_size);
4462  		sbi->s_first_ino = le32_to_cpu(es->s_first_ino);
4463  		if (sbi->s_first_ino < EXT4_GOOD_OLD_FIRST_INO) {
4464  			ext4_msg(sb, KERN_ERR, "invalid first ino: %u",
4465  				 sbi->s_first_ino);
4466  			return -EINVAL;
4467  		}
4468  		if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) ||
4469  		    (!is_power_of_2(sbi->s_inode_size)) ||
4470  		    (sbi->s_inode_size > sb->s_blocksize)) {
4471  			ext4_msg(sb, KERN_ERR,
4472  			       "unsupported inode size: %d",
4473  			       sbi->s_inode_size);
4474  			ext4_msg(sb, KERN_ERR, "blocksize: %lu", sb->s_blocksize);
4475  			return -EINVAL;
4476  		}
4477  		/*
4478  		 * i_atime_extra is the last extra field available for
4479  		 * [acm]times in struct ext4_inode. Checking for that
4480  		 * field should suffice to ensure we have extra space
4481  		 * for all three.
4482  		 */
4483  		if (sbi->s_inode_size >= offsetof(struct ext4_inode, i_atime_extra) +
4484  			sizeof(((struct ext4_inode *)0)->i_atime_extra)) {
4485  			sb->s_time_gran = 1;
4486  			sb->s_time_max = EXT4_EXTRA_TIMESTAMP_MAX;
4487  		} else {
4488  			sb->s_time_gran = NSEC_PER_SEC;
4489  			sb->s_time_max = EXT4_NON_EXTRA_TIMESTAMP_MAX;
4490  		}
4491  		sb->s_time_min = EXT4_TIMESTAMP_MIN;
4492  	}
4493  
4494  	if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) {
4495  		sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
4496  			EXT4_GOOD_OLD_INODE_SIZE;
4497  		if (ext4_has_feature_extra_isize(sb)) {
4498  			unsigned v, max = (sbi->s_inode_size -
4499  					   EXT4_GOOD_OLD_INODE_SIZE);
4500  
4501  			v = le16_to_cpu(es->s_want_extra_isize);
4502  			if (v > max) {
4503  				ext4_msg(sb, KERN_ERR,
4504  					 "bad s_want_extra_isize: %d", v);
4505  				return -EINVAL;
4506  			}
4507  			if (sbi->s_want_extra_isize < v)
4508  				sbi->s_want_extra_isize = v;
4509  
4510  			v = le16_to_cpu(es->s_min_extra_isize);
4511  			if (v > max) {
4512  				ext4_msg(sb, KERN_ERR,
4513  					 "bad s_min_extra_isize: %d", v);
4514  				return -EINVAL;
4515  			}
4516  			if (sbi->s_want_extra_isize < v)
4517  				sbi->s_want_extra_isize = v;
4518  		}
4519  	}
4520  
4521  	return 0;
4522  }
4523  
4524  #if IS_ENABLED(CONFIG_UNICODE)
4525  static int ext4_encoding_init(struct super_block *sb, struct ext4_super_block *es)
4526  {
4527  	const struct ext4_sb_encodings *encoding_info;
4528  	struct unicode_map *encoding;
4529  	__u16 encoding_flags = le16_to_cpu(es->s_encoding_flags);
4530  
4531  	if (!ext4_has_feature_casefold(sb) || sb->s_encoding)
4532  		return 0;
4533  
4534  	encoding_info = ext4_sb_read_encoding(es);
4535  	if (!encoding_info) {
4536  		ext4_msg(sb, KERN_ERR,
4537  			"Encoding requested by superblock is unknown");
4538  		return -EINVAL;
4539  	}
4540  
4541  	encoding = utf8_load(encoding_info->version);
4542  	if (IS_ERR(encoding)) {
4543  		ext4_msg(sb, KERN_ERR,
4544  			"can't mount with superblock charset: %s-%u.%u.%u "
4545  			"not supported by the kernel. flags: 0x%x.",
4546  			encoding_info->name,
4547  			unicode_major(encoding_info->version),
4548  			unicode_minor(encoding_info->version),
4549  			unicode_rev(encoding_info->version),
4550  			encoding_flags);
4551  		return -EINVAL;
4552  	}
4553  	ext4_msg(sb, KERN_INFO, "Using encoding defined by superblock: "
4554  		"%s-%u.%u.%u with flags 0x%hx", encoding_info->name,
4555  		unicode_major(encoding_info->version),
4556  		unicode_minor(encoding_info->version),
4557  		unicode_rev(encoding_info->version),
4558  		encoding_flags);
4559  
4560  	sb->s_encoding = encoding;
4561  	sb->s_encoding_flags = encoding_flags;
4562  
4563  	return 0;
4564  }
4565  #else
4566  static inline int ext4_encoding_init(struct super_block *sb, struct ext4_super_block *es)
4567  {
4568  	return 0;
4569  }
4570  #endif
4571  
4572  static int ext4_init_metadata_csum(struct super_block *sb, struct ext4_super_block *es)
4573  {
4574  	struct ext4_sb_info *sbi = EXT4_SB(sb);
4575  
4576  	/* Warn if metadata_csum and gdt_csum are both set. */
4577  	if (ext4_has_feature_metadata_csum(sb) &&
4578  	    ext4_has_feature_gdt_csum(sb))
4579  		ext4_warning(sb, "metadata_csum and uninit_bg are "
4580  			     "redundant flags; please run fsck.");
4581  
4582  	/* Check for a known checksum algorithm */
4583  	if (!ext4_verify_csum_type(sb, es)) {
4584  		ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with "
4585  			 "unknown checksum algorithm.");
4586  		return -EINVAL;
4587  	}
4588  	ext4_setup_csum_trigger(sb, EXT4_JTR_ORPHAN_FILE,
4589  				ext4_orphan_file_block_trigger);
4590  
4591  	/* Load the checksum driver */
4592  	sbi->s_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
4593  	if (IS_ERR(sbi->s_chksum_driver)) {
4594  		int ret = PTR_ERR(sbi->s_chksum_driver);
4595  		ext4_msg(sb, KERN_ERR, "Cannot load crc32c driver.");
4596  		sbi->s_chksum_driver = NULL;
4597  		return ret;
4598  	}
4599  
4600  	/* Check superblock checksum */
4601  	if (!ext4_superblock_csum_verify(sb, es)) {
4602  		ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with "
4603  			 "invalid superblock checksum.  Run e2fsck?");
4604  		return -EFSBADCRC;
4605  	}
4606  
4607  	/* Precompute checksum seed for all metadata */
4608  	if (ext4_has_feature_csum_seed(sb))
4609  		sbi->s_csum_seed = le32_to_cpu(es->s_checksum_seed);
4610  	else if (ext4_has_metadata_csum(sb) || ext4_has_feature_ea_inode(sb))
4611  		sbi->s_csum_seed = ext4_chksum(sbi, ~0, es->s_uuid,
4612  					       sizeof(es->s_uuid));
4613  	return 0;
4614  }
4615  
4616  static int ext4_check_feature_compatibility(struct super_block *sb,
4617  					    struct ext4_super_block *es,
4618  					    int silent)
4619  {
4620  	struct ext4_sb_info *sbi = EXT4_SB(sb);
4621  
4622  	if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV &&
4623  	    (ext4_has_compat_features(sb) ||
4624  	     ext4_has_ro_compat_features(sb) ||
4625  	     ext4_has_incompat_features(sb)))
4626  		ext4_msg(sb, KERN_WARNING,
4627  		       "feature flags set on rev 0 fs, "
4628  		       "running e2fsck is recommended");
4629  
4630  	if (es->s_creator_os == cpu_to_le32(EXT4_OS_HURD)) {
4631  		set_opt2(sb, HURD_COMPAT);
4632  		if (ext4_has_feature_64bit(sb)) {
4633  			ext4_msg(sb, KERN_ERR,
4634  				 "The Hurd can't support 64-bit file systems");
4635  			return -EINVAL;
4636  		}
4637  
4638  		/*
4639  		 * ea_inode feature uses l_i_version field which is not
4640  		 * available in HURD_COMPAT mode.
4641  		 */
4642  		if (ext4_has_feature_ea_inode(sb)) {
4643  			ext4_msg(sb, KERN_ERR,
4644  				 "ea_inode feature is not supported for Hurd");
4645  			return -EINVAL;
4646  		}
4647  	}
4648  
4649  	if (IS_EXT2_SB(sb)) {
4650  		if (ext2_feature_set_ok(sb))
4651  			ext4_msg(sb, KERN_INFO, "mounting ext2 file system "
4652  				 "using the ext4 subsystem");
4653  		else {
4654  			/*
4655  			 * If we're probing, be silent if this looks like
4656  			 * it's actually an ext[34] filesystem.
4657  			 */
4658  			if (silent && ext4_feature_set_ok(sb, sb_rdonly(sb)))
4659  				return -EINVAL;
4660  			ext4_msg(sb, KERN_ERR, "couldn't mount as ext2 due "
4661  				 "to feature incompatibilities");
4662  			return -EINVAL;
4663  		}
4664  	}
4665  
4666  	if (IS_EXT3_SB(sb)) {
4667  		if (ext3_feature_set_ok(sb))
4668  			ext4_msg(sb, KERN_INFO, "mounting ext3 file system "
4669  				 "using the ext4 subsystem");
4670  		else {
4671  			/*
4672  			 * If we're probing, be silent if this looks
4673  			 * like it's actually an ext4 filesystem.
4674  			 */
4675  			if (silent && ext4_feature_set_ok(sb, sb_rdonly(sb)))
4676  				return -EINVAL;
4677  			ext4_msg(sb, KERN_ERR, "couldn't mount as ext3 due "
4678  				 "to feature incompatibilities");
4679  			return -EINVAL;
4680  		}
4681  	}
4682  
4683  	/*
4684  	 * Check feature flags regardless of the revision level, since we
4685  	 * previously didn't change the revision level when setting the flags,
4686  	 * so there is a chance incompat flags are set on a rev 0 filesystem.
4687  	 */
4688  	if (!ext4_feature_set_ok(sb, (sb_rdonly(sb))))
4689  		return -EINVAL;
4690  
4691  	if (sbi->s_daxdev) {
4692  		if (sb->s_blocksize == PAGE_SIZE)
4693  			set_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags);
4694  		else
4695  			ext4_msg(sb, KERN_ERR, "unsupported blocksize for DAX");
4696  	}
4697  
4698  	if (sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) {
4699  		if (ext4_has_feature_inline_data(sb)) {
4700  			ext4_msg(sb, KERN_ERR, "Cannot use DAX on a filesystem"
4701  					" that may contain inline data");
4702  			return -EINVAL;
4703  		}
4704  		if (!test_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags)) {
4705  			ext4_msg(sb, KERN_ERR,
4706  				"DAX unsupported by block device.");
4707  			return -EINVAL;
4708  		}
4709  	}
4710  
4711  	if (ext4_has_feature_encrypt(sb) && es->s_encryption_level) {
4712  		ext4_msg(sb, KERN_ERR, "Unsupported encryption level %d",
4713  			 es->s_encryption_level);
4714  		return -EINVAL;
4715  	}
4716  
4717  	return 0;
4718  }
4719  
4720  static int ext4_check_geometry(struct super_block *sb,
4721  			       struct ext4_super_block *es)
4722  {
4723  	struct ext4_sb_info *sbi = EXT4_SB(sb);
4724  	__u64 blocks_count;
4725  	int err;
4726  
4727  	if (le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) > (sb->s_blocksize / 4)) {
4728  		ext4_msg(sb, KERN_ERR,
4729  			 "Number of reserved GDT blocks insanely large: %d",
4730  			 le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks));
4731  		return -EINVAL;
4732  	}
4733  	/*
4734  	 * Test whether we have more sectors than will fit in sector_t,
4735  	 * and whether the max offset is addressable by the page cache.
4736  	 */
4737  	err = generic_check_addressable(sb->s_blocksize_bits,
4738  					ext4_blocks_count(es));
4739  	if (err) {
4740  		ext4_msg(sb, KERN_ERR, "filesystem"
4741  			 " too large to mount safely on this system");
4742  		return err;
4743  	}
4744  
4745  	/* check blocks count against device size */
4746  	blocks_count = sb_bdev_nr_blocks(sb);
4747  	if (blocks_count && ext4_blocks_count(es) > blocks_count) {
4748  		ext4_msg(sb, KERN_WARNING, "bad geometry: block count %llu "
4749  		       "exceeds size of device (%llu blocks)",
4750  		       ext4_blocks_count(es), blocks_count);
4751  		return -EINVAL;
4752  	}
4753  
4754  	/*
4755  	 * It makes no sense for the first data block to be beyond the end
4756  	 * of the filesystem.
4757  	 */
4758  	if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) {
4759  		ext4_msg(sb, KERN_WARNING, "bad geometry: first data "
4760  			 "block %u is beyond end of filesystem (%llu)",
4761  			 le32_to_cpu(es->s_first_data_block),
4762  			 ext4_blocks_count(es));
4763  		return -EINVAL;
4764  	}
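	/*
	 * With a 1 KiB block size the superblock lives in block 1 (the
	 * first 1024 bytes are reserved for the boot sector), so the
	 * first data block can only be 0 on larger block sizes.
	 */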
4765  	if ((es->s_first_data_block == 0) && (es->s_log_block_size == 0) &&
4766  	    (sbi->s_cluster_ratio == 1)) {
4767  		ext4_msg(sb, KERN_WARNING, "bad geometry: first data "
4768  			 "block is 0 with a 1k block and cluster size");
4769  		return -EINVAL;
4770  	}
4771  
4772  	blocks_count = (ext4_blocks_count(es) -
4773  			le32_to_cpu(es->s_first_data_block) +
4774  			EXT4_BLOCKS_PER_GROUP(sb) - 1);
4775  	do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb));
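	/*
	 * ext4_group_t is 32 bits wide; leave EXT4_DESC_PER_BLOCK of
	 * slack so rounding the group count up to whole descriptor
	 * blocks cannot overflow it.
	 */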
4776  	if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) {
4777  		ext4_msg(sb, KERN_WARNING, "groups count too large: %llu "
4778  		       "(block count %llu, first data block %u, "
4779  		       "blocks per group %lu)", blocks_count,
4780  		       ext4_blocks_count(es),
4781  		       le32_to_cpu(es->s_first_data_block),
4782  		       EXT4_BLOCKS_PER_GROUP(sb));
4783  		return -EINVAL;
4784  	}
4785  	sbi->s_groups_count = blocks_count;
4786  	sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count,
4787  			(EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb)));
4788  	if (((u64)sbi->s_groups_count * sbi->s_inodes_per_group) !=
4789  	    le32_to_cpu(es->s_inodes_count)) {
4790  		ext4_msg(sb, KERN_ERR, "inodes count not valid: %u vs %llu",
4791  			 le32_to_cpu(es->s_inodes_count),
4792  			 ((u64)sbi->s_groups_count * sbi->s_inodes_per_group));
4793  		return -EINVAL;
4794  	}
4795  
4796  	return 0;
4797  }
4798  
4799  static int ext4_group_desc_init(struct super_block *sb,
4800  				struct ext4_super_block *es,
4801  				ext4_fsblk_t logical_sb_block,
4802  				ext4_group_t *first_not_zeroed)
4803  {
4804  	struct ext4_sb_info *sbi = EXT4_SB(sb);
4805  	unsigned int db_count;
4806  	ext4_fsblk_t block;
4807  	int i;
4808  
4809  	db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
4810  		   EXT4_DESC_PER_BLOCK(sb);
4811  	if (ext4_has_feature_meta_bg(sb)) {
4812  		if (le32_to_cpu(es->s_first_meta_bg) > db_count) {
4813  			ext4_msg(sb, KERN_WARNING,
4814  				 "first meta block group too large: %u "
4815  				 "(group descriptor block count %u)",
4816  				 le32_to_cpu(es->s_first_meta_bg), db_count);
4817  			return -EINVAL;
4818  		}
4819  	}
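	/*
	 * s_group_desc is RCU-protected: online resize may allocate a
	 * larger array and publish it while readers are still walking
	 * the old one.
	 */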
4820  	rcu_assign_pointer(sbi->s_group_desc,
4821  			   kvmalloc_array(db_count,
4822  					  sizeof(struct buffer_head *),
4823  					  GFP_KERNEL));
4824  	if (sbi->s_group_desc == NULL) {
4825  		ext4_msg(sb, KERN_ERR, "not enough memory");
4826  		return -ENOMEM;
4827  	}
4828  
4829  	bgl_lock_init(sbi->s_blockgroup_lock);
4830  
4831  	/* Pre-read the descriptors into the buffer cache */
4832  	for (i = 0; i < db_count; i++) {
4833  		block = descriptor_loc(sb, logical_sb_block, i);
4834  		ext4_sb_breadahead_unmovable(sb, block);
4835  	}
4836  
4837  	for (i = 0; i < db_count; i++) {
4838  		struct buffer_head *bh;
4839  
4840  		block = descriptor_loc(sb, logical_sb_block, i);
4841  		bh = ext4_sb_bread_unmovable(sb, block);
4842  		if (IS_ERR(bh)) {
4843  			ext4_msg(sb, KERN_ERR,
4844  			       "can't read group descriptor %d", i);
4845  			sbi->s_gdb_count = i;
4846  			return PTR_ERR(bh);
4847  		}
4848  		rcu_read_lock();
4849  		rcu_dereference(sbi->s_group_desc)[i] = bh;
4850  		rcu_read_unlock();
4851  	}
4852  	sbi->s_gdb_count = db_count;
4853  	if (!ext4_check_descriptors(sb, logical_sb_block, first_not_zeroed)) {
4854  		ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
4855  		return -EFSCORRUPTED;
4856  	}
4857  
4858  	return 0;
4859  }
4860  
4861  static int ext4_load_and_init_journal(struct super_block *sb,
4862  				      struct ext4_super_block *es,
4863  				      struct ext4_fs_context *ctx)
4864  {
4865  	struct ext4_sb_info *sbi = EXT4_SB(sb);
4866  	int err;
4867  
4868  	err = ext4_load_journal(sb, es, ctx->journal_devnum);
4869  	if (err)
4870  		return err;
4871  
4872  	if (ext4_has_feature_64bit(sb) &&
4873  	    !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
4874  				       JBD2_FEATURE_INCOMPAT_64BIT)) {
4875  		ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature");
4876  		goto out;
4877  	}
4878  
4879  	if (!set_journal_csum_feature_set(sb)) {
4880  		ext4_msg(sb, KERN_ERR, "Failed to set journal checksum "
4881  			 "feature set");
4882  		goto out;
4883  	}
4884  
4885  	if (test_opt2(sb, JOURNAL_FAST_COMMIT) &&
4886  		!jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
4887  					  JBD2_FEATURE_INCOMPAT_FAST_COMMIT)) {
4888  		ext4_msg(sb, KERN_ERR,
4889  			"Failed to set fast commit journal feature");
4890  		goto out;
4891  	}
4892  
4893  	/* We have now updated the journal if required, so we can
4894  	 * validate the data journaling mode. */
4895  	switch (test_opt(sb, DATA_FLAGS)) {
4896  	case 0:
4897  		/* No mode set, assume a default based on the journal
4898  		 * capabilities: ORDERED_DATA if the journal can
4899  		 * cope, else JOURNAL_DATA
4900  		 */
4901  		if (jbd2_journal_check_available_features
4902  		    (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) {
4903  			set_opt(sb, ORDERED_DATA);
4904  			sbi->s_def_mount_opt |= EXT4_MOUNT_ORDERED_DATA;
4905  		} else {
4906  			set_opt(sb, JOURNAL_DATA);
4907  			sbi->s_def_mount_opt |= EXT4_MOUNT_JOURNAL_DATA;
4908  		}
4909  		break;
4910  
4911  	case EXT4_MOUNT_ORDERED_DATA:
4912  	case EXT4_MOUNT_WRITEBACK_DATA:
4913  		if (!jbd2_journal_check_available_features
4914  		    (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) {
4915  			ext4_msg(sb, KERN_ERR, "Journal does not support "
4916  			       "requested data journaling mode");
4917  			goto out;
4918  		}
4919  		break;
4920  	default:
4921  		break;
4922  	}
4923  
4924  	if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA &&
4925  	    test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
4926  		ext4_msg(sb, KERN_ERR, "can't mount with "
4927  			"journal_async_commit in data=ordered mode");
4928  		goto out;
4929  	}
4930  
4931  	set_task_ioprio(sbi->s_journal->j_task, ctx->journal_ioprio);
4932  
4933  	sbi->s_journal->j_submit_inode_data_buffers =
4934  		ext4_journal_submit_inode_data_buffers;
4935  	sbi->s_journal->j_finish_inode_data_buffers =
4936  		ext4_journal_finish_inode_data_buffers;
4937  
4938  	return 0;
4939  
4940  out:
4941  	/* flush s_sb_upd_work before destroying the journal. */
4942  	flush_work(&sbi->s_sb_upd_work);
4943  	jbd2_journal_destroy(sbi->s_journal);
4944  	sbi->s_journal = NULL;
4945  	return -EINVAL;
4946  }
4947  
4948  static int ext4_check_journal_data_mode(struct super_block *sb)
4949  {
4950  	if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
4951  		printk_once(KERN_WARNING "EXT4-fs: Warning: mounting with "
4952  			    "data=journal disables delayed allocation, "
4953  			    "dioread_nolock, O_DIRECT and fast_commit support!\n");
4954  		/* can't mount with both data=journal and dioread_nolock. */
4955  		clear_opt(sb, DIOREAD_NOLOCK);
4956  		clear_opt2(sb, JOURNAL_FAST_COMMIT);
4957  		if (test_opt2(sb, EXPLICIT_DELALLOC)) {
4958  			ext4_msg(sb, KERN_ERR, "can't mount with "
4959  				 "both data=journal and delalloc");
4960  			return -EINVAL;
4961  		}
4962  		if (test_opt(sb, DAX_ALWAYS)) {
4963  			ext4_msg(sb, KERN_ERR, "can't mount with "
4964  				 "both data=journal and dax");
4965  			return -EINVAL;
4966  		}
4967  		if (ext4_has_feature_encrypt(sb)) {
4968  			ext4_msg(sb, KERN_WARNING,
4969  				 "encrypted files will use data=ordered "
4970  				 "instead of data journaling mode");
4971  		}
4972  		if (test_opt(sb, DELALLOC))
4973  			clear_opt(sb, DELALLOC);
4974  	} else {
4975  		sb->s_iflags |= SB_I_CGROUPWB;
4976  	}
4977  
4978  	return 0;
4979  }
4980  
4981  static int ext4_load_super(struct super_block *sb, ext4_fsblk_t *lsb,
4982  			   int silent)
4983  {
4984  	struct ext4_sb_info *sbi = EXT4_SB(sb);
4985  	struct ext4_super_block *es;
4986  	ext4_fsblk_t logical_sb_block;
4987  	unsigned long offset = 0;
4988  	struct buffer_head *bh;
4989  	int ret = -EINVAL;
4990  	int blocksize;
4991  
4992  	blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE);
4993  	if (!blocksize) {
4994  		ext4_msg(sb, KERN_ERR, "unable to set blocksize");
4995  		return -EINVAL;
4996  	}
4997  
4998  	/*
4999  	 * The ext4 superblock will not be buffer aligned for other than 1kB
5000  	 * block sizes.  We need to calculate the offset from buffer start.
5001  	 */
5002  	if (blocksize != EXT4_MIN_BLOCK_SIZE) {
5003  		logical_sb_block = sbi->s_sb_block * EXT4_MIN_BLOCK_SIZE;
5004  		offset = do_div(logical_sb_block, blocksize);
5005  	} else {
5006  		logical_sb_block = sbi->s_sb_block;
5007  	}
5008  
5009  	bh = ext4_sb_bread_unmovable(sb, logical_sb_block);
5010  	if (IS_ERR(bh)) {
5011  		ext4_msg(sb, KERN_ERR, "unable to read superblock");
5012  		return PTR_ERR(bh);
5013  	}
5014  	/*
5015  	 * Note: s_es must be initialized as soon as possible because
5016  	 *       some ext4 macros depend on its value
5017  	 */
5018  	es = (struct ext4_super_block *) (bh->b_data + offset);
5019  	sbi->s_es = es;
5020  	sb->s_magic = le16_to_cpu(es->s_magic);
5021  	if (sb->s_magic != EXT4_SUPER_MAGIC) {
5022  		if (!silent)
5023  			ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem");
5024  		goto out;
5025  	}
5026  
5027  	if (le32_to_cpu(es->s_log_block_size) >
5028  	    (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) {
5029  		ext4_msg(sb, KERN_ERR,
5030  			 "Invalid log block size: %u",
5031  			 le32_to_cpu(es->s_log_block_size));
5032  		goto out;
5033  	}
5034  	if (le32_to_cpu(es->s_log_cluster_size) >
5035  	    (EXT4_MAX_CLUSTER_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) {
5036  		ext4_msg(sb, KERN_ERR,
5037  			 "Invalid log cluster size: %u",
5038  			 le32_to_cpu(es->s_log_cluster_size));
5039  		goto out;
5040  	}
5041  
5042  	blocksize = EXT4_MIN_BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
5043  
5044  	/*
5045  	 * If the default block size is not the same as the real block size,
5046  	 * we need to reload it.
5047  	 */
5048  	if (sb->s_blocksize == blocksize) {
5049  		*lsb = logical_sb_block;
5050  		sbi->s_sbh = bh;
5051  		return 0;
5052  	}
5053  
5054  	/*
5055  	 * bh must be released before kill_bdev(), otherwise neither
5056  	 * it nor its page will be freed. kill_bdev() is called by
5057  	 * sb_set_blocksize().
5058  	 */
5059  	brelse(bh);
5060  	/* Validate the filesystem blocksize */
5061  	if (!sb_set_blocksize(sb, blocksize)) {
5062  		ext4_msg(sb, KERN_ERR, "bad block size %d",
5063  				blocksize);
5064  		bh = NULL;
5065  		goto out;
5066  	}
5067  
5068  	logical_sb_block = sbi->s_sb_block * EXT4_MIN_BLOCK_SIZE;
5069  	offset = do_div(logical_sb_block, blocksize);
5070  	bh = ext4_sb_bread_unmovable(sb, logical_sb_block);
5071  	if (IS_ERR(bh)) {
5072  		ext4_msg(sb, KERN_ERR, "Can't read superblock on 2nd try");
5073  		ret = PTR_ERR(bh);
5074  		bh = NULL;
5075  		goto out;
5076  	}
5077  	es = (struct ext4_super_block *)(bh->b_data + offset);
5078  	sbi->s_es = es;
5079  	if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) {
5080  		ext4_msg(sb, KERN_ERR, "Magic mismatch, very weird!");
5081  		goto out;
5082  	}
5083  	*lsb = logical_sb_block;
5084  	sbi->s_sbh = bh;
5085  	return 0;
5086  out:
5087  	brelse(bh);
5088  	return ret;
5089  }
5090  
5091  static int ext4_hash_info_init(struct super_block *sb)
5092  {
5093  	struct ext4_sb_info *sbi = EXT4_SB(sb);
5094  	struct ext4_super_block *es = sbi->s_es;
5095  	unsigned int i;
5096  
5097  	sbi->s_def_hash_version = es->s_def_hash_version;
5098  
5099  	if (sbi->s_def_hash_version > DX_HASH_LAST) {
5100  		ext4_msg(sb, KERN_ERR,
5101  			 "Invalid default hash set in the superblock");
5102  		return -EINVAL;
5103  	} else if (sbi->s_def_hash_version == DX_HASH_SIPHASH) {
5104  		ext4_msg(sb, KERN_ERR,
5105  			 "SIPHASH is not a valid default hash value");
5106  		return -EINVAL;
5107  	}
5108  
5109  	for (i = 0; i < 4; i++)
5110  		sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
5111  
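	/*
	 * Historic kernels computed the dx hash with a plain "char",
	 * whose signedness varies by architecture.  If neither hash
	 * flag is set yet, pick the variant matching this platform and
	 * record the choice so the tree stays readable everywhere.
	 */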
5112  	if (ext4_has_feature_dir_index(sb)) {
5113  		i = le32_to_cpu(es->s_flags);
5114  		if (i & EXT2_FLAGS_UNSIGNED_HASH)
5115  			sbi->s_hash_unsigned = 3;
5116  		else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) {
5117  #ifdef __CHAR_UNSIGNED__
5118  			if (!sb_rdonly(sb))
5119  				es->s_flags |=
5120  					cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH);
5121  			sbi->s_hash_unsigned = 3;
5122  #else
5123  			if (!sb_rdonly(sb))
5124  				es->s_flags |=
5125  					cpu_to_le32(EXT2_FLAGS_SIGNED_HASH);
5126  #endif
5127  		}
5128  	}
5129  	return 0;
5130  }
5131  
5132  static int ext4_block_group_meta_init(struct super_block *sb, int silent)
5133  {
5134  	struct ext4_sb_info *sbi = EXT4_SB(sb);
5135  	struct ext4_super_block *es = sbi->s_es;
5136  	int has_huge_files;
5137  
5138  	has_huge_files = ext4_has_feature_huge_file(sb);
5139  	sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits,
5140  						      has_huge_files);
5141  	sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files);
5142  
5143  	sbi->s_desc_size = le16_to_cpu(es->s_desc_size);
5144  	if (ext4_has_feature_64bit(sb)) {
5145  		if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT ||
5146  		    sbi->s_desc_size > EXT4_MAX_DESC_SIZE ||
5147  		    !is_power_of_2(sbi->s_desc_size)) {
5148  			ext4_msg(sb, KERN_ERR,
5149  			       "unsupported descriptor size %lu",
5150  			       sbi->s_desc_size);
5151  			return -EINVAL;
5152  		}
5153  	} else
5154  		sbi->s_desc_size = EXT4_MIN_DESC_SIZE;
5155  
5156  	sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
5157  	sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
5158  
5159  	sbi->s_inodes_per_block = sb->s_blocksize / EXT4_INODE_SIZE(sb);
5160  	if (sbi->s_inodes_per_block == 0 || sbi->s_blocks_per_group == 0) {
5161  		if (!silent)
5162  			ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem");
5163  		return -EINVAL;
5164  	}
5165  	if (sbi->s_inodes_per_group < sbi->s_inodes_per_block ||
5166  	    sbi->s_inodes_per_group > sb->s_blocksize * 8) {
5167  		ext4_msg(sb, KERN_ERR, "invalid inodes per group: %lu",
5168  			 sbi->s_inodes_per_group);
5169  		return -EINVAL;
5170  	}
5171  	sbi->s_itb_per_group = sbi->s_inodes_per_group /
5172  					sbi->s_inodes_per_block;
5173  	sbi->s_desc_per_block = sb->s_blocksize / EXT4_DESC_SIZE(sb);
5174  	sbi->s_mount_state = le16_to_cpu(es->s_state) & ~EXT4_FC_REPLAY;
5175  	sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb));
5176  	sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb));
5177  
5178  	return 0;
5179  }
5180  
5181  /*
5182   * It's hard to get stripe-aligned blocks if the stripe is not aligned
5183   * with the cluster size.  Just disable striping and alert the user, to
5184   * simplify the code and avoid stripe-aligned allocations that would rarely succeed.
5185   */
5186  static bool ext4_is_stripe_incompatible(struct super_block *sb, unsigned long stripe)
5187  {
5188  	struct ext4_sb_info *sbi = EXT4_SB(sb);
5189  	return (stripe > 0 && sbi->s_cluster_ratio > 1 &&
5190  		stripe % sbi->s_cluster_ratio != 0);
5191  }
5192  
5193  static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
5194  {
5195  	struct ext4_super_block *es = NULL;
5196  	struct ext4_sb_info *sbi = EXT4_SB(sb);
5197  	ext4_fsblk_t logical_sb_block;
5198  	struct inode *root;
5199  	int needs_recovery;
5200  	int err;
5201  	ext4_group_t first_not_zeroed;
5202  	struct ext4_fs_context *ctx = fc->fs_private;
5203  	int silent = fc->sb_flags & SB_SILENT;
5204  
5205  	/* Set defaults for the variables that will be set during parsing */
5206  	if (!(ctx->spec & EXT4_SPEC_JOURNAL_IOPRIO))
5207  		ctx->journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
5208  
5209  	sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS;
5210  	sbi->s_sectors_written_start =
5211  		part_stat_read(sb->s_bdev, sectors[STAT_WRITE]);
5212  
5213  	err = ext4_load_super(sb, &logical_sb_block, silent);
5214  	if (err)
5215  		goto out_fail;
5216  
5217  	es = sbi->s_es;
5218  	sbi->s_kbytes_written = le64_to_cpu(es->s_kbytes_written);
5219  
5220  	err = ext4_init_metadata_csum(sb, es);
5221  	if (err)
5222  		goto failed_mount;
5223  
5224  	ext4_set_def_opts(sb, es);
5225  
5226  	sbi->s_resuid = make_kuid(&init_user_ns, le16_to_cpu(es->s_def_resuid));
5227  	sbi->s_resgid = make_kgid(&init_user_ns, le16_to_cpu(es->s_def_resgid));
5228  	sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ;
5229  	sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME;
5230  	sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME;
5231  
5232  	/*
5233  	 * set default s_li_wait_mult for lazyinit, for the case there is
5234  	 * no mount option specified.
5235  	 */
5236  	sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT;
5237  
5238  	err = ext4_inode_info_init(sb, es);
5239  	if (err)
5240  		goto failed_mount;
5241  
5242  	err = parse_apply_sb_mount_options(sb, ctx);
5243  	if (err < 0)
5244  		goto failed_mount;
5245  
5246  	sbi->s_def_mount_opt = sbi->s_mount_opt;
5247  	sbi->s_def_mount_opt2 = sbi->s_mount_opt2;
5248  
5249  	err = ext4_check_opt_consistency(fc, sb);
5250  	if (err < 0)
5251  		goto failed_mount;
5252  
5253  	ext4_apply_options(fc, sb);
5254  
5255  	err = ext4_encoding_init(sb, es);
5256  	if (err)
5257  		goto failed_mount;
5258  
5259  	err = ext4_check_journal_data_mode(sb);
5260  	if (err)
5261  		goto failed_mount;
5262  
5263  	sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |
5264  		(test_opt(sb, POSIX_ACL) ? SB_POSIXACL : 0);
5265  
5266  	/* i_version is always enabled now */
5267  	sb->s_flags |= SB_I_VERSION;
5268  
5269  	err = ext4_check_feature_compatibility(sb, es, silent);
5270  	if (err)
5271  		goto failed_mount;
5272  
5273  	err = ext4_block_group_meta_init(sb, silent);
5274  	if (err)
5275  		goto failed_mount;
5276  
5277  	err = ext4_hash_info_init(sb);
5278  	if (err)
5279  		goto failed_mount;
5280  
5281  	err = ext4_handle_clustersize(sb);
5282  	if (err)
5283  		goto failed_mount;
5284  
5285  	err = ext4_check_geometry(sb, es);
5286  	if (err)
5287  		goto failed_mount;
5288  
5289  	timer_setup(&sbi->s_err_report, print_daily_error_info, 0);
5290  	spin_lock_init(&sbi->s_error_lock);
5291  	INIT_WORK(&sbi->s_sb_upd_work, update_super_work);
5292  
5293  	err = ext4_group_desc_init(sb, es, logical_sb_block, &first_not_zeroed);
5294  	if (err)
5295  		goto failed_mount3;
5296  
5297  	err = ext4_es_register_shrinker(sbi);
5298  	if (err)
5299  		goto failed_mount3;
5300  
5301  	sbi->s_stripe = ext4_get_stripe_size(sbi);
5302  	if (ext4_is_stripe_incompatible(sb, sbi->s_stripe)) {
5303  		ext4_msg(sb, KERN_WARNING,
5304  			 "stripe (%lu) is not aligned with cluster size (%u), "
5305  			 "stripe is disabled",
5306  			 sbi->s_stripe, sbi->s_cluster_ratio);
5307  		sbi->s_stripe = 0;
5308  	}
5309  	sbi->s_extent_max_zeroout_kb = 32;
5310  
5311  	/*
5312  	 * set up enough so that it can read an inode
5313  	 */
5314  	sb->s_op = &ext4_sops;
5315  	sb->s_export_op = &ext4_export_ops;
5316  	sb->s_xattr = ext4_xattr_handlers;
5317  #ifdef CONFIG_FS_ENCRYPTION
5318  	sb->s_cop = &ext4_cryptops;
5319  #endif
5320  #ifdef CONFIG_FS_VERITY
5321  	sb->s_vop = &ext4_verityops;
5322  #endif
5323  #ifdef CONFIG_QUOTA
5324  	sb->dq_op = &ext4_quota_operations;
5325  	if (ext4_has_feature_quota(sb))
5326  		sb->s_qcop = &dquot_quotactl_sysfile_ops;
5327  	else
5328  		sb->s_qcop = &ext4_qctl_operations;
5329  	sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
5330  #endif
5331  	super_set_uuid(sb, es->s_uuid, sizeof(es->s_uuid));
5332  	super_set_sysfs_name_bdev(sb);
5333  
5334  	INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
5335  	mutex_init(&sbi->s_orphan_lock);
5336  
5337  	spin_lock_init(&sbi->s_bdev_wb_lock);
5338  
5339  	ext4_fast_commit_init(sb);
5340  
5341  	sb->s_root = NULL;
5342  
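	/*
	 * Recovery is needed if orphan processing is pending (either
	 * the old-style s_last_orphan list or the orphan file) or the
	 * journal was not cleanly shut down.
	 */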
5343  	needs_recovery = (es->s_last_orphan != 0 ||
5344  			  ext4_has_feature_orphan_present(sb) ||
5345  			  ext4_has_feature_journal_needs_recovery(sb));
5346  
5347  	if (ext4_has_feature_mmp(sb) && !sb_rdonly(sb)) {
5348  		err = ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block));
5349  		if (err)
5350  			goto failed_mount3a;
5351  	}
5352  
5353  	err = -EINVAL;
5354  	/*
5355  	 * The first inode we look at is the journal inode.  Don't try
5356  	 * root first: it may be modified in the journal!
5357  	 */
5358  	if (!test_opt(sb, NOLOAD) && ext4_has_feature_journal(sb)) {
5359  		err = ext4_load_and_init_journal(sb, es, ctx);
5360  		if (err)
5361  			goto failed_mount3a;
5362  	} else if (test_opt(sb, NOLOAD) && !sb_rdonly(sb) &&
5363  		   ext4_has_feature_journal_needs_recovery(sb)) {
5364  		ext4_msg(sb, KERN_ERR, "required journal recovery "
5365  		       "suppressed and not mounted read-only");
5366  		goto failed_mount3a;
5367  	} else {
5368  		/* Nojournal mode, all journal mount options are illegal */
5369  		if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
5370  			ext4_msg(sb, KERN_ERR, "can't mount with "
5371  				 "journal_async_commit, fs mounted w/o journal");
5372  			goto failed_mount3a;
5373  		}
5374  
5375  		if (test_opt2(sb, EXPLICIT_JOURNAL_CHECKSUM)) {
5376  			ext4_msg(sb, KERN_ERR, "can't mount with "
5377  				 "journal_checksum, fs mounted w/o journal");
5378  			goto failed_mount3a;
5379  		}
5380  		if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) {
5381  			ext4_msg(sb, KERN_ERR, "can't mount with "
5382  				 "commit=%lu, fs mounted w/o journal",
5383  				 sbi->s_commit_interval / HZ);
5384  			goto failed_mount3a;
5385  		}
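		/*
		 * XOR-ing the current options against the defaults
		 * leaves only the bits that were set explicitly, so
		 * this catches a data= option given without a journal.
		 */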
5386  		if (EXT4_MOUNT_DATA_FLAGS &
5387  		    (sbi->s_mount_opt ^ sbi->s_def_mount_opt)) {
5388  			ext4_msg(sb, KERN_ERR, "can't mount with "
5389  				 "data=, fs mounted w/o journal");
5390  			goto failed_mount3a;
5391  		}
5392  		sbi->s_def_mount_opt &= ~EXT4_MOUNT_JOURNAL_CHECKSUM;
5393  		clear_opt(sb, JOURNAL_CHECKSUM);
5394  		clear_opt(sb, DATA_FLAGS);
5395  		clear_opt2(sb, JOURNAL_FAST_COMMIT);
5396  		sbi->s_journal = NULL;
5397  		needs_recovery = 0;
5398  	}
5399  
5400  	if (!test_opt(sb, NO_MBCACHE)) {
5401  		sbi->s_ea_block_cache = ext4_xattr_create_cache();
5402  		if (!sbi->s_ea_block_cache) {
5403  			ext4_msg(sb, KERN_ERR,
5404  				 "Failed to create ea_block_cache");
5405  			err = -EINVAL;
5406  			goto failed_mount_wq;
5407  		}
5408  
5409  		if (ext4_has_feature_ea_inode(sb)) {
5410  			sbi->s_ea_inode_cache = ext4_xattr_create_cache();
5411  			if (!sbi->s_ea_inode_cache) {
5412  				ext4_msg(sb, KERN_ERR,
5413  					 "Failed to create ea_inode_cache");
5414  				err = -EINVAL;
5415  				goto failed_mount_wq;
5416  			}
5417  		}
5418  	}
5419  
5420  	/*
5421  	 * Get the # of file system overhead blocks from the
5422  	 * superblock if present.
5423  	 */
5424  	sbi->s_overhead = le32_to_cpu(es->s_overhead_clusters);
5425  	/* ignore the precalculated value if it is ridiculous */
5426  	if (sbi->s_overhead > ext4_blocks_count(es))
5427  		sbi->s_overhead = 0;
5428  	/*
5429  	 * If the bigalloc feature is not enabled recalculating the
5430  	 * overhead doesn't take long, so we might as well just redo
5431  	 * it to make sure we are using the correct value.
5432  	 */
5433  	if (!ext4_has_feature_bigalloc(sb))
5434  		sbi->s_overhead = 0;
5435  	if (sbi->s_overhead == 0) {
5436  		err = ext4_calculate_overhead(sb);
5437  		if (err)
5438  			goto failed_mount_wq;
5439  	}
5440  
5441  	/*
5442  	 * The maximum number of concurrent works can be high and
5443  	 * concurrency isn't really necessary.  Limit it to 1.
5444  	 */
5445  	EXT4_SB(sb)->rsv_conversion_wq =
5446  		alloc_workqueue("ext4-rsv-conversion", WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
5447  	if (!EXT4_SB(sb)->rsv_conversion_wq) {
5448  		printk(KERN_ERR "EXT4-fs: failed to create workqueue\n");
5449  		err = -ENOMEM;
5450  		goto failed_mount4;
5451  	}
5452  
5453  	/*
5454  	 * The jbd2_journal_load will have done any necessary log recovery,
5455  	 * so we can safely mount the rest of the filesystem now.
5456  	 */
5457  
5458  	root = ext4_iget(sb, EXT4_ROOT_INO, EXT4_IGET_SPECIAL);
5459  	if (IS_ERR(root)) {
5460  		ext4_msg(sb, KERN_ERR, "get root inode failed");
5461  		err = PTR_ERR(root);
5462  		root = NULL;
5463  		goto failed_mount4;
5464  	}
5465  	if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
5466  		ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck");
5467  		iput(root);
5468  		err = -EFSCORRUPTED;
5469  		goto failed_mount4;
5470  	}
5471  
5472  	generic_set_sb_d_ops(sb);
5473  	sb->s_root = d_make_root(root);
5474  	if (!sb->s_root) {
5475  		ext4_msg(sb, KERN_ERR, "get root dentry failed");
5476  		err = -ENOMEM;
5477  		goto failed_mount4;
5478  	}
5479  
5480  	err = ext4_setup_super(sb, es, sb_rdonly(sb));
5481  	if (err == -EROFS) {
5482  		sb->s_flags |= SB_RDONLY;
5483  	} else if (err)
5484  		goto failed_mount4a;
5485  
5486  	ext4_set_resv_clusters(sb);
5487  
5488  	if (test_opt(sb, BLOCK_VALIDITY)) {
5489  		err = ext4_setup_system_zone(sb);
5490  		if (err) {
5491  			ext4_msg(sb, KERN_ERR, "failed to initialize system "
5492  				 "zone (%d)", err);
5493  			goto failed_mount4a;
5494  		}
5495  	}
5496  	ext4_fc_replay_cleanup(sb);
5497  
5498  	ext4_ext_init(sb);
5499  
5500  	/*
5501  	 * Enable optimize_scan if number of groups is > threshold. This can be
5502  	 * turned off by passing "mb_optimize_scan=0". This can also be
5503  	 * turned on forcefully by passing "mb_optimize_scan=1".
5504  	 */
5505  	if (!(ctx->spec & EXT4_SPEC_mb_optimize_scan)) {
5506  		if (sbi->s_groups_count >= MB_DEFAULT_LINEAR_SCAN_THRESHOLD)
5507  			set_opt2(sb, MB_OPTIMIZE_SCAN);
5508  		else
5509  			clear_opt2(sb, MB_OPTIMIZE_SCAN);
5510  	}
5511  
5512  	err = ext4_mb_init(sb);
5513  	if (err) {
5514  		ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)",
5515  			 err);
5516  		goto failed_mount5;
5517  	}
5518  
5519  	/*
5520  	 * We can only set up the journal commit callback once
5521  	 * mballoc is initialized
5522  	 */
5523  	if (sbi->s_journal)
5524  		sbi->s_journal->j_commit_callback =
5525  			ext4_journal_commit_callback;
5526  
5527  	err = ext4_percpu_param_init(sbi);
5528  	if (err)
5529  		goto failed_mount6;
5530  
5531  	if (ext4_has_feature_flex_bg(sb))
5532  		if (!ext4_fill_flex_info(sb)) {
5533  			ext4_msg(sb, KERN_ERR,
5534  			       "unable to initialize "
5535  			       "flex_bg meta info!");
5536  			err = -ENOMEM;
5537  			goto failed_mount6;
5538  		}
5539  
5540  	err = ext4_register_li_request(sb, first_not_zeroed);
5541  	if (err)
5542  		goto failed_mount6;
5543  
5544  	err = ext4_init_orphan_info(sb);
5545  	if (err)
5546  		goto failed_mount7;
5547  #ifdef CONFIG_QUOTA
5548  	/* Enable quota usage during mount. */
5549  	if (ext4_has_feature_quota(sb) && !sb_rdonly(sb)) {
5550  		err = ext4_enable_quotas(sb);
5551  		if (err)
5552  			goto failed_mount8;
5553  	}
5554  #endif  /* CONFIG_QUOTA */
5555  
5556  	/*
5557  	 * Save the original bdev mapping's wb_err value which could be
5558  	 * used to detect the metadata async write error.
5559  	 */
5560  	errseq_check_and_advance(&sb->s_bdev->bd_mapping->wb_err,
5561  				 &sbi->s_bdev_wb_err);
5562  	EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS;
5563  	ext4_orphan_cleanup(sb, es);
5564  	EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
5565  	/*
5566  	 * Update the checksum after updating free space/inode counters and
5567  	 * ext4_orphan_cleanup. Otherwise the superblock can have an incorrect
5568  	 * checksum in the buffer cache until it is written out and
5569  	 * e2fsprogs programs trying to open a file system immediately
5570  	 * after it is mounted can fail.
5571  	 */
5572  	ext4_superblock_csum_set(sb);
5573  	if (needs_recovery) {
5574  		ext4_msg(sb, KERN_INFO, "recovery complete");
5575  		err = ext4_mark_recovery_complete(sb, es);
5576  		if (err)
5577  			goto failed_mount9;
5578  	}
5579  
5580  	if (test_opt(sb, DISCARD) && !bdev_max_discard_sectors(sb->s_bdev))
5581  		ext4_msg(sb, KERN_WARNING,
5582  			 "mounting with \"discard\" option, but the device does not support discard");
5583  
5584  	if (es->s_error_count)
5585  		mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */
5586  
5587  	/* Enable message ratelimiting. Default is 10 messages per 5 secs. */
5588  	ratelimit_state_init(&sbi->s_err_ratelimit_state, 5 * HZ, 10);
5589  	ratelimit_state_init(&sbi->s_warning_ratelimit_state, 5 * HZ, 10);
5590  	ratelimit_state_init(&sbi->s_msg_ratelimit_state, 5 * HZ, 10);
5591  	atomic_set(&sbi->s_warning_count, 0);
5592  	atomic_set(&sbi->s_msg_count, 0);
5593  
5594  	/* Register sysfs after all initializations are complete. */
5595  	err = ext4_register_sysfs(sb);
5596  	if (err)
5597  		goto failed_mount9;
5598  
5599  	return 0;
5600  
5601  failed_mount9:
5602  	ext4_quotas_off(sb, EXT4_MAXQUOTAS);
5603  failed_mount8: __maybe_unused
5604  	ext4_release_orphan_info(sb);
5605  failed_mount7:
5606  	ext4_unregister_li_request(sb);
5607  failed_mount6:
5608  	ext4_mb_release(sb);
5609  	ext4_flex_groups_free(sbi);
5610  	ext4_percpu_param_destroy(sbi);
5611  failed_mount5:
5612  	ext4_ext_release(sb);
5613  	ext4_release_system_zone(sb);
5614  failed_mount4a:
5615  	dput(sb->s_root);
5616  	sb->s_root = NULL;
5617  failed_mount4:
5618  	ext4_msg(sb, KERN_ERR, "mount failed");
5619  	if (EXT4_SB(sb)->rsv_conversion_wq)
5620  		destroy_workqueue(EXT4_SB(sb)->rsv_conversion_wq);
5621  failed_mount_wq:
5622  	ext4_xattr_destroy_cache(sbi->s_ea_inode_cache);
5623  	sbi->s_ea_inode_cache = NULL;
5624  
5625  	ext4_xattr_destroy_cache(sbi->s_ea_block_cache);
5626  	sbi->s_ea_block_cache = NULL;
5627  
5628  	if (sbi->s_journal) {
5629  		/* flush s_sb_upd_work before journal destroy. */
5630  		flush_work(&sbi->s_sb_upd_work);
5631  		jbd2_journal_destroy(sbi->s_journal);
5632  		sbi->s_journal = NULL;
5633  	}
5634  failed_mount3a:
5635  	ext4_es_unregister_shrinker(sbi);
5636  failed_mount3:
5637  	/* flush s_sb_upd_work before sbi destroy */
5638  	flush_work(&sbi->s_sb_upd_work);
5639  	ext4_stop_mmpd(sbi);
5640  	del_timer_sync(&sbi->s_err_report);
5641  	ext4_group_desc_free(sbi);
5642  failed_mount:
5643  	if (sbi->s_chksum_driver)
5644  		crypto_free_shash(sbi->s_chksum_driver);
5645  
5646  #if IS_ENABLED(CONFIG_UNICODE)
5647  	utf8_unload(sb->s_encoding);
5648  #endif
5649  
5650  #ifdef CONFIG_QUOTA
5651  	for (unsigned int i = 0; i < EXT4_MAXQUOTAS; i++)
5652  		kfree(get_qf_name(sb, sbi, i));
5653  #endif
5654  	fscrypt_free_dummy_policy(&sbi->s_dummy_enc_policy);
5655  	brelse(sbi->s_sbh);
5656  	if (sbi->s_journal_bdev_file) {
5657  		invalidate_bdev(file_bdev(sbi->s_journal_bdev_file));
5658  		bdev_fput(sbi->s_journal_bdev_file);
5659  	}
5660  out_fail:
5661  	invalidate_bdev(sb->s_bdev);
5662  	sb->s_fs_info = NULL;
5663  	return err;
5664  }
5665  
5666  static int ext4_fill_super(struct super_block *sb, struct fs_context *fc)
5667  {
5668  	struct ext4_fs_context *ctx = fc->fs_private;
5669  	struct ext4_sb_info *sbi;
5670  	const char *descr;
5671  	int ret;
5672  
5673  	sbi = ext4_alloc_sbi(sb);
5674  	if (!sbi)
5675  		return -ENOMEM;
5676  
5677  	fc->s_fs_info = sbi;
5678  
5679  	/* Cleanup superblock name */
5680  	strreplace(sb->s_id, '/', '!');
5681  
5682  	sbi->s_sb_block = 1;	/* Default super block location */
5683  	if (ctx->spec & EXT4_SPEC_s_sb_block)
5684  		sbi->s_sb_block = ctx->s_sb_block;
5685  
5686  	ret = __ext4_fill_super(fc, sb);
5687  	if (ret < 0)
5688  		goto free_sbi;
5689  
5690  	if (sbi->s_journal) {
5691  		if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
5692  			descr = " journalled data mode";
5693  		else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
5694  			descr = " ordered data mode";
5695  		else
5696  			descr = " writeback data mode";
5697  	} else
5698  		descr = "out journal";
5699  
5700  	if (___ratelimit(&ext4_mount_msg_ratelimit, "EXT4-fs mount"))
5701  		ext4_msg(sb, KERN_INFO, "mounted filesystem %pU %s with%s. "
5702  			 "Quota mode: %s.", &sb->s_uuid,
5703  			 sb_rdonly(sb) ? "ro" : "r/w", descr,
5704  			 ext4_quota_mode(sb));
5705  
5706  	/* Update the s_overhead_clusters if necessary */
5707  	ext4_update_overhead(sb, false);
5708  	return 0;
5709  
5710  free_sbi:
5711  	ext4_free_sbi(sbi);
5712  	fc->s_fs_info = NULL;
5713  	return ret;
5714  }
5715  
5716  static int ext4_get_tree(struct fs_context *fc)
5717  {
5718  	return get_tree_bdev(fc, ext4_fill_super);
5719  }
5720  
5721  /*
5722   * Setup any per-fs journal parameters now.  We'll do this both on
5723   * initial mount, once the journal has been initialised but before we've
5724   * done any recovery; and again on any subsequent remount.
5725   */
5726  static void ext4_init_journal_params(struct super_block *sb, journal_t *journal)
5727  {
5728  	struct ext4_sb_info *sbi = EXT4_SB(sb);
5729  
5730  	journal->j_commit_interval = sbi->s_commit_interval;
5731  	journal->j_min_batch_time = sbi->s_min_batch_time;
5732  	journal->j_max_batch_time = sbi->s_max_batch_time;
5733  	ext4_fc_init(sb, journal);
5734  
5735  	write_lock(&journal->j_state_lock);
5736  	if (test_opt(sb, BARRIER))
5737  		journal->j_flags |= JBD2_BARRIER;
5738  	else
5739  		journal->j_flags &= ~JBD2_BARRIER;
5740  	if (test_opt(sb, DATA_ERR_ABORT))
5741  		journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR;
5742  	else
5743  		journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR;
5744  	/*
5745  	 * Always enable the journal cycle record option, so the journal
5746  	 * keeps recording transactions continuously across mounts.
5747  	 */
5748  	journal->j_flags |= JBD2_CYCLE_RECORD;
5749  	write_unlock(&journal->j_state_lock);
5750  }
5751  
5752  static struct inode *ext4_get_journal_inode(struct super_block *sb,
5753  					     unsigned int journal_inum)
5754  {
5755  	struct inode *journal_inode;
5756  
5757  	/*
5758  	 * Test for the existence of a valid inode on disk.  Bad things
5759  	 * happen if we iget() an unused inode, as the subsequent iput()
5760  	 * will try to delete it.
5761  	 */
5762  	journal_inode = ext4_iget(sb, journal_inum, EXT4_IGET_SPECIAL);
5763  	if (IS_ERR(journal_inode)) {
5764  		ext4_msg(sb, KERN_ERR, "no journal found");
5765  		return ERR_CAST(journal_inode);
5766  	}
5767  	if (!journal_inode->i_nlink) {
5768  		make_bad_inode(journal_inode);
5769  		iput(journal_inode);
5770  		ext4_msg(sb, KERN_ERR, "journal inode is deleted");
5771  		return ERR_PTR(-EFSCORRUPTED);
5772  	}
5773  	if (!S_ISREG(journal_inode->i_mode) || IS_ENCRYPTED(journal_inode)) {
5774  		ext4_msg(sb, KERN_ERR, "invalid journal inode");
5775  		iput(journal_inode);
5776  		return ERR_PTR(-EFSCORRUPTED);
5777  	}
5778  
5779  	ext4_debug("Journal inode found at %p: %lld bytes\n",
5780  		  journal_inode, journal_inode->i_size);
5781  	return journal_inode;
5782  }
5783  
5784  static int ext4_journal_bmap(journal_t *journal, sector_t *block)
5785  {
5786  	struct ext4_map_blocks map;
5787  	int ret;
5788  
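	/* An external journal device has no inode; its blocks are already physical. */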
5789  	if (journal->j_inode == NULL)
5790  		return 0;
5791  
5792  	map.m_lblk = *block;
5793  	map.m_len = 1;
5794  	ret = ext4_map_blocks(NULL, journal->j_inode, &map, 0);
5795  	if (ret <= 0) {
5796  		ext4_msg(journal->j_inode->i_sb, KERN_CRIT,
5797  			 "journal bmap failed: block %llu ret %d",
5798  			 *block, ret);
5799  		jbd2_journal_abort(journal, ret ? ret : -EIO);
5800  		return ret;
5801  	}
5802  	*block = map.m_pblk;
5803  	return 0;
5804  }
5805  
5806  static journal_t *ext4_open_inode_journal(struct super_block *sb,
5807  					  unsigned int journal_inum)
5808  {
5809  	struct inode *journal_inode;
5810  	journal_t *journal;
5811  
5812  	journal_inode = ext4_get_journal_inode(sb, journal_inum);
5813  	if (IS_ERR(journal_inode))
5814  		return ERR_CAST(journal_inode);
5815  
5816  	journal = jbd2_journal_init_inode(journal_inode);
5817  	if (IS_ERR(journal)) {
5818  		ext4_msg(sb, KERN_ERR, "Could not load journal inode");
5819  		iput(journal_inode);
5820  		return ERR_CAST(journal);
5821  	}
5822  	journal->j_private = sb;
5823  	journal->j_bmap = ext4_journal_bmap;
5824  	ext4_init_journal_params(sb, journal);
5825  	return journal;
5826  }
5827  
5828  static struct file *ext4_get_journal_blkdev(struct super_block *sb,
5829  					dev_t j_dev, ext4_fsblk_t *j_start,
5830  					ext4_fsblk_t *j_len)
5831  {
5832  	struct buffer_head *bh;
5833  	struct block_device *bdev;
5834  	struct file *bdev_file;
5835  	int hblock, blocksize;
5836  	ext4_fsblk_t sb_block;
5837  	unsigned long offset;
5838  	struct ext4_super_block *es;
5839  	int errno;
5840  
5841  	bdev_file = bdev_file_open_by_dev(j_dev,
5842  		BLK_OPEN_READ | BLK_OPEN_WRITE | BLK_OPEN_RESTRICT_WRITES,
5843  		sb, &fs_holder_ops);
5844  	if (IS_ERR(bdev_file)) {
5845  		ext4_msg(sb, KERN_ERR,
5846  			 "failed to open journal device unknown-block(%u,%u) %ld",
5847  			 MAJOR(j_dev), MINOR(j_dev), PTR_ERR(bdev_file));
5848  		return bdev_file;
5849  	}
5850  
5851  	bdev = file_bdev(bdev_file);
5852  	blocksize = sb->s_blocksize;
5853  	hblock = bdev_logical_block_size(bdev);
5854  	if (blocksize < hblock) {
5855  		ext4_msg(sb, KERN_ERR,
5856  			"blocksize too small for journal device");
5857  		errno = -EINVAL;
5858  		goto out_bdev;
5859  	}
5860  
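	/*
	 * Like the main superblock, the external journal's superblock
	 * lives at byte offset 1024; work out which block that falls in
	 * and the offset within it for this block size.
	 */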
5861  	sb_block = EXT4_MIN_BLOCK_SIZE / blocksize;
5862  	offset = EXT4_MIN_BLOCK_SIZE % blocksize;
5863  	set_blocksize(bdev_file, blocksize);
5864  	bh = __bread(bdev, sb_block, blocksize);
5865  	if (!bh) {
5866  		ext4_msg(sb, KERN_ERR, "couldn't read superblock of "
5867  		       "external journal");
5868  		errno = -EINVAL;
5869  		goto out_bdev;
5870  	}
5871  
5872  	es = (struct ext4_super_block *) (bh->b_data + offset);
5873  	if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) ||
5874  	    !(le32_to_cpu(es->s_feature_incompat) &
5875  	      EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) {
5876  		ext4_msg(sb, KERN_ERR, "external journal has bad superblock");
5877  		errno = -EFSCORRUPTED;
5878  		goto out_bh;
5879  	}
5880  
5881  	if ((le32_to_cpu(es->s_feature_ro_compat) &
5882  	     EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) &&
5883  	    es->s_checksum != ext4_superblock_csum(sb, es)) {
5884  		ext4_msg(sb, KERN_ERR, "external journal has corrupt superblock");
5885  		errno = -EFSCORRUPTED;
5886  		goto out_bh;
5887  	}
5888  
5889  	if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) {
5890  		ext4_msg(sb, KERN_ERR, "journal UUID does not match");
5891  		errno = -EFSCORRUPTED;
5892  		goto out_bh;
5893  	}
5894  
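	/* The journal area begins in the block after the device's superblock. */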
5895  	*j_start = sb_block + 1;
5896  	*j_len = ext4_blocks_count(es);
5897  	brelse(bh);
5898  	return bdev_file;
5899  
5900  out_bh:
5901  	brelse(bh);
5902  out_bdev:
5903  	bdev_fput(bdev_file);
5904  	return ERR_PTR(errno);
5905  }
5906  
5907  static journal_t *ext4_open_dev_journal(struct super_block *sb,
5908  					dev_t j_dev)
5909  {
5910  	journal_t *journal;
5911  	ext4_fsblk_t j_start;
5912  	ext4_fsblk_t j_len;
5913  	struct file *bdev_file;
5914  	int errno = 0;
5915  
5916  	bdev_file = ext4_get_journal_blkdev(sb, j_dev, &j_start, &j_len);
5917  	if (IS_ERR(bdev_file))
5918  		return ERR_CAST(bdev_file);
5919  
5920  	journal = jbd2_journal_init_dev(file_bdev(bdev_file), sb->s_bdev, j_start,
5921  					j_len, sb->s_blocksize);
5922  	if (IS_ERR(journal)) {
5923  		ext4_msg(sb, KERN_ERR, "failed to create device journal");
5924  		errno = PTR_ERR(journal);
5925  		goto out_bdev;
5926  	}
5927  	if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) {
5928  		ext4_msg(sb, KERN_ERR, "External journal has more than one "
5929  					"user (unsupported) - %d",
5930  			be32_to_cpu(journal->j_superblock->s_nr_users));
5931  		errno = -EINVAL;
5932  		goto out_journal;
5933  	}
5934  	journal->j_private = sb;
5935  	EXT4_SB(sb)->s_journal_bdev_file = bdev_file;
5936  	ext4_init_journal_params(sb, journal);
5937  	return journal;
5938  
5939  out_journal:
5940  	jbd2_journal_destroy(journal);
5941  out_bdev:
5942  	bdev_fput(bdev_file);
5943  	return ERR_PTR(errno);
5944  }
5945  
5946  static int ext4_load_journal(struct super_block *sb,
5947  			     struct ext4_super_block *es,
5948  			     unsigned long journal_devnum)
5949  {
5950  	journal_t *journal;
5951  	unsigned int journal_inum = le32_to_cpu(es->s_journal_inum);
5952  	dev_t journal_dev;
5953  	int err = 0;
5954  	int really_read_only;
5955  	int journal_dev_ro;
5956  
5957  	if (WARN_ON_ONCE(!ext4_has_feature_journal(sb)))
5958  		return -EFSCORRUPTED;
5959  
5960  	if (journal_devnum &&
5961  	    journal_devnum != le32_to_cpu(es->s_journal_dev)) {
5962  		ext4_msg(sb, KERN_INFO, "external journal device major/minor "
5963  			"numbers have changed");
5964  		journal_dev = new_decode_dev(journal_devnum);
5965  	} else
5966  		journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev));
5967  
5968  	if (journal_inum && journal_dev) {
5969  		ext4_msg(sb, KERN_ERR,
5970  			 "filesystem has both journal inode and journal device!");
5971  		return -EINVAL;
5972  	}
5973  
5974  	if (journal_inum) {
5975  		journal = ext4_open_inode_journal(sb, journal_inum);
5976  		if (IS_ERR(journal))
5977  			return PTR_ERR(journal);
5978  	} else {
5979  		journal = ext4_open_dev_journal(sb, journal_dev);
5980  		if (IS_ERR(journal))
5981  			return PTR_ERR(journal);
5982  	}
5983  
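	/*
	 * Writing is impossible if either the fs device or the journal
	 * device is read-only at the block layer, whatever the mount
	 * flags say.
	 */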
5984  	journal_dev_ro = bdev_read_only(journal->j_dev);
5985  	really_read_only = bdev_read_only(sb->s_bdev) | journal_dev_ro;
5986  
5987  	if (journal_dev_ro && !sb_rdonly(sb)) {
5988  		ext4_msg(sb, KERN_ERR,
5989  			 "journal device read-only, try mounting with '-o ro'");
5990  		err = -EROFS;
5991  		goto err_out;
5992  	}
5993  
5994  	/*
5995  	 * Are we loading a blank journal or performing recovery after a
5996  	 * crash?  For recovery, we need to check in advance whether we
5997  	 * can get read-write access to the device.
5998  	 */
5999  	if (ext4_has_feature_journal_needs_recovery(sb)) {
6000  		if (sb_rdonly(sb)) {
6001  			ext4_msg(sb, KERN_INFO, "recovery required "
6002  					"on readonly filesystem");
6003  			if (really_read_only) {
6004  				ext4_msg(sb, KERN_ERR, "write access "
6005  					"unavailable, cannot proceed "
6006  					"(try mounting with noload)");
6007  				err = -EROFS;
6008  				goto err_out;
6009  			}
6010  			ext4_msg(sb, KERN_INFO, "write access will "
6011  			       "be enabled during recovery");
6012  		}
6013  	}
6014  
6015  	if (!(journal->j_flags & JBD2_BARRIER))
6016  		ext4_msg(sb, KERN_INFO, "barriers disabled");
6017  
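	/*
	 * No recovery is pending, so discard any stale transactions
	 * left in the journal (only writing the wipe out if we have
	 * write access to the device).
	 */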
6018  	if (!ext4_has_feature_journal_needs_recovery(sb))
6019  		err = jbd2_journal_wipe(journal, !really_read_only);
6020  	if (!err) {
6021  		char *save = kmalloc(EXT4_S_ERR_LEN, GFP_KERNEL);
6022  		__le16 orig_state;
6023  		bool changed = false;
6024  
6025  		if (save)
6026  			memcpy(save, ((char *) es) +
6027  			       EXT4_S_ERR_START, EXT4_S_ERR_LEN);
6028  		err = jbd2_journal_load(journal);
6029  		if (save && memcmp(((char *) es) + EXT4_S_ERR_START,
6030  				   save, EXT4_S_ERR_LEN)) {
6031  			memcpy(((char *) es) + EXT4_S_ERR_START,
6032  			       save, EXT4_S_ERR_LEN);
6033  			changed = true;
6034  		}
6035  		kfree(save);
6036  		orig_state = es->s_state;
6037  		es->s_state |= cpu_to_le16(EXT4_SB(sb)->s_mount_state &
6038  					   EXT4_ERROR_FS);
6039  		if (orig_state != es->s_state)
6040  			changed = true;
6041  		/* Write out restored error information to the superblock */
6042  		if (changed && !really_read_only) {
6043  			int err2;
6044  			err2 = ext4_commit_super(sb);
6045  			err = err ? : err2;
6046  		}
6047  	}
6048  
6049  	if (err) {
6050  		ext4_msg(sb, KERN_ERR, "error loading journal");
6051  		goto err_out;
6052  	}
6053  
6054  	EXT4_SB(sb)->s_journal = journal;
6055  	err = ext4_clear_journal_err(sb, es);
6056  	if (err) {
6057  		EXT4_SB(sb)->s_journal = NULL;
6058  		jbd2_journal_destroy(journal);
6059  		return err;
6060  	}
6061  
6062  	if (!really_read_only && journal_devnum &&
6063  	    journal_devnum != le32_to_cpu(es->s_journal_dev)) {
6064  		es->s_journal_dev = cpu_to_le32(journal_devnum);
6065  		ext4_commit_super(sb);
6066  	}
6067  	if (!really_read_only && journal_inum &&
6068  	    journal_inum != le32_to_cpu(es->s_journal_inum)) {
6069  		es->s_journal_inum = cpu_to_le32(journal_inum);
6070  		ext4_commit_super(sb);
6071  	}
6072  
6073  	return 0;
6074  
6075  err_out:
6076  	jbd2_journal_destroy(journal);
6077  	return err;
6078  }
6079  
6080  /* Copy state of EXT4_SB(sb) into buffer for on-disk superblock */
6081  static void ext4_update_super(struct super_block *sb)
6082  {
6083  	struct ext4_sb_info *sbi = EXT4_SB(sb);
6084  	struct ext4_super_block *es = sbi->s_es;
6085  	struct buffer_head *sbh = sbi->s_sbh;
6086  
6087  	lock_buffer(sbh);
6088  	/*
6089  	 * If the file system is mounted read-only, don't update the
6090  	 * superblock write time.  This avoids updating the superblock
6091  	 * write time when we are mounting the root file system
6092  	 * read/only but we need to replay the journal; at that point,
6093  	 * for people who are east of GMT and who make their clock
6094  	 * tick in localtime for Windows bug-for-bug compatibility,
6095  	 * the clock is set in the future, and this will cause e2fsck
6096  	 * to complain and force a full file system check.
6097  	 */
6098  	if (!sb_rdonly(sb))
6099  		ext4_update_tstamp(es, s_wtime);
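	/* part_stat counts 512-byte sectors; ">> 1" converts the delta to KiB. */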
6100  	es->s_kbytes_written =
6101  		cpu_to_le64(sbi->s_kbytes_written +
6102  		    ((part_stat_read(sb->s_bdev, sectors[STAT_WRITE]) -
6103  		      sbi->s_sectors_written_start) >> 1));
6104  	if (percpu_counter_initialized(&sbi->s_freeclusters_counter))
6105  		ext4_free_blocks_count_set(es,
6106  			EXT4_C2B(sbi, percpu_counter_sum_positive(
6107  				&sbi->s_freeclusters_counter)));
6108  	if (percpu_counter_initialized(&sbi->s_freeinodes_counter))
6109  		es->s_free_inodes_count =
6110  			cpu_to_le32(percpu_counter_sum_positive(
6111  				&sbi->s_freeinodes_counter));
6112  	/* Copy error information to the on-disk superblock */
6113  	spin_lock(&sbi->s_error_lock);
6114  	if (sbi->s_add_error_count > 0) {
6115  		es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
6116  		if (!es->s_first_error_time && !es->s_first_error_time_hi) {
6117  			__ext4_update_tstamp(&es->s_first_error_time,
6118  					     &es->s_first_error_time_hi,
6119  					     sbi->s_first_error_time);
6120  			strtomem_pad(es->s_first_error_func,
6121  				     sbi->s_first_error_func, 0);
6122  			es->s_first_error_line =
6123  				cpu_to_le32(sbi->s_first_error_line);
6124  			es->s_first_error_ino =
6125  				cpu_to_le32(sbi->s_first_error_ino);
6126  			es->s_first_error_block =
6127  				cpu_to_le64(sbi->s_first_error_block);
6128  			es->s_first_error_errcode =
6129  				ext4_errno_to_code(sbi->s_first_error_code);
6130  		}
6131  		__ext4_update_tstamp(&es->s_last_error_time,
6132  				     &es->s_last_error_time_hi,
6133  				     sbi->s_last_error_time);
6134  		strtomem_pad(es->s_last_error_func, sbi->s_last_error_func, 0);
6135  		es->s_last_error_line = cpu_to_le32(sbi->s_last_error_line);
6136  		es->s_last_error_ino = cpu_to_le32(sbi->s_last_error_ino);
6137  		es->s_last_error_block = cpu_to_le64(sbi->s_last_error_block);
6138  		es->s_last_error_errcode =
6139  				ext4_errno_to_code(sbi->s_last_error_code);
6140  		/*
6141  		 * Start the daily error reporting function if it hasn't been
6142  		 * started already
6143  		 */
6144  		if (!es->s_error_count)
6145  			mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ);
6146  		le32_add_cpu(&es->s_error_count, sbi->s_add_error_count);
6147  		sbi->s_add_error_count = 0;
6148  	}
6149  	spin_unlock(&sbi->s_error_lock);
6150  
6151  	ext4_superblock_csum_set(sb);
6152  	unlock_buffer(sbh);
6153  }
6154  
6155  static int ext4_commit_super(struct super_block *sb)
6156  {
6157  	struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
6158  
6159  	if (!sbh)
6160  		return -EINVAL;
6161  
6162  	ext4_update_super(sb);
6163  
6164  	lock_buffer(sbh);
6165  	/* Buffer got discarded which means block device got invalidated */
6166  	if (!buffer_mapped(sbh)) {
6167  		unlock_buffer(sbh);
6168  		return -EIO;
6169  	}
6170  
6171  	if (buffer_write_io_error(sbh) || !buffer_uptodate(sbh)) {
6172  		/*
6173  		 * Oh, dear.  A previous attempt to write the
6174  		 * superblock failed.  This could happen because the
6175  		 * USB device was yanked out.  Or it could happen to
6176  		 * be a transient write error and maybe the block will
6177  		 * be remapped.  Nothing we can do but to retry the
6178  		 * write and hope for the best.
6179  		 */
6180  		ext4_msg(sb, KERN_ERR, "previous I/O error to "
6181  		       "superblock detected");
6182  		clear_buffer_write_io_error(sbh);
6183  		set_buffer_uptodate(sbh);
6184  	}
6185  	get_bh(sbh);
6186  	/* Clear potential dirty bit if it was journalled update */
6187  	clear_buffer_dirty(sbh);
6188  	sbh->b_end_io = end_buffer_write_sync;
6189  	submit_bh(REQ_OP_WRITE | REQ_SYNC |
6190  		  (test_opt(sb, BARRIER) ? REQ_FUA : 0), sbh);
6191  	wait_on_buffer(sbh);
6192  	if (buffer_write_io_error(sbh)) {
6193  		ext4_msg(sb, KERN_ERR, "I/O error while writing "
6194  		       "superblock");
6195  		clear_buffer_write_io_error(sbh);
6196  		set_buffer_uptodate(sbh);
6197  		return -EIO;
6198  	}
6199  	return 0;
6200  }
6201  
6202  /*
6203   * Have we just finished recovery?  If so, and if we are mounting (or
6204   * remounting) the filesystem readonly, then we will end up with a
6205   * consistent fs on disk.  Record that fact.
6206   */
6207  static int ext4_mark_recovery_complete(struct super_block *sb,
6208  				       struct ext4_super_block *es)
6209  {
6210  	int err;
6211  	journal_t *journal = EXT4_SB(sb)->s_journal;
6212  
6213  	if (!ext4_has_feature_journal(sb)) {
6214  		if (journal != NULL) {
6215  			ext4_error(sb, "Journal got removed while the fs was "
6216  				   "mounted!");
6217  			return -EFSCORRUPTED;
6218  		}
6219  		return 0;
6220  	}
6221  	jbd2_journal_lock_updates(journal);
6222  	err = jbd2_journal_flush(journal, 0);
6223  	if (err < 0)
6224  		goto out;
6225  
6226  	if (sb_rdonly(sb) && (ext4_has_feature_journal_needs_recovery(sb) ||
6227  	    ext4_has_feature_orphan_present(sb))) {
6228  		if (!ext4_orphan_file_empty(sb)) {
6229  			ext4_error(sb, "Orphan file not empty on read-only fs.");
6230  			err = -EFSCORRUPTED;
6231  			goto out;
6232  		}
6233  		ext4_clear_feature_journal_needs_recovery(sb);
6234  		ext4_clear_feature_orphan_present(sb);
6235  		ext4_commit_super(sb);
6236  	}
6237  out:
6238  	jbd2_journal_unlock_updates(journal);
6239  	return err;
6240  }
6241  
6242  /*
6243   * If we are mounting (or read-write remounting) a filesystem whose journal
6244   * has recorded an error from a previous lifetime, move that error to the
6245   * main filesystem now.
6246   */
6247  static int ext4_clear_journal_err(struct super_block *sb,
6248  				   struct ext4_super_block *es)
6249  {
6250  	journal_t *journal;
6251  	int j_errno;
6252  	const char *errstr;
6253  
6254  	if (!ext4_has_feature_journal(sb)) {
6255  		ext4_error(sb, "Journal got removed while the fs was mounted!");
6256  		return -EFSCORRUPTED;
6257  	}
6258  
6259  	journal = EXT4_SB(sb)->s_journal;
6260  
6261  	/*
6262  	 * Now check for any error status which may have been recorded in the
6263  	 * journal by a prior ext4_error() or ext4_abort()
6264  	 */
6265  
6266  	j_errno = jbd2_journal_errno(journal);
6267  	if (j_errno) {
6268  		char nbuf[16];
6269  
6270  		errstr = ext4_decode_error(sb, j_errno, nbuf);
6271  		ext4_warning(sb, "Filesystem error recorded "
6272  			     "from previous mount: %s", errstr);
6273  
6274  		EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
6275  		es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
6276  		j_errno = ext4_commit_super(sb);
6277  		if (j_errno)
6278  			return j_errno;
6279  		ext4_warning(sb, "Marked fs in need of filesystem check.");
6280  
6281  		jbd2_journal_clear_err(journal);
6282  		jbd2_journal_update_sb_errno(journal);
6283  	}
6284  	return 0;
6285  }
6286  
6287  /*
6288   * Force the running and committing transactions to commit,
6289   * and wait on the commit.
6290   */
6291  int ext4_force_commit(struct super_block *sb)
6292  {
6293  	return ext4_journal_force_commit(EXT4_SB(sb)->s_journal);
6294  }
6295  
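/*
 * Implements ->sync_fs.  Flush pending reserved-extent conversions and
 * dirty dquots, make sure the running transaction gets committed, and
 * issue a cache flush only when the journal commit won't send one for us.
 */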
6296  static int ext4_sync_fs(struct super_block *sb, int wait)
6297  {
6298  	int ret = 0;
6299  	tid_t target;
6300  	bool needs_barrier = false;
6301  	struct ext4_sb_info *sbi = EXT4_SB(sb);
6302  
6303  	if (unlikely(ext4_forced_shutdown(sb)))
6304  		return 0;
6305  
6306  	trace_ext4_sync_fs(sb, wait);
6307  	flush_workqueue(sbi->rsv_conversion_wq);
6308  	/*
6309  	 * Writeback quota in non-journalled quota case - journalled quota has
6310  	 * no dirty dquots
6311  	 */
6312  	dquot_writeback_dquots(sb, -1);
6313  	/*
6314  	 * Data writeback is possible w/o journal transaction, so the barrier
6315  	 * must be sent at the end of the function. But we can skip it if the
6316  	 * transaction commit will do it for us.
6317  	 */
6318  	if (sbi->s_journal) {
6319  		target = jbd2_get_latest_transaction(sbi->s_journal);
6320  		if (wait && sbi->s_journal->j_flags & JBD2_BARRIER &&
6321  		    !jbd2_trans_will_send_data_barrier(sbi->s_journal, target))
6322  			needs_barrier = true;
6323  
6324  		if (jbd2_journal_start_commit(sbi->s_journal, &target)) {
6325  			if (wait)
6326  				ret = jbd2_log_wait_commit(sbi->s_journal,
6327  							   target);
6328  		}
6329  	} else if (wait && test_opt(sb, BARRIER))
6330  		needs_barrier = true;
6331  	if (needs_barrier) {
6332  		int err;
6333  		err = blkdev_issue_flush(sb->s_bdev);
6334  		if (!ret)
6335  			ret = err;
6336  	}
6337  
6338  	return ret;
6339  }
6340  
6341  /*
6342   * LVM calls this function before a (read-only) snapshot is created.  This
6343   * gives us a chance to flush the journal completely and mark the fs clean.
6344   *
6345   * Note that this function alone cannot bring the filesystem into a clean
6346   * state; it relies on the upper layer to stop all data & metadata
6347   * modifications.
6348   */
6349  static int ext4_freeze(struct super_block *sb)
6350  {
6351  	int error = 0;
6352  	journal_t *journal = EXT4_SB(sb)->s_journal;
6353  
6354  	if (journal) {
6355  		/* Now we set up the journal barrier. */
6356  		jbd2_journal_lock_updates(journal);
6357  
6358  		/*
6359  		 * Don't clear the needs_recovery flag if we failed to
6360  		 * flush the journal.
6361  		 */
6362  		error = jbd2_journal_flush(journal, 0);
6363  		if (error < 0)
6364  			goto out;
6365  
6366  		/* Journal blocked and flushed, clear needs_recovery flag. */
6367  		ext4_clear_feature_journal_needs_recovery(sb);
6368  		if (ext4_orphan_file_empty(sb))
6369  			ext4_clear_feature_orphan_present(sb);
6370  	}
6371  
6372  	error = ext4_commit_super(sb);
6373  out:
6374  	if (journal)
6375  		/* we rely on the upper layer to stop further updates */
6376  		jbd2_journal_unlock_updates(journal);
6377  	return error;
6378  }
6379  
6380  /*
6381   * Called by LVM after the snapshot is done.  We need to reset the RECOVER
6382   * flag here, even though the filesystem is not technically dirty yet.
6383   */
6384  static int ext4_unfreeze(struct super_block *sb)
6385  {
6386  	if (ext4_forced_shutdown(sb))
6387  		return 0;
6388  
6389  	if (EXT4_SB(sb)->s_journal) {
6390  		/* Reset the needs_recovery flag before the fs is unlocked. */
6391  		ext4_set_feature_journal_needs_recovery(sb);
6392  		if (ext4_has_feature_orphan_file(sb))
6393  			ext4_set_feature_orphan_present(sb);
6394  	}
6395  
6396  	ext4_commit_super(sb);
6397  	return 0;
6398  }
6399  
6400  /*
6401   * Structure to save mount options for ext4_remount's benefit
6402   */
6403  struct ext4_mount_options {
6404  	unsigned long s_mount_opt;
6405  	unsigned long s_mount_opt2;
6406  	kuid_t s_resuid;
6407  	kgid_t s_resgid;
6408  	unsigned long s_commit_interval;
6409  	u32 s_min_batch_time, s_max_batch_time;
6410  #ifdef CONFIG_QUOTA
6411  	int s_jquota_fmt;
6412  	char *s_qf_names[EXT4_MAXQUOTAS];
6413  #endif
6414  };
6415  
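/*
 * Core of the remount path: snapshot the current options so they can be
 * restored on failure, apply the new ones, and handle ro<->rw transitions
 * (journal recovery state, group descriptor checksums, orphan processing,
 * MMP and quota state).
 */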
6416  static int __ext4_remount(struct fs_context *fc, struct super_block *sb)
6417  {
6418  	struct ext4_fs_context *ctx = fc->fs_private;
6419  	struct ext4_super_block *es;
6420  	struct ext4_sb_info *sbi = EXT4_SB(sb);
6421  	unsigned long old_sb_flags;
6422  	struct ext4_mount_options old_opts;
6423  	ext4_group_t g;
6424  	int err = 0;
6425  	int alloc_ctx;
6426  #ifdef CONFIG_QUOTA
6427  	int enable_quota = 0;
6428  	int i, j;
6429  	char *to_free[EXT4_MAXQUOTAS];
6430  #endif
6431  
6432  
6433  	/* Store the original options */
6434  	old_sb_flags = sb->s_flags;
6435  	old_opts.s_mount_opt = sbi->s_mount_opt;
6436  	old_opts.s_mount_opt2 = sbi->s_mount_opt2;
6437  	old_opts.s_resuid = sbi->s_resuid;
6438  	old_opts.s_resgid = sbi->s_resgid;
6439  	old_opts.s_commit_interval = sbi->s_commit_interval;
6440  	old_opts.s_min_batch_time = sbi->s_min_batch_time;
6441  	old_opts.s_max_batch_time = sbi->s_max_batch_time;
6442  #ifdef CONFIG_QUOTA
6443  	old_opts.s_jquota_fmt = sbi->s_jquota_fmt;
6444  	for (i = 0; i < EXT4_MAXQUOTAS; i++)
6445  		if (sbi->s_qf_names[i]) {
6446  			char *qf_name = get_qf_name(sb, sbi, i);
6447  
6448  			old_opts.s_qf_names[i] = kstrdup(qf_name, GFP_KERNEL);
6449  			if (!old_opts.s_qf_names[i]) {
6450  				for (j = 0; j < i; j++)
6451  					kfree(old_opts.s_qf_names[j]);
6452  				return -ENOMEM;
6453  			}
6454  		} else
6455  			old_opts.s_qf_names[i] = NULL;
6456  #endif
6457  	if (!(ctx->spec & EXT4_SPEC_JOURNAL_IOPRIO)) {
6458  		if (sbi->s_journal && sbi->s_journal->j_task->io_context)
6459  			ctx->journal_ioprio =
6460  				sbi->s_journal->j_task->io_context->ioprio;
6461  		else
6462  			ctx->journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
6463  
6464  	}
6465  
6466  	if ((ctx->spec & EXT4_SPEC_s_stripe) &&
6467  	    ext4_is_stripe_incompatible(sb, ctx->s_stripe)) {
6468  		ext4_msg(sb, KERN_WARNING,
6469  			 "stripe (%lu) is not aligned with cluster size (%u), "
6470  			 "stripe is disabled",
6471  			 ctx->s_stripe, sbi->s_cluster_ratio);
6472  		ctx->s_stripe = 0;
6473  	}
6474  
6475  	/*
6476  	 * Changing the DIOREAD_NOLOCK or DELALLOC mount options may cause
6477  	 * two calls to ext4_should_dioread_nolock() to return inconsistent
6478  	 * values, triggering WARN_ON in ext4_add_complete_io(). We grab
6479  	 * s_writepages_rwsem here to avoid a race between writepages
6480  	 * operations and remount.
6481  	 */
6482  	alloc_ctx = ext4_writepages_down_write(sb);
6483  	ext4_apply_options(fc, sb);
6484  	ext4_writepages_up_write(sb, alloc_ctx);
6485  
6486  	if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) ^
6487  	    test_opt(sb, JOURNAL_CHECKSUM)) {
6488  		ext4_msg(sb, KERN_ERR, "changing journal_checksum "
6489  			 "during remount not supported; ignoring");
6490  		sbi->s_mount_opt ^= EXT4_MOUNT_JOURNAL_CHECKSUM;
6491  	}
6492  
6493  	if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
6494  		if (test_opt2(sb, EXPLICIT_DELALLOC)) {
6495  			ext4_msg(sb, KERN_ERR, "can't mount with "
6496  				 "both data=journal and delalloc");
6497  			err = -EINVAL;
6498  			goto restore_opts;
6499  		}
6500  		if (test_opt(sb, DIOREAD_NOLOCK)) {
6501  			ext4_msg(sb, KERN_ERR, "can't mount with "
6502  				 "both data=journal and dioread_nolock");
6503  			err = -EINVAL;
6504  			goto restore_opts;
6505  		}
6506  	} else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) {
6507  		if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
6508  			ext4_msg(sb, KERN_ERR, "can't mount with "
6509  				"journal_async_commit in data=ordered mode");
6510  			err = -EINVAL;
6511  			goto restore_opts;
6512  		}
6513  	}
6514  
6515  	if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_NO_MBCACHE) {
6516  		ext4_msg(sb, KERN_ERR, "can't enable nombcache during remount");
6517  		err = -EINVAL;
6518  		goto restore_opts;
6519  	}
6520  
6521  	if (test_opt2(sb, ABORT))
6522  		ext4_abort(sb, ESHUTDOWN, "Abort forced by user");
6523  
6524  	sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |
6525  		(test_opt(sb, POSIX_ACL) ? SB_POSIXACL : 0);
6526  
6527  	es = sbi->s_es;
6528  
6529  	if (sbi->s_journal) {
6530  		ext4_init_journal_params(sb, sbi->s_journal);
6531  		set_task_ioprio(sbi->s_journal->j_task, ctx->journal_ioprio);
6532  	}
6533  
6534  	/* Flush outstanding errors before changing fs state */
6535  	flush_work(&sbi->s_sb_upd_work);
6536  
6537  	if ((bool)(fc->sb_flags & SB_RDONLY) != sb_rdonly(sb)) {
6538  		if (ext4_forced_shutdown(sb)) {
6539  			err = -EROFS;
6540  			goto restore_opts;
6541  		}
6542  
6543  		if (fc->sb_flags & SB_RDONLY) {
6544  			err = sync_filesystem(sb);
6545  			if (err < 0)
6546  				goto restore_opts;
6547  			err = dquot_suspend(sb, -1);
6548  			if (err < 0)
6549  				goto restore_opts;
6550  
6551  			/*
6552  			 * First of all, the unconditional stuff we have to do
6553  			 * to disable replay of the journal when we next remount
6554  			 */
6555  			sb->s_flags |= SB_RDONLY;
6556  
6557  			/*
6558  			 * OK, test if we are remounting a valid rw partition
6559  			 * readonly, and if so set the rdonly flag and then
6560  			 * mark the partition as valid again.
6561  			 */
6562  			if (!(es->s_state & cpu_to_le16(EXT4_VALID_FS)) &&
6563  			    (sbi->s_mount_state & EXT4_VALID_FS))
6564  				es->s_state = cpu_to_le16(sbi->s_mount_state);
6565  
6566  			if (sbi->s_journal) {
6567  				/*
6568  				 * We let remount-ro finish even if marking fs
6569  				 * as clean failed...
6570  				 */
6571  				ext4_mark_recovery_complete(sb, es);
6572  			}
6573  		} else {
6574  			/* Make sure we can mount this feature set readwrite */
6575  			if (ext4_has_feature_readonly(sb) ||
6576  			    !ext4_feature_set_ok(sb, 0)) {
6577  				err = -EROFS;
6578  				goto restore_opts;
6579  			}
6580  			/*
6581  			 * Make sure the group descriptor checksums
6582  			 * are sane.  If they aren't, refuse to remount r/w.
6583  			 */
6584  			for (g = 0; g < sbi->s_groups_count; g++) {
6585  				struct ext4_group_desc *gdp =
6586  					ext4_get_group_desc(sb, g, NULL);
6587  
6588  				if (!ext4_group_desc_csum_verify(sb, g, gdp)) {
6589  					ext4_msg(sb, KERN_ERR,
6590  	       "ext4_remount: Checksum for group %u failed (%u!=%u)",
6591  		g, le16_to_cpu(ext4_group_desc_csum(sb, g, gdp)),
6592  					       le16_to_cpu(gdp->bg_checksum));
6593  					err = -EFSBADCRC;
6594  					goto restore_opts;
6595  				}
6596  			}
6597  
6598  			/*
6599  			 * If we have an unprocessed orphan list hanging
6600  			 * around from a previously readonly bdev mount,
6601  			 * require a full umount/remount for now.
6602  			 */
6603  			if (es->s_last_orphan || !ext4_orphan_file_empty(sb)) {
6604  				ext4_msg(sb, KERN_WARNING, "Couldn't "
6605  				       "remount RDWR because of unprocessed "
6606  				       "orphan inode list.  Please "
6607  				       "umount/remount instead");
6608  				err = -EINVAL;
6609  				goto restore_opts;
6610  			}
6611  
6612  			/*
6613  			 * Mounting a RDONLY partition read-write, so reread
6614  			 * and store the current valid flag.  (It may have
6615  			 * been changed by e2fsck since we originally mounted
6616  			 * the partition.)
6617  			 */
6618  			if (sbi->s_journal) {
6619  				err = ext4_clear_journal_err(sb, es);
6620  				if (err)
6621  					goto restore_opts;
6622  			}
6623  			sbi->s_mount_state = (le16_to_cpu(es->s_state) &
6624  					      ~EXT4_FC_REPLAY);
6625  
6626  			err = ext4_setup_super(sb, es, 0);
6627  			if (err)
6628  				goto restore_opts;
6629  
6630  			sb->s_flags &= ~SB_RDONLY;
6631  			if (ext4_has_feature_mmp(sb)) {
6632  				err = ext4_multi_mount_protect(sb,
6633  						le64_to_cpu(es->s_mmp_block));
6634  				if (err)
6635  					goto restore_opts;
6636  			}
6637  #ifdef CONFIG_QUOTA
6638  			enable_quota = 1;
6639  #endif
6640  		}
6641  	}
6642  
6643  	/*
6644  	 * Handle creation of system zone data early because it can fail.
6645  	 * Releasing existing data is done once we are sure the remount will
6646  	 * succeed.
6647  	 */
6648  	if (test_opt(sb, BLOCK_VALIDITY) && !sbi->s_system_blks) {
6649  		err = ext4_setup_system_zone(sb);
6650  		if (err)
6651  			goto restore_opts;
6652  	}
6653  
6654  	if (sbi->s_journal == NULL && !(old_sb_flags & SB_RDONLY)) {
6655  		err = ext4_commit_super(sb);
6656  		if (err)
6657  			goto restore_opts;
6658  	}
6659  
6660  #ifdef CONFIG_QUOTA
6661  	if (enable_quota) {
6662  		if (sb_any_quota_suspended(sb))
6663  			dquot_resume(sb, -1);
6664  		else if (ext4_has_feature_quota(sb)) {
6665  			err = ext4_enable_quotas(sb);
6666  			if (err)
6667  				goto restore_opts;
6668  		}
6669  	}
6670  	/* Release old quota file names */
6671  	for (i = 0; i < EXT4_MAXQUOTAS; i++)
6672  		kfree(old_opts.s_qf_names[i]);
6673  #endif
6674  	if (!test_opt(sb, BLOCK_VALIDITY) && sbi->s_system_blks)
6675  		ext4_release_system_zone(sb);
6676  
6677  	/*
6678  	 * Reinitialize lazy itable initialization thread based on
6679  	 * current settings
6680  	 */
6681  	if (sb_rdonly(sb) || !test_opt(sb, INIT_INODE_TABLE))
6682  		ext4_unregister_li_request(sb);
6683  	else {
6684  		ext4_group_t first_not_zeroed;
6685  		first_not_zeroed = ext4_has_uninit_itable(sb);
6686  		ext4_register_li_request(sb, first_not_zeroed);
6687  	}
6688  
6689  	if (!ext4_has_feature_mmp(sb) || sb_rdonly(sb))
6690  		ext4_stop_mmpd(sbi);
6691  
6692  	return 0;
6693  
6694  restore_opts:
6695  	/*
6696  	 * If the r/w to ro transition failed, we may need to
6697  	 * re-enable quota
6698  	 */
6699  	if (sb_rdonly(sb) && !(old_sb_flags & SB_RDONLY) &&
6700  	    sb_any_quota_suspended(sb))
6701  		dquot_resume(sb, -1);
6702  
6703  	alloc_ctx = ext4_writepages_down_write(sb);
6704  	sb->s_flags = old_sb_flags;
6705  	sbi->s_mount_opt = old_opts.s_mount_opt;
6706  	sbi->s_mount_opt2 = old_opts.s_mount_opt2;
6707  	sbi->s_resuid = old_opts.s_resuid;
6708  	sbi->s_resgid = old_opts.s_resgid;
6709  	sbi->s_commit_interval = old_opts.s_commit_interval;
6710  	sbi->s_min_batch_time = old_opts.s_min_batch_time;
6711  	sbi->s_max_batch_time = old_opts.s_max_batch_time;
6712  	ext4_writepages_up_write(sb, alloc_ctx);
6713  
6714  	if (!test_opt(sb, BLOCK_VALIDITY) && sbi->s_system_blks)
6715  		ext4_release_system_zone(sb);
6716  #ifdef CONFIG_QUOTA
6717  	sbi->s_jquota_fmt = old_opts.s_jquota_fmt;
6718  	for (i = 0; i < EXT4_MAXQUOTAS; i++) {
6719  		to_free[i] = get_qf_name(sb, sbi, i);
6720  		rcu_assign_pointer(sbi->s_qf_names[i], old_opts.s_qf_names[i]);
6721  	}
6722  	synchronize_rcu();
6723  	for (i = 0; i < EXT4_MAXQUOTAS; i++)
6724  		kfree(to_free[i]);
6725  #endif
6726  	if (!ext4_has_feature_mmp(sb) || sb_rdonly(sb))
6727  		ext4_stop_mmpd(sbi);
6728  	return err;
6729  }
6730  
6731  static int ext4_reconfigure(struct fs_context *fc)
6732  {
6733  	struct super_block *sb = fc->root->d_sb;
6734  	int ret;
6735  
6736  	fc->s_fs_info = EXT4_SB(sb);
6737  
6738  	ret = ext4_check_opt_consistency(fc, sb);
6739  	if (ret < 0)
6740  		return ret;
6741  
6742  	ret = __ext4_remount(fc, sb);
6743  	if (ret < 0)
6744  		return ret;
6745  
6746  	ext4_msg(sb, KERN_INFO, "re-mounted %pU %s. Quota mode: %s.",
6747  		 &sb->s_uuid, sb_rdonly(sb) ? "ro" : "r/w",
6748  		 ext4_quota_mode(sb));
6749  
6750  	return 0;
6751  }
6752  
6753  #ifdef CONFIG_QUOTA
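/*
 * Clamp the statfs counters to the project quota limits so that a
 * directory tree marked with EXT4_INODE_PROJINHERIT reports the project's
 * remaining space and inodes rather than the whole filesystem's.
 */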
6754  static int ext4_statfs_project(struct super_block *sb,
6755  			       kprojid_t projid, struct kstatfs *buf)
6756  {
6757  	struct kqid qid;
6758  	struct dquot *dquot;
6759  	u64 limit;
6760  	u64 curblock;
6761  
6762  	qid = make_kqid_projid(projid);
6763  	dquot = dqget(sb, qid);
6764  	if (IS_ERR(dquot))
6765  		return PTR_ERR(dquot);
6766  	spin_lock(&dquot->dq_dqb_lock);
6767  
6768  	limit = min_not_zero(dquot->dq_dqb.dqb_bsoftlimit,
6769  			     dquot->dq_dqb.dqb_bhardlimit);
6770  	limit >>= sb->s_blocksize_bits;
6771  
6772  	if (limit && buf->f_blocks > limit) {
6773  		curblock = (dquot->dq_dqb.dqb_curspace +
6774  			    dquot->dq_dqb.dqb_rsvspace) >> sb->s_blocksize_bits;
6775  		buf->f_blocks = limit;
6776  		buf->f_bfree = buf->f_bavail =
6777  			(buf->f_blocks > curblock) ?
6778  			 (buf->f_blocks - curblock) : 0;
6779  	}
6780  
6781  	limit = min_not_zero(dquot->dq_dqb.dqb_isoftlimit,
6782  			     dquot->dq_dqb.dqb_ihardlimit);
6783  	if (limit && buf->f_files > limit) {
6784  		buf->f_files = limit;
6785  		buf->f_ffree =
6786  			(buf->f_files > dquot->dq_dqb.dqb_curinodes) ?
6787  			 (buf->f_files - dquot->dq_dqb.dqb_curinodes) : 0;
6788  	}
6789  
6790  	spin_unlock(&dquot->dq_dqb_lock);
6791  	dqput(dquot);
6792  	return 0;
6793  }
6794  #endif
6795  
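/*
 * Implements ->statfs.  Block counts are reported net of the filesystem
 * overhead (unless the minixdf mount option is set); available blocks
 * further exclude the root-reserved and internally reserved clusters.
 */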
6796  static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
6797  {
6798  	struct super_block *sb = dentry->d_sb;
6799  	struct ext4_sb_info *sbi = EXT4_SB(sb);
6800  	struct ext4_super_block *es = sbi->s_es;
6801  	ext4_fsblk_t overhead = 0, resv_blocks;
6802  	s64 bfree;
6803  	resv_blocks = EXT4_C2B(sbi, atomic64_read(&sbi->s_resv_clusters));
6804  
6805  	if (!test_opt(sb, MINIX_DF))
6806  		overhead = sbi->s_overhead;
6807  
6808  	buf->f_type = EXT4_SUPER_MAGIC;
6809  	buf->f_bsize = sb->s_blocksize;
6810  	buf->f_blocks = ext4_blocks_count(es) - EXT4_C2B(sbi, overhead);
6811  	bfree = percpu_counter_sum_positive(&sbi->s_freeclusters_counter) -
6812  		percpu_counter_sum_positive(&sbi->s_dirtyclusters_counter);
6813  	/* prevent underflow in case little free space is available */
6814  	buf->f_bfree = EXT4_C2B(sbi, max_t(s64, bfree, 0));
6815  	buf->f_bavail = buf->f_bfree -
6816  			(ext4_r_blocks_count(es) + resv_blocks);
6817  	if (buf->f_bfree < (ext4_r_blocks_count(es) + resv_blocks))
6818  		buf->f_bavail = 0;
6819  	buf->f_files = le32_to_cpu(es->s_inodes_count);
6820  	buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter);
6821  	buf->f_namelen = EXT4_NAME_LEN;
6822  	buf->f_fsid = uuid_to_fsid(es->s_uuid);
6823  
6824  #ifdef CONFIG_QUOTA
6825  	if (ext4_test_inode_flag(dentry->d_inode, EXT4_INODE_PROJINHERIT) &&
6826  	    sb_has_quota_limits_enabled(sb, PRJQUOTA))
6827  		ext4_statfs_project(sb, EXT4_I(dentry->d_inode)->i_projid, buf);
6828  #endif
6829  	return 0;
6830  }
6831  
6832  
6833  #ifdef CONFIG_QUOTA
6834  
6835  /*
6836   * Helper functions so that transaction is started before we acquire dqio_sem
6837   * to keep correct lock ordering of transaction > dqio_sem
6838   */
6839  static inline struct inode *dquot_to_inode(struct dquot *dquot)
6840  {
6841  	return sb_dqopt(dquot->dq_sb)->files[dquot->dq_id.type];
6842  }
6843  
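/* Write a modified dquot back to the quota file inside an EXT4_HT_QUOTA handle. */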
6844  static int ext4_write_dquot(struct dquot *dquot)
6845  {
6846  	int ret, err;
6847  	handle_t *handle;
6848  	struct inode *inode;
6849  
6850  	inode = dquot_to_inode(dquot);
6851  	handle = ext4_journal_start(inode, EXT4_HT_QUOTA,
6852  				    EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
6853  	if (IS_ERR(handle))
6854  		return PTR_ERR(handle);
6855  	ret = dquot_commit(dquot);
6856  	if (ret < 0)
6857  		ext4_error_err(dquot->dq_sb, -ret,
6858  			       "Failed to commit dquot type %d",
6859  			       dquot->dq_id.type);
6860  	err = ext4_journal_stop(handle);
6861  	if (!ret)
6862  		ret = err;
6863  	return ret;
6864  }
6865  
6866  static int ext4_acquire_dquot(struct dquot *dquot)
6867  {
6868  	int ret, err;
6869  	handle_t *handle;
6870  
6871  	handle = ext4_journal_start(dquot_to_inode(dquot), EXT4_HT_QUOTA,
6872  				    EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb));
6873  	if (IS_ERR(handle))
6874  		return PTR_ERR(handle);
6875  	ret = dquot_acquire(dquot);
6876  	if (ret < 0)
6877  		ext4_error_err(dquot->dq_sb, -ret,
6878  			      "Failed to acquire dquot type %d",
6879  			      dquot->dq_id.type);
6880  	err = ext4_journal_stop(handle);
6881  	if (!ret)
6882  		ret = err;
6883  	return ret;
6884  }
6885  
6886  static int ext4_release_dquot(struct dquot *dquot)
6887  {
6888  	int ret, err;
6889  	handle_t *handle;
6890  
6891  	handle = ext4_journal_start(dquot_to_inode(dquot), EXT4_HT_QUOTA,
6892  				    EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb));
6893  	if (IS_ERR(handle)) {
6894  		/* Release dquot anyway to avoid endless cycle in dqput() */
6895  		dquot_release(dquot);
6896  		return PTR_ERR(handle);
6897  	}
6898  	ret = dquot_release(dquot);
6899  	if (ret < 0)
6900  		ext4_error_err(dquot->dq_sb, -ret,
6901  			       "Failed to release dquot type %d",
6902  			       dquot->dq_id.type);
6903  	err = ext4_journal_stop(handle);
6904  	if (!ret)
6905  		ret = err;
6906  	return ret;
6907  }
6908  
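/*
 * With journalled quota the dquot must hit the quota file within the
 * running transaction, so write it out immediately; otherwise just mark
 * it dirty and let the quota code write it back later.
 */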
6909  static int ext4_mark_dquot_dirty(struct dquot *dquot)
6910  {
6911  	struct super_block *sb = dquot->dq_sb;
6912  
6913  	if (ext4_is_quota_journalled(sb)) {
6914  		dquot_mark_dquot_dirty(dquot);
6915  		return ext4_write_dquot(dquot);
6916  	} else {
6917  		return dquot_mark_dquot_dirty(dquot);
6918  	}
6919  }
6920  
6921  static int ext4_write_info(struct super_block *sb, int type)
6922  {
6923  	int ret, err;
6924  	handle_t *handle;
6925  
6926  	/* Data block + inode block */
6927  	handle = ext4_journal_start_sb(sb, EXT4_HT_QUOTA, 2);
6928  	if (IS_ERR(handle))
6929  		return PTR_ERR(handle);
6930  	ret = dquot_commit_info(sb, type);
6931  	err = ext4_journal_stop(handle);
6932  	if (!ret)
6933  		ret = err;
6934  	return ret;
6935  }
6936  
6937  static void lockdep_set_quota_inode(struct inode *inode, int subclass)
6938  {
6939  	struct ext4_inode_info *ei = EXT4_I(inode);
6940  
6941  	/* The first argument of lockdep_set_subclass has to be
6942  	 * *exactly* the same as the argument to init_rwsem() --- in
6943  	 * this case, in init_once() --- or lockdep gets unhappy
6944  	 * because the name of the lock is set using the
6945  	 * stringification of the argument to init_rwsem().
6946  	 */
6947  	(void) ei;	/* shut up clang warning if !CONFIG_LOCKDEP */
6948  	lockdep_set_subclass(&ei->i_data_sem, subclass);
6949  }
6950  
6951  /*
6952   * Standard function to be called on quota_on
6953   */
6954  static int ext4_quota_on(struct super_block *sb, int type, int format_id,
6955  			 const struct path *path)
6956  {
6957  	int err;
6958  
6959  	if (!test_opt(sb, QUOTA))
6960  		return -EINVAL;
6961  
6962  	/* Quotafile not on the same filesystem? */
6963  	if (path->dentry->d_sb != sb)
6964  		return -EXDEV;
6965  
6966  	/* Quota already enabled for this file? */
6967  	if (IS_NOQUOTA(d_inode(path->dentry)))
6968  		return -EBUSY;
6969  
6970  	/* Journaling quota? */
6971  	if (EXT4_SB(sb)->s_qf_names[type]) {
6972  		/* Quotafile not in fs root? */
6973  		if (path->dentry->d_parent != sb->s_root)
6974  			ext4_msg(sb, KERN_WARNING,
6975  				"Quota file not on filesystem root. "
6976  				"Journaled quota will not work");
6977  		sb_dqopt(sb)->flags |= DQUOT_NOLIST_DIRTY;
6978  	} else {
6979  		/*
6980  		 * Clear the flag just in case mount options changed since
6981  		 * last time.
6982  		 */
6983  		sb_dqopt(sb)->flags &= ~DQUOT_NOLIST_DIRTY;
6984  	}
6985  
6986  	lockdep_set_quota_inode(path->dentry->d_inode, I_DATA_SEM_QUOTA);
6987  	err = dquot_quota_on(sb, type, format_id, path);
6988  	if (!err) {
6989  		struct inode *inode = d_inode(path->dentry);
6990  		handle_t *handle;
6991  
6992  		/*
6993  		 * Set inode flags to prevent userspace from messing with quota
6994  		 * files. If this fails, we return success anyway since quotas
6995  		 * are already enabled and this is not a hard failure.
6996  		 */
6997  		inode_lock(inode);
6998  		handle = ext4_journal_start(inode, EXT4_HT_QUOTA, 1);
6999  		if (IS_ERR(handle))
7000  			goto unlock_inode;
7001  		EXT4_I(inode)->i_flags |= EXT4_NOATIME_FL | EXT4_IMMUTABLE_FL;
7002  		inode_set_flags(inode, S_NOATIME | S_IMMUTABLE,
7003  				S_NOATIME | S_IMMUTABLE);
7004  		err = ext4_mark_inode_dirty(handle, inode);
7005  		ext4_journal_stop(handle);
7006  	unlock_inode:
7007  		inode_unlock(inode);
7008  		if (err)
7009  			dquot_quota_off(sb, type);
7010  	}
7011  	if (err)
7012  		lockdep_set_quota_inode(path->dentry->d_inode,
7013  					     I_DATA_SEM_NORMAL);
7014  	return err;
7015  }
7016  
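/*
 * User and group quota files live in fixed reserved inodes, while the
 * project quota inode is allocated normally and merely has to be a
 * non-reserved inode number.
 */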
7017  static inline bool ext4_check_quota_inum(int type, unsigned long qf_inum)
7018  {
7019  	switch (type) {
7020  	case USRQUOTA:
7021  		return qf_inum == EXT4_USR_QUOTA_INO;
7022  	case GRPQUOTA:
7023  		return qf_inum == EXT4_GRP_QUOTA_INO;
7024  	case PRJQUOTA:
7025  		return qf_inum >= EXT4_GOOD_OLD_FIRST_INO;
7026  	default:
7027  		BUG();
7028  	}
7029  }
7030  
7031  static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
7032  			     unsigned int flags)
7033  {
7034  	int err;
7035  	struct inode *qf_inode;
7036  	unsigned long qf_inums[EXT4_MAXQUOTAS] = {
7037  		le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
7038  		le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum),
7039  		le32_to_cpu(EXT4_SB(sb)->s_es->s_prj_quota_inum)
7040  	};
7041  
7042  	BUG_ON(!ext4_has_feature_quota(sb));
7043  
7044  	if (!qf_inums[type])
7045  		return -EPERM;
7046  
7047  	if (!ext4_check_quota_inum(type, qf_inums[type])) {
7048  		ext4_error(sb, "Bad quota inum: %lu, type: %d",
7049  				qf_inums[type], type);
7050  		return -EUCLEAN;
7051  	}
7052  
7053  	qf_inode = ext4_iget(sb, qf_inums[type], EXT4_IGET_SPECIAL);
7054  	if (IS_ERR(qf_inode)) {
7055  		ext4_error(sb, "Bad quota inode: %lu, type: %d",
7056  				qf_inums[type], type);
7057  		return PTR_ERR(qf_inode);
7058  	}
7059  
7060  	/* Don't account quota for quota files to avoid recursion */
7061  	qf_inode->i_flags |= S_NOQUOTA;
7062  	lockdep_set_quota_inode(qf_inode, I_DATA_SEM_QUOTA);
7063  	err = dquot_load_quota_inode(qf_inode, type, format_id, flags);
7064  	if (err)
7065  		lockdep_set_quota_inode(qf_inode, I_DATA_SEM_NORMAL);
7066  	iput(qf_inode);
7067  
7068  	return err;
7069  }
7070  
7071  /* Enable usage tracking for all quota types. */
7072  int ext4_enable_quotas(struct super_block *sb)
7073  {
7074  	int type, err = 0;
7075  	unsigned long qf_inums[EXT4_MAXQUOTAS] = {
7076  		le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
7077  		le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum),
7078  		le32_to_cpu(EXT4_SB(sb)->s_es->s_prj_quota_inum)
7079  	};
7080  	bool quota_mopt[EXT4_MAXQUOTAS] = {
7081  		test_opt(sb, USRQUOTA),
7082  		test_opt(sb, GRPQUOTA),
7083  		test_opt(sb, PRJQUOTA),
7084  	};
7085  
7086  	sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE | DQUOT_NOLIST_DIRTY;
7087  	for (type = 0; type < EXT4_MAXQUOTAS; type++) {
7088  		if (qf_inums[type]) {
7089  			err = ext4_quota_enable(sb, type, QFMT_VFS_V1,
7090  				DQUOT_USAGE_ENABLED |
7091  				(quota_mopt[type] ? DQUOT_LIMITS_ENABLED : 0));
7092  			if (err) {
7093  				ext4_warning(sb,
7094  					"Failed to enable quota tracking "
7095  					"(type=%d, err=%d, ino=%lu). "
7096  					"Please run e2fsck to fix.", type,
7097  					err, qf_inums[type]);
7098  
7099  				ext4_quotas_off(sb, type);
7100  				return err;
7101  			}
7102  		}
7103  	}
7104  	return 0;
7105  }
7106  
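/*
 * Turn quotas off.  For old-style quota files (no QUOTA feature), also
 * drop the NOATIME/IMMUTABLE flags set by ext4_quota_on() and refresh the
 * timestamps now that userspace may touch the file again.
 */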
7107  static int ext4_quota_off(struct super_block *sb, int type)
7108  {
7109  	struct inode *inode = sb_dqopt(sb)->files[type];
7110  	handle_t *handle;
7111  	int err;
7112  
7113  	/* Force all delayed allocation blocks to be allocated.
7114  	 * Caller already holds s_umount sem */
7115  	if (test_opt(sb, DELALLOC))
7116  		sync_filesystem(sb);
7117  
7118  	if (!inode || !igrab(inode))
7119  		goto out;
7120  
7121  	err = dquot_quota_off(sb, type);
7122  	if (err || ext4_has_feature_quota(sb))
7123  		goto out_put;
7124  	/*
7125  	 * If the filesystem was remounted read-only first, we cannot clean up
7126  	 * the inode flags here. Bad luck, but people should be using the QUOTA
7127  	 * feature these days anyway.
7128  	 */
7129  	if (sb_rdonly(sb))
7130  		goto out_put;
7131  
7132  	inode_lock(inode);
7133  	/*
7134  	 * Update modification times of quota files when userspace can
7135  	 * start looking at them. If we fail, we return success anyway since
7136  	 * this is not a hard failure and quotas are already disabled.
7137  	 */
7138  	handle = ext4_journal_start(inode, EXT4_HT_QUOTA, 1);
7139  	if (IS_ERR(handle)) {
7140  		err = PTR_ERR(handle);
7141  		goto out_unlock;
7142  	}
7143  	EXT4_I(inode)->i_flags &= ~(EXT4_NOATIME_FL | EXT4_IMMUTABLE_FL);
7144  	inode_set_flags(inode, 0, S_NOATIME | S_IMMUTABLE);
7145  	inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
7146  	err = ext4_mark_inode_dirty(handle, inode);
7147  	ext4_journal_stop(handle);
7148  out_unlock:
7149  	inode_unlock(inode);
7150  out_put:
7151  	lockdep_set_quota_inode(inode, I_DATA_SEM_NORMAL);
7152  	iput(inode);
7153  	return err;
7154  out:
7155  	return dquot_quota_off(sb, type);
7156  }
7157  
7158  /* Read data from the quota file - avoid the pagecache and such because we
7159   * cannot afford acquiring the locks... As quota files are never truncated and
7160   * the quota code itself serializes the operations (and no one else should
7161   * touch the files) we don't have to be afraid of races */
7162  static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
7163  			       size_t len, loff_t off)
7164  {
7165  	struct inode *inode = sb_dqopt(sb)->files[type];
7166  	ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
7167  	int offset = off & (sb->s_blocksize - 1);
7168  	int tocopy;
7169  	size_t toread;
7170  	struct buffer_head *bh;
7171  	loff_t i_size = i_size_read(inode);
7172  
7173  	if (off > i_size)
7174  		return 0;
7175  	if (off+len > i_size)
7176  		len = i_size-off;
7177  	toread = len;
7178  	while (toread > 0) {
7179  		tocopy = min_t(unsigned long, sb->s_blocksize - offset, toread);
7180  		bh = ext4_bread(NULL, inode, blk, 0);
7181  		if (IS_ERR(bh))
7182  			return PTR_ERR(bh);
7183  		if (!bh)	/* A hole? */
7184  			memset(data, 0, tocopy);
7185  		else
7186  			memcpy(data, bh->b_data+offset, tocopy);
7187  		brelse(bh);
7188  		offset = 0;
7189  		toread -= tocopy;
7190  		data += tocopy;
7191  		blk++;
7192  	}
7193  	return len;
7194  }
7195  
7196  /* Write to quotafile (we know the transaction is already started and has
7197   * enough credits) */
7198  static ssize_t ext4_quota_write(struct super_block *sb, int type,
7199  				const char *data, size_t len, loff_t off)
7200  {
7201  	struct inode *inode = sb_dqopt(sb)->files[type];
7202  	ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
7203  	int err = 0, err2 = 0, offset = off & (sb->s_blocksize - 1);
7204  	int retries = 0;
7205  	struct buffer_head *bh;
7206  	handle_t *handle = journal_current_handle();
7207  
7208  	if (!handle) {
7209  		ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
7210  			" cancelled because transaction is not started",
7211  			(unsigned long long)off, (unsigned long long)len);
7212  		return -EIO;
7213  	}
7214  	/*
7215  	 * Since we account for only one data block in the transaction
7216  	 * credits, the write must not cross a block boundary.
7217  	 */
7218  	if (sb->s_blocksize - offset < len) {
7219  		ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
7220  			" cancelled because not block aligned",
7221  			(unsigned long long)off, (unsigned long long)len);
7222  		return -EIO;
7223  	}
7224  
7225  	do {
7226  		bh = ext4_bread(handle, inode, blk,
7227  				EXT4_GET_BLOCKS_CREATE |
7228  				EXT4_GET_BLOCKS_METADATA_NOFAIL);
7229  	} while (PTR_ERR(bh) == -ENOSPC &&
7230  		 ext4_should_retry_alloc(inode->i_sb, &retries));
7231  	if (IS_ERR(bh))
7232  		return PTR_ERR(bh);
7233  	if (!bh)
7234  		goto out;
7235  	BUFFER_TRACE(bh, "get write access");
7236  	err = ext4_journal_get_write_access(handle, sb, bh, EXT4_JTR_NONE);
7237  	if (err) {
7238  		brelse(bh);
7239  		return err;
7240  	}
7241  	lock_buffer(bh);
7242  	memcpy(bh->b_data+offset, data, len);
7243  	flush_dcache_page(bh->b_page);
7244  	unlock_buffer(bh);
7245  	err = ext4_handle_dirty_metadata(handle, NULL, bh);
7246  	brelse(bh);
7247  out:
7248  	if (inode->i_size < off + len) {
7249  		i_size_write(inode, off + len);
7250  		EXT4_I(inode)->i_disksize = inode->i_size;
7251  		err2 = ext4_mark_inode_dirty(handle, inode);
7252  		if (unlikely(err2 && !err))
7253  			err = err2;
7254  	}
7255  	return err ? err : len;
7256  }
7257  #endif
7258  
7259  #if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT2)
7260  static inline void register_as_ext2(void)
7261  {
7262  	int err = register_filesystem(&ext2_fs_type);
7263  	if (err)
7264  		printk(KERN_WARNING
7265  		       "EXT4-fs: Unable to register as ext2 (%d)\n", err);
7266  }
7267  
7268  static inline void unregister_as_ext2(void)
7269  {
7270  	unregister_filesystem(&ext2_fs_type);
7271  }
7272  
7273  static inline int ext2_feature_set_ok(struct super_block *sb)
7274  {
7275  	if (ext4_has_unknown_ext2_incompat_features(sb))
7276  		return 0;
7277  	if (sb_rdonly(sb))
7278  		return 1;
7279  	if (ext4_has_unknown_ext2_ro_compat_features(sb))
7280  		return 0;
7281  	return 1;
7282  }
7283  #else
7284  static inline void register_as_ext2(void) { }
7285  static inline void unregister_as_ext2(void) { }
7286  static inline int ext2_feature_set_ok(struct super_block *sb) { return 0; }
7287  #endif
7288  
7289  static inline void register_as_ext3(void)
7290  {
7291  	int err = register_filesystem(&ext3_fs_type);
7292  	if (err)
7293  		printk(KERN_WARNING
7294  		       "EXT4-fs: Unable to register as ext3 (%d)\n", err);
7295  }
7296  
7297  static inline void unregister_as_ext3(void)
7298  {
7299  	unregister_filesystem(&ext3_fs_type);
7300  }
7301  
7302  static inline int ext3_feature_set_ok(struct super_block *sb)
7303  {
7304  	if (ext4_has_unknown_ext3_incompat_features(sb))
7305  		return 0;
7306  	if (!ext4_has_feature_journal(sb))
7307  		return 0;
7308  	if (sb_rdonly(sb))
7309  		return 1;
7310  	if (ext4_has_unknown_ext3_ro_compat_features(sb))
7311  		return 0;
7312  	return 1;
7313  }
7314  
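/*
 * Tear down the superblock first; the reference to an external journal
 * device can only be dropped once kill_block_super() has finished
 * shutting down the filesystem and its journal.
 */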
7315  static void ext4_kill_sb(struct super_block *sb)
7316  {
7317  	struct ext4_sb_info *sbi = EXT4_SB(sb);
7318  	struct file *bdev_file = sbi ? sbi->s_journal_bdev_file : NULL;
7319  
7320  	kill_block_super(sb);
7321  
7322  	if (bdev_file)
7323  		bdev_fput(bdev_file);
7324  }
7325  
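/*
 * ext4's primary filesystem type; with CONFIG_EXT4_USE_FOR_EXT2, the
 * register_as_ext2()/register_as_ext3() helpers above additionally
 * register compatibility types so ext4 can mount ext2/ext3 filesystems.
 */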
7326  static struct file_system_type ext4_fs_type = {
7327  	.owner			= THIS_MODULE,
7328  	.name			= "ext4",
7329  	.init_fs_context	= ext4_init_fs_context,
7330  	.parameters		= ext4_param_specs,
7331  	.kill_sb		= ext4_kill_sb,
7332  	.fs_flags		= FS_REQUIRES_DEV | FS_ALLOW_IDMAP,
7333  };
7334  MODULE_ALIAS_FS("ext4");
7335  
7336  /* Shared across all ext4 file systems */
7337  wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
7338  
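/*
 * Module init: bring up caches and subsystems in dependency order.  The
 * out* labels below unwind them in reverse when any step fails.
 */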
7339  static int __init ext4_init_fs(void)
7340  {
7341  	int i, err;
7342  
7343  	ratelimit_state_init(&ext4_mount_msg_ratelimit, 30 * HZ, 64);
7344  	ext4_li_info = NULL;
7345  
7346  	/* Build-time check for flags consistency */
7347  	ext4_check_flag_values();
7348  
7349  	for (i = 0; i < EXT4_WQ_HASH_SZ; i++)
7350  		init_waitqueue_head(&ext4__ioend_wq[i]);
7351  
7352  	err = ext4_init_es();
7353  	if (err)
7354  		return err;
7355  
7356  	err = ext4_init_pending();
7357  	if (err)
7358  		goto out7;
7359  
7360  	err = ext4_init_post_read_processing();
7361  	if (err)
7362  		goto out6;
7363  
7364  	err = ext4_init_pageio();
7365  	if (err)
7366  		goto out5;
7367  
7368  	err = ext4_init_system_zone();
7369  	if (err)
7370  		goto out4;
7371  
7372  	err = ext4_init_sysfs();
7373  	if (err)
7374  		goto out3;
7375  
7376  	err = ext4_init_mballoc();
7377  	if (err)
7378  		goto out2;
7379  	err = init_inodecache();
7380  	if (err)
7381  		goto out1;
7382  
7383  	err = ext4_fc_init_dentry_cache();
7384  	if (err)
7385  		goto out05;
7386  
7387  	register_as_ext3();
7388  	register_as_ext2();
7389  	err = register_filesystem(&ext4_fs_type);
7390  	if (err)
7391  		goto out;
7392  
7393  	return 0;
7394  out:
7395  	unregister_as_ext2();
7396  	unregister_as_ext3();
7397  	ext4_fc_destroy_dentry_cache();
7398  out05:
7399  	destroy_inodecache();
7400  out1:
7401  	ext4_exit_mballoc();
7402  out2:
7403  	ext4_exit_sysfs();
7404  out3:
7405  	ext4_exit_system_zone();
7406  out4:
7407  	ext4_exit_pageio();
7408  out5:
7409  	ext4_exit_post_read_processing();
7410  out6:
7411  	ext4_exit_pending();
7412  out7:
7413  	ext4_exit_es();
7414  
7415  	return err;
7416  }
7417  
7418  static void __exit ext4_exit_fs(void)
7419  {
7420  	ext4_destroy_lazyinit_thread();
7421  	unregister_as_ext2();
7422  	unregister_as_ext3();
7423  	unregister_filesystem(&ext4_fs_type);
7424  	ext4_fc_destroy_dentry_cache();
7425  	destroy_inodecache();
7426  	ext4_exit_mballoc();
7427  	ext4_exit_sysfs();
7428  	ext4_exit_system_zone();
7429  	ext4_exit_pageio();
7430  	ext4_exit_post_read_processing();
7431  	ext4_exit_es();
7432  	ext4_exit_pending();
7433  }
7434  
7435  MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
7436  MODULE_DESCRIPTION("Fourth Extended Filesystem");
7437  MODULE_LICENSE("GPL");
7438  MODULE_SOFTDEP("pre: crc32c");
7439  module_init(ext4_init_fs)
7440  module_exit(ext4_exit_fs)
7441