1  // SPDX-License-Identifier: GPL-2.0
2  /*
3   * Copyright (c) 2000-2006 Silicon Graphics, Inc.
4   * All Rights Reserved.
5   */
6  
7  #include "xfs.h"
8  #include "xfs_shared.h"
9  #include "xfs_format.h"
10  #include "xfs_log_format.h"
11  #include "xfs_trans_resv.h"
12  #include "xfs_sb.h"
13  #include "xfs_mount.h"
14  #include "xfs_inode.h"
15  #include "xfs_btree.h"
16  #include "xfs_bmap.h"
17  #include "xfs_alloc.h"
18  #include "xfs_fsops.h"
19  #include "xfs_trans.h"
20  #include "xfs_buf_item.h"
21  #include "xfs_log.h"
22  #include "xfs_log_priv.h"
23  #include "xfs_dir2.h"
24  #include "xfs_extfree_item.h"
25  #include "xfs_mru_cache.h"
26  #include "xfs_inode_item.h"
27  #include "xfs_icache.h"
28  #include "xfs_trace.h"
29  #include "xfs_icreate_item.h"
30  #include "xfs_filestream.h"
31  #include "xfs_quota.h"
32  #include "xfs_sysfs.h"
33  #include "xfs_ondisk.h"
34  #include "xfs_rmap_item.h"
35  #include "xfs_refcount_item.h"
36  #include "xfs_bmap_item.h"
37  #include "xfs_reflink.h"
38  #include "xfs_pwork.h"
39  #include "xfs_ag.h"
40  #include "xfs_defer.h"
41  #include "xfs_attr_item.h"
42  #include "xfs_xattr.h"
43  #include "xfs_iunlink_item.h"
44  #include "xfs_dahash_test.h"
45  #include "xfs_rtbitmap.h"
46  #include "xfs_exchmaps_item.h"
47  #include "xfs_parent.h"
48  #include "scrub/stats.h"
49  #include "scrub/rcbag_btree.h"
50  
51  #include <linux/magic.h>
52  #include <linux/fs_context.h>
53  #include <linux/fs_parser.h>
54  
55  static const struct super_operations xfs_super_operations;
56  
57  static struct dentry *xfs_debugfs;	/* top-level xfs debugfs dir */
58  static struct kset *xfs_kset;		/* top-level xfs sysfs dir */
59  #ifdef DEBUG
60  static struct xfs_kobj xfs_dbg_kobj;	/* global debug sysfs attrs */
61  #endif
62  
/*
 * DAX policy for a mount.  The numeric values are spelled out explicitly;
 * dax_param_enums maps the textual "dax=" values onto them.
 */
enum xfs_dax_mode {
	XFS_DAX_INODE	= 0,	/* neither DAX_ALWAYS nor DAX_NEVER forced */
	XFS_DAX_ALWAYS	= 1,	/* XFS_FEAT_DAX_ALWAYS set */
	XFS_DAX_NEVER	= 2,	/* XFS_FEAT_DAX_NEVER set */
};
68  
69  static void
xfs_mount_set_dax_mode(struct xfs_mount * mp,enum xfs_dax_mode mode)70  xfs_mount_set_dax_mode(
71  	struct xfs_mount	*mp,
72  	enum xfs_dax_mode	mode)
73  {
74  	switch (mode) {
75  	case XFS_DAX_INODE:
76  		mp->m_features &= ~(XFS_FEAT_DAX_ALWAYS | XFS_FEAT_DAX_NEVER);
77  		break;
78  	case XFS_DAX_ALWAYS:
79  		mp->m_features |= XFS_FEAT_DAX_ALWAYS;
80  		mp->m_features &= ~XFS_FEAT_DAX_NEVER;
81  		break;
82  	case XFS_DAX_NEVER:
83  		mp->m_features |= XFS_FEAT_DAX_NEVER;
84  		mp->m_features &= ~XFS_FEAT_DAX_ALWAYS;
85  		break;
86  	}
87  }
88  
89  static const struct constant_table dax_param_enums[] = {
90  	{"inode",	XFS_DAX_INODE },
91  	{"always",	XFS_DAX_ALWAYS },
92  	{"never",	XFS_DAX_NEVER },
93  	{}
94  };
95  
/*
 * Table driven mount option parser.
 *
 * One token per mount option.  xfs_fs_parameters pairs each token with the
 * option name it stands for.  The enumerators rely on implicit numbering,
 * so their order must not change.
 */
enum {
	Opt_logbufs,
	Opt_logbsize,
	Opt_logdev,
	Opt_rtdev,
	Opt_wsync,
	Opt_noalign,
	Opt_swalloc,
	Opt_sunit,
	Opt_swidth,
	Opt_nouuid,
	Opt_grpid,
	Opt_nogrpid,
	Opt_bsdgroups,
	Opt_sysvgroups,
	Opt_allocsize,
	Opt_norecovery,
	Opt_inode64,
	Opt_inode32,
	Opt_ikeep,
	Opt_noikeep,
	Opt_largeio,
	Opt_nolargeio,
	Opt_attr2,
	Opt_noattr2,
	Opt_filestreams,

	/* quota selection */
	Opt_quota,
	Opt_noquota,
	Opt_usrquota,
	Opt_grpquota,
	Opt_prjquota,
	Opt_uquota,
	Opt_gquota,
	Opt_pquota,
	Opt_uqnoenforce,
	Opt_gqnoenforce,
	Opt_pqnoenforce,
	Opt_qnoenforce,

	Opt_discard,
	Opt_nodiscard,

	/* "dax" as a bare flag and as "dax=<value>" */
	Opt_dax,
	Opt_dax_enum,
};
110  
111  static const struct fs_parameter_spec xfs_fs_parameters[] = {
112  	fsparam_u32("logbufs",		Opt_logbufs),
113  	fsparam_string("logbsize",	Opt_logbsize),
114  	fsparam_string("logdev",	Opt_logdev),
115  	fsparam_string("rtdev",		Opt_rtdev),
116  	fsparam_flag("wsync",		Opt_wsync),
117  	fsparam_flag("noalign",		Opt_noalign),
118  	fsparam_flag("swalloc",		Opt_swalloc),
119  	fsparam_u32("sunit",		Opt_sunit),
120  	fsparam_u32("swidth",		Opt_swidth),
121  	fsparam_flag("nouuid",		Opt_nouuid),
122  	fsparam_flag("grpid",		Opt_grpid),
123  	fsparam_flag("nogrpid",		Opt_nogrpid),
124  	fsparam_flag("bsdgroups",	Opt_bsdgroups),
125  	fsparam_flag("sysvgroups",	Opt_sysvgroups),
126  	fsparam_string("allocsize",	Opt_allocsize),
127  	fsparam_flag("norecovery",	Opt_norecovery),
128  	fsparam_flag("inode64",		Opt_inode64),
129  	fsparam_flag("inode32",		Opt_inode32),
130  	fsparam_flag("ikeep",		Opt_ikeep),
131  	fsparam_flag("noikeep",		Opt_noikeep),
132  	fsparam_flag("largeio",		Opt_largeio),
133  	fsparam_flag("nolargeio",	Opt_nolargeio),
134  	fsparam_flag("attr2",		Opt_attr2),
135  	fsparam_flag("noattr2",		Opt_noattr2),
136  	fsparam_flag("filestreams",	Opt_filestreams),
137  	fsparam_flag("quota",		Opt_quota),
138  	fsparam_flag("noquota",		Opt_noquota),
139  	fsparam_flag("usrquota",	Opt_usrquota),
140  	fsparam_flag("grpquota",	Opt_grpquota),
141  	fsparam_flag("prjquota",	Opt_prjquota),
142  	fsparam_flag("uquota",		Opt_uquota),
143  	fsparam_flag("gquota",		Opt_gquota),
144  	fsparam_flag("pquota",		Opt_pquota),
145  	fsparam_flag("uqnoenforce",	Opt_uqnoenforce),
146  	fsparam_flag("gqnoenforce",	Opt_gqnoenforce),
147  	fsparam_flag("pqnoenforce",	Opt_pqnoenforce),
148  	fsparam_flag("qnoenforce",	Opt_qnoenforce),
149  	fsparam_flag("discard",		Opt_discard),
150  	fsparam_flag("nodiscard",	Opt_nodiscard),
151  	fsparam_flag("dax",		Opt_dax),
152  	fsparam_enum("dax",		Opt_dax_enum, dax_param_enums),
153  	{}
154  };
155  
/* Pairs a feature mask with the option text shown when that mask is set. */
struct proc_xfs_info {
	uint64_t	flag;	/* bit(s) tested against mp->m_features */
	char		*str;	/* text emitted for it, leading comma included */
};
160  
161  static int
xfs_fs_show_options(struct seq_file * m,struct dentry * root)162  xfs_fs_show_options(
163  	struct seq_file		*m,
164  	struct dentry		*root)
165  {
166  	static struct proc_xfs_info xfs_info_set[] = {
167  		/* the few simple ones we can get from the mount struct */
168  		{ XFS_FEAT_IKEEP,		",ikeep" },
169  		{ XFS_FEAT_WSYNC,		",wsync" },
170  		{ XFS_FEAT_NOALIGN,		",noalign" },
171  		{ XFS_FEAT_SWALLOC,		",swalloc" },
172  		{ XFS_FEAT_NOUUID,		",nouuid" },
173  		{ XFS_FEAT_NORECOVERY,		",norecovery" },
174  		{ XFS_FEAT_ATTR2,		",attr2" },
175  		{ XFS_FEAT_FILESTREAMS,		",filestreams" },
176  		{ XFS_FEAT_GRPID,		",grpid" },
177  		{ XFS_FEAT_DISCARD,		",discard" },
178  		{ XFS_FEAT_LARGE_IOSIZE,	",largeio" },
179  		{ XFS_FEAT_DAX_ALWAYS,		",dax=always" },
180  		{ XFS_FEAT_DAX_NEVER,		",dax=never" },
181  		{ 0, NULL }
182  	};
183  	struct xfs_mount	*mp = XFS_M(root->d_sb);
184  	struct proc_xfs_info	*xfs_infop;
185  
186  	for (xfs_infop = xfs_info_set; xfs_infop->flag; xfs_infop++) {
187  		if (mp->m_features & xfs_infop->flag)
188  			seq_puts(m, xfs_infop->str);
189  	}
190  
191  	seq_printf(m, ",inode%d", xfs_has_small_inums(mp) ? 32 : 64);
192  
193  	if (xfs_has_allocsize(mp))
194  		seq_printf(m, ",allocsize=%dk",
195  			   (1 << mp->m_allocsize_log) >> 10);
196  
197  	if (mp->m_logbufs > 0)
198  		seq_printf(m, ",logbufs=%d", mp->m_logbufs);
199  	if (mp->m_logbsize > 0)
200  		seq_printf(m, ",logbsize=%dk", mp->m_logbsize >> 10);
201  
202  	if (mp->m_logname)
203  		seq_show_option(m, "logdev", mp->m_logname);
204  	if (mp->m_rtname)
205  		seq_show_option(m, "rtdev", mp->m_rtname);
206  
207  	if (mp->m_dalign > 0)
208  		seq_printf(m, ",sunit=%d",
209  				(int)XFS_FSB_TO_BB(mp, mp->m_dalign));
210  	if (mp->m_swidth > 0)
211  		seq_printf(m, ",swidth=%d",
212  				(int)XFS_FSB_TO_BB(mp, mp->m_swidth));
213  
214  	if (mp->m_qflags & XFS_UQUOTA_ENFD)
215  		seq_puts(m, ",usrquota");
216  	else if (mp->m_qflags & XFS_UQUOTA_ACCT)
217  		seq_puts(m, ",uqnoenforce");
218  
219  	if (mp->m_qflags & XFS_PQUOTA_ENFD)
220  		seq_puts(m, ",prjquota");
221  	else if (mp->m_qflags & XFS_PQUOTA_ACCT)
222  		seq_puts(m, ",pqnoenforce");
223  
224  	if (mp->m_qflags & XFS_GQUOTA_ENFD)
225  		seq_puts(m, ",grpquota");
226  	else if (mp->m_qflags & XFS_GQUOTA_ACCT)
227  		seq_puts(m, ",gqnoenforce");
228  
229  	if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT))
230  		seq_puts(m, ",noquota");
231  
232  	return 0;
233  }
234  
235  static bool
xfs_set_inode_alloc_perag(struct xfs_perag * pag,xfs_ino_t ino,xfs_agnumber_t max_metadata)236  xfs_set_inode_alloc_perag(
237  	struct xfs_perag	*pag,
238  	xfs_ino_t		ino,
239  	xfs_agnumber_t		max_metadata)
240  {
241  	if (!xfs_is_inode32(pag->pag_mount)) {
242  		set_bit(XFS_AGSTATE_ALLOWS_INODES, &pag->pag_opstate);
243  		clear_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate);
244  		return false;
245  	}
246  
247  	if (ino > XFS_MAXINUMBER_32) {
248  		clear_bit(XFS_AGSTATE_ALLOWS_INODES, &pag->pag_opstate);
249  		clear_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate);
250  		return false;
251  	}
252  
253  	set_bit(XFS_AGSTATE_ALLOWS_INODES, &pag->pag_opstate);
254  	if (pag->pag_agno < max_metadata)
255  		set_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate);
256  	else
257  		clear_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate);
258  	return true;
259  }
260  
261  /*
262   * Set parameters for inode allocation heuristics, taking into account
263   * filesystem size and inode32/inode64 mount options; i.e. specifically
264   * whether or not XFS_FEAT_SMALL_INUMS is set.
265   *
266   * Inode allocation patterns are altered only if inode32 is requested
267   * (XFS_FEAT_SMALL_INUMS), and the filesystem is sufficiently large.
268   * If altered, XFS_OPSTATE_INODE32 is set as well.
269   *
270   * An agcount independent of that in the mount structure is provided
271   * because in the growfs case, mp->m_sb.sb_agcount is not yet updated
272   * to the potentially higher ag count.
273   *
274   * Returns the maximum AG index which may contain inodes.
275   */
276  xfs_agnumber_t
xfs_set_inode_alloc(struct xfs_mount * mp,xfs_agnumber_t agcount)277  xfs_set_inode_alloc(
278  	struct xfs_mount *mp,
279  	xfs_agnumber_t	agcount)
280  {
281  	xfs_agnumber_t	index;
282  	xfs_agnumber_t	maxagi = 0;
283  	xfs_sb_t	*sbp = &mp->m_sb;
284  	xfs_agnumber_t	max_metadata;
285  	xfs_agino_t	agino;
286  	xfs_ino_t	ino;
287  
288  	/*
289  	 * Calculate how much should be reserved for inodes to meet
290  	 * the max inode percentage.  Used only for inode32.
291  	 */
292  	if (M_IGEO(mp)->maxicount) {
293  		uint64_t	icount;
294  
295  		icount = sbp->sb_dblocks * sbp->sb_imax_pct;
296  		do_div(icount, 100);
297  		icount += sbp->sb_agblocks - 1;
298  		do_div(icount, sbp->sb_agblocks);
299  		max_metadata = icount;
300  	} else {
301  		max_metadata = agcount;
302  	}
303  
304  	/* Get the last possible inode in the filesystem */
305  	agino =	XFS_AGB_TO_AGINO(mp, sbp->sb_agblocks - 1);
306  	ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino);
307  
308  	/*
309  	 * If user asked for no more than 32-bit inodes, and the fs is
310  	 * sufficiently large, set XFS_OPSTATE_INODE32 if we must alter
311  	 * the allocator to accommodate the request.
312  	 */
313  	if (xfs_has_small_inums(mp) && ino > XFS_MAXINUMBER_32)
314  		xfs_set_inode32(mp);
315  	else
316  		xfs_clear_inode32(mp);
317  
318  	for (index = 0; index < agcount; index++) {
319  		struct xfs_perag	*pag;
320  
321  		ino = XFS_AGINO_TO_INO(mp, index, agino);
322  
323  		pag = xfs_perag_get(mp, index);
324  		if (xfs_set_inode_alloc_perag(pag, ino, max_metadata))
325  			maxagi++;
326  		xfs_perag_put(pag);
327  	}
328  
329  	return xfs_is_inode32(mp) ? maxagi : agcount;
330  }
331  
332  static int
xfs_setup_dax_always(struct xfs_mount * mp)333  xfs_setup_dax_always(
334  	struct xfs_mount	*mp)
335  {
336  	if (!mp->m_ddev_targp->bt_daxdev &&
337  	    (!mp->m_rtdev_targp || !mp->m_rtdev_targp->bt_daxdev)) {
338  		xfs_alert(mp,
339  			"DAX unsupported by block device. Turning off DAX.");
340  		goto disable_dax;
341  	}
342  
343  	if (mp->m_super->s_blocksize != PAGE_SIZE) {
344  		xfs_alert(mp,
345  			"DAX not supported for blocksize. Turning off DAX.");
346  		goto disable_dax;
347  	}
348  
349  	if (xfs_has_reflink(mp) &&
350  	    bdev_is_partition(mp->m_ddev_targp->bt_bdev)) {
351  		xfs_alert(mp,
352  			"DAX and reflink cannot work with multi-partitions!");
353  		return -EINVAL;
354  	}
355  
356  	return 0;
357  
358  disable_dax:
359  	xfs_mount_set_dax_mode(mp, XFS_DAX_NEVER);
360  	return 0;
361  }
362  
363  STATIC int
xfs_blkdev_get(xfs_mount_t * mp,const char * name,struct file ** bdev_filep)364  xfs_blkdev_get(
365  	xfs_mount_t		*mp,
366  	const char		*name,
367  	struct file		**bdev_filep)
368  {
369  	int			error = 0;
370  
371  	*bdev_filep = bdev_file_open_by_path(name,
372  		BLK_OPEN_READ | BLK_OPEN_WRITE | BLK_OPEN_RESTRICT_WRITES,
373  		mp->m_super, &fs_holder_ops);
374  	if (IS_ERR(*bdev_filep)) {
375  		error = PTR_ERR(*bdev_filep);
376  		*bdev_filep = NULL;
377  		xfs_warn(mp, "Invalid device [%s], error=%d", name, error);
378  	}
379  
380  	return error;
381  }
382  
383  STATIC void
xfs_shutdown_devices(struct xfs_mount * mp)384  xfs_shutdown_devices(
385  	struct xfs_mount	*mp)
386  {
387  	/*
388  	 * Udev is triggered whenever anyone closes a block device or unmounts
389  	 * a file systemm on a block device.
390  	 * The default udev rules invoke blkid to read the fs super and create
391  	 * symlinks to the bdev under /dev/disk.  For this, it uses buffered
392  	 * reads through the page cache.
393  	 *
394  	 * xfs_db also uses buffered reads to examine metadata.  There is no
395  	 * coordination between xfs_db and udev, which means that they can run
396  	 * concurrently.  Note there is no coordination between the kernel and
397  	 * blkid either.
398  	 *
399  	 * On a system with 64k pages, the page cache can cache the superblock
400  	 * and the root inode (and hence the root directory) with the same 64k
401  	 * page.  If udev spawns blkid after the mkfs and the system is busy
402  	 * enough that it is still running when xfs_db starts up, they'll both
403  	 * read from the same page in the pagecache.
404  	 *
405  	 * The unmount writes updated inode metadata to disk directly.  The XFS
406  	 * buffer cache does not use the bdev pagecache, so it needs to
407  	 * invalidate that pagecache on unmount.  If the above scenario occurs,
408  	 * the pagecache no longer reflects what's on disk, xfs_db reads the
409  	 * stale metadata, and fails to find /a.  Most of the time this succeeds
410  	 * because closing a bdev invalidates the page cache, but when processes
411  	 * race, everyone loses.
412  	 */
413  	if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
414  		blkdev_issue_flush(mp->m_logdev_targp->bt_bdev);
415  		invalidate_bdev(mp->m_logdev_targp->bt_bdev);
416  	}
417  	if (mp->m_rtdev_targp) {
418  		blkdev_issue_flush(mp->m_rtdev_targp->bt_bdev);
419  		invalidate_bdev(mp->m_rtdev_targp->bt_bdev);
420  	}
421  	blkdev_issue_flush(mp->m_ddev_targp->bt_bdev);
422  	invalidate_bdev(mp->m_ddev_targp->bt_bdev);
423  }
424  
425  /*
426   * The file system configurations are:
427   *	(1) device (partition) with data and internal log
428   *	(2) logical volume with data and log subvolumes.
429   *	(3) logical volume with data, log, and realtime subvolumes.
430   *
431   * We only have to handle opening the log and realtime volumes here if
432   * they are present.  The data subvolume has already been opened by
433   * get_sb_bdev() and is stored in sb->s_bdev.
434   */
435  STATIC int
xfs_open_devices(struct xfs_mount * mp)436  xfs_open_devices(
437  	struct xfs_mount	*mp)
438  {
439  	struct super_block	*sb = mp->m_super;
440  	struct block_device	*ddev = sb->s_bdev;
441  	struct file		*logdev_file = NULL, *rtdev_file = NULL;
442  	int			error;
443  
444  	/*
445  	 * Open real time and log devices - order is important.
446  	 */
447  	if (mp->m_logname) {
448  		error = xfs_blkdev_get(mp, mp->m_logname, &logdev_file);
449  		if (error)
450  			return error;
451  	}
452  
453  	if (mp->m_rtname) {
454  		error = xfs_blkdev_get(mp, mp->m_rtname, &rtdev_file);
455  		if (error)
456  			goto out_close_logdev;
457  
458  		if (file_bdev(rtdev_file) == ddev ||
459  		    (logdev_file &&
460  		     file_bdev(rtdev_file) == file_bdev(logdev_file))) {
461  			xfs_warn(mp,
462  	"Cannot mount filesystem with identical rtdev and ddev/logdev.");
463  			error = -EINVAL;
464  			goto out_close_rtdev;
465  		}
466  	}
467  
468  	/*
469  	 * Setup xfs_mount buffer target pointers
470  	 */
471  	error = -ENOMEM;
472  	mp->m_ddev_targp = xfs_alloc_buftarg(mp, sb->s_bdev_file);
473  	if (!mp->m_ddev_targp)
474  		goto out_close_rtdev;
475  
476  	if (rtdev_file) {
477  		mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev_file);
478  		if (!mp->m_rtdev_targp)
479  			goto out_free_ddev_targ;
480  	}
481  
482  	if (logdev_file && file_bdev(logdev_file) != ddev) {
483  		mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev_file);
484  		if (!mp->m_logdev_targp)
485  			goto out_free_rtdev_targ;
486  	} else {
487  		mp->m_logdev_targp = mp->m_ddev_targp;
488  		/* Handle won't be used, drop it */
489  		if (logdev_file)
490  			bdev_fput(logdev_file);
491  	}
492  
493  	return 0;
494  
495   out_free_rtdev_targ:
496  	if (mp->m_rtdev_targp)
497  		xfs_free_buftarg(mp->m_rtdev_targp);
498   out_free_ddev_targ:
499  	xfs_free_buftarg(mp->m_ddev_targp);
500   out_close_rtdev:
501  	 if (rtdev_file)
502  		bdev_fput(rtdev_file);
503   out_close_logdev:
504  	if (logdev_file)
505  		bdev_fput(logdev_file);
506  	return error;
507  }
508  
509  /*
510   * Setup xfs_mount buffer target pointers based on superblock
511   */
512  STATIC int
xfs_setup_devices(struct xfs_mount * mp)513  xfs_setup_devices(
514  	struct xfs_mount	*mp)
515  {
516  	int			error;
517  
518  	error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_sectsize);
519  	if (error)
520  		return error;
521  
522  	if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
523  		unsigned int	log_sector_size = BBSIZE;
524  
525  		if (xfs_has_sector(mp))
526  			log_sector_size = mp->m_sb.sb_logsectsize;
527  		error = xfs_setsize_buftarg(mp->m_logdev_targp,
528  					    log_sector_size);
529  		if (error)
530  			return error;
531  	}
532  	if (mp->m_rtdev_targp) {
533  		error = xfs_setsize_buftarg(mp->m_rtdev_targp,
534  					    mp->m_sb.sb_sectsize);
535  		if (error)
536  			return error;
537  	}
538  
539  	return 0;
540  }
541  
542  STATIC int
xfs_init_mount_workqueues(struct xfs_mount * mp)543  xfs_init_mount_workqueues(
544  	struct xfs_mount	*mp)
545  {
546  	mp->m_buf_workqueue = alloc_workqueue("xfs-buf/%s",
547  			XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM),
548  			1, mp->m_super->s_id);
549  	if (!mp->m_buf_workqueue)
550  		goto out;
551  
552  	mp->m_unwritten_workqueue = alloc_workqueue("xfs-conv/%s",
553  			XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM),
554  			0, mp->m_super->s_id);
555  	if (!mp->m_unwritten_workqueue)
556  		goto out_destroy_buf;
557  
558  	mp->m_reclaim_workqueue = alloc_workqueue("xfs-reclaim/%s",
559  			XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM),
560  			0, mp->m_super->s_id);
561  	if (!mp->m_reclaim_workqueue)
562  		goto out_destroy_unwritten;
563  
564  	mp->m_blockgc_wq = alloc_workqueue("xfs-blockgc/%s",
565  			XFS_WQFLAGS(WQ_UNBOUND | WQ_FREEZABLE | WQ_MEM_RECLAIM),
566  			0, mp->m_super->s_id);
567  	if (!mp->m_blockgc_wq)
568  		goto out_destroy_reclaim;
569  
570  	mp->m_inodegc_wq = alloc_workqueue("xfs-inodegc/%s",
571  			XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM),
572  			1, mp->m_super->s_id);
573  	if (!mp->m_inodegc_wq)
574  		goto out_destroy_blockgc;
575  
576  	mp->m_sync_workqueue = alloc_workqueue("xfs-sync/%s",
577  			XFS_WQFLAGS(WQ_FREEZABLE), 0, mp->m_super->s_id);
578  	if (!mp->m_sync_workqueue)
579  		goto out_destroy_inodegc;
580  
581  	return 0;
582  
583  out_destroy_inodegc:
584  	destroy_workqueue(mp->m_inodegc_wq);
585  out_destroy_blockgc:
586  	destroy_workqueue(mp->m_blockgc_wq);
587  out_destroy_reclaim:
588  	destroy_workqueue(mp->m_reclaim_workqueue);
589  out_destroy_unwritten:
590  	destroy_workqueue(mp->m_unwritten_workqueue);
591  out_destroy_buf:
592  	destroy_workqueue(mp->m_buf_workqueue);
593  out:
594  	return -ENOMEM;
595  }
596  
597  STATIC void
xfs_destroy_mount_workqueues(struct xfs_mount * mp)598  xfs_destroy_mount_workqueues(
599  	struct xfs_mount	*mp)
600  {
601  	destroy_workqueue(mp->m_sync_workqueue);
602  	destroy_workqueue(mp->m_blockgc_wq);
603  	destroy_workqueue(mp->m_inodegc_wq);
604  	destroy_workqueue(mp->m_reclaim_workqueue);
605  	destroy_workqueue(mp->m_unwritten_workqueue);
606  	destroy_workqueue(mp->m_buf_workqueue);
607  }
608  
609  static void
xfs_flush_inodes_worker(struct work_struct * work)610  xfs_flush_inodes_worker(
611  	struct work_struct	*work)
612  {
613  	struct xfs_mount	*mp = container_of(work, struct xfs_mount,
614  						   m_flush_inodes_work);
615  	struct super_block	*sb = mp->m_super;
616  
617  	if (down_read_trylock(&sb->s_umount)) {
618  		sync_inodes_sb(sb);
619  		up_read(&sb->s_umount);
620  	}
621  }
622  
623  /*
624   * Flush all dirty data to disk. Must not be called while holding an XFS_ILOCK
625   * or a page lock. We use sync_inodes_sb() here to ensure we block while waiting
626   * for IO to complete so that we effectively throttle multiple callers to the
627   * rate at which IO is completing.
628   */
629  void
xfs_flush_inodes(struct xfs_mount * mp)630  xfs_flush_inodes(
631  	struct xfs_mount	*mp)
632  {
633  	/*
634  	 * If flush_work() returns true then that means we waited for a flush
635  	 * which was already in progress.  Don't bother running another scan.
636  	 */
637  	if (flush_work(&mp->m_flush_inodes_work))
638  		return;
639  
640  	queue_work(mp->m_sync_workqueue, &mp->m_flush_inodes_work);
641  	flush_work(&mp->m_flush_inodes_work);
642  }
643  
644  /* Catch misguided souls that try to use this interface on XFS */
645  STATIC struct inode *
xfs_fs_alloc_inode(struct super_block * sb)646  xfs_fs_alloc_inode(
647  	struct super_block	*sb)
648  {
649  	BUG();
650  	return NULL;
651  }
652  
653  /*
654   * Now that the generic code is guaranteed not to be accessing
655   * the linux inode, we can inactivate and reclaim the inode.
656   */
657  STATIC void
xfs_fs_destroy_inode(struct inode * inode)658  xfs_fs_destroy_inode(
659  	struct inode		*inode)
660  {
661  	struct xfs_inode	*ip = XFS_I(inode);
662  
663  	trace_xfs_destroy_inode(ip);
664  
665  	ASSERT(!rwsem_is_locked(&inode->i_rwsem));
666  	XFS_STATS_INC(ip->i_mount, vn_rele);
667  	XFS_STATS_INC(ip->i_mount, vn_remove);
668  	xfs_inode_mark_reclaimable(ip);
669  }
670  
671  static void
xfs_fs_dirty_inode(struct inode * inode,int flags)672  xfs_fs_dirty_inode(
673  	struct inode			*inode,
674  	int				flags)
675  {
676  	struct xfs_inode		*ip = XFS_I(inode);
677  	struct xfs_mount		*mp = ip->i_mount;
678  	struct xfs_trans		*tp;
679  
680  	if (!(inode->i_sb->s_flags & SB_LAZYTIME))
681  		return;
682  
683  	/*
684  	 * Only do the timestamp update if the inode is dirty (I_DIRTY_SYNC)
685  	 * and has dirty timestamp (I_DIRTY_TIME). I_DIRTY_TIME can be passed
686  	 * in flags possibly together with I_DIRTY_SYNC.
687  	 */
688  	if ((flags & ~I_DIRTY_TIME) != I_DIRTY_SYNC || !(flags & I_DIRTY_TIME))
689  		return;
690  
691  	if (xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp))
692  		return;
693  	xfs_ilock(ip, XFS_ILOCK_EXCL);
694  	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
695  	xfs_trans_log_inode(tp, ip, XFS_ILOG_TIMESTAMP);
696  	xfs_trans_commit(tp);
697  }
698  
699  /*
700   * Slab object creation initialisation for the XFS inode.
701   * This covers only the idempotent fields in the XFS inode;
702   * all other fields need to be initialised on allocation
703   * from the slab. This avoids the need to repeatedly initialise
704   * fields in the xfs inode that left in the initialise state
705   * when freeing the inode.
706   */
707  STATIC void
xfs_fs_inode_init_once(void * inode)708  xfs_fs_inode_init_once(
709  	void			*inode)
710  {
711  	struct xfs_inode	*ip = inode;
712  
713  	memset(ip, 0, sizeof(struct xfs_inode));
714  
715  	/* vfs inode */
716  	inode_init_once(VFS_I(ip));
717  
718  	/* xfs inode */
719  	atomic_set(&ip->i_pincount, 0);
720  	spin_lock_init(&ip->i_flags_lock);
721  	init_rwsem(&ip->i_lock);
722  }
723  
724  /*
725   * We do an unlocked check for XFS_IDONTCACHE here because we are already
726   * serialised against cache hits here via the inode->i_lock and igrab() in
727   * xfs_iget_cache_hit(). Hence a lookup that might clear this flag will not be
728   * racing with us, and it avoids needing to grab a spinlock here for every inode
729   * we drop the final reference on.
730   */
731  STATIC int
xfs_fs_drop_inode(struct inode * inode)732  xfs_fs_drop_inode(
733  	struct inode		*inode)
734  {
735  	struct xfs_inode	*ip = XFS_I(inode);
736  
737  	/*
738  	 * If this unlinked inode is in the middle of recovery, don't
739  	 * drop the inode just yet; log recovery will take care of
740  	 * that.  See the comment for this inode flag.
741  	 */
742  	if (ip->i_flags & XFS_IRECOVERY) {
743  		ASSERT(xlog_recovery_needed(ip->i_mount->m_log));
744  		return 0;
745  	}
746  
747  	return generic_drop_inode(inode);
748  }
749  
750  static void
xfs_mount_free(struct xfs_mount * mp)751  xfs_mount_free(
752  	struct xfs_mount	*mp)
753  {
754  	if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp)
755  		xfs_free_buftarg(mp->m_logdev_targp);
756  	if (mp->m_rtdev_targp)
757  		xfs_free_buftarg(mp->m_rtdev_targp);
758  	if (mp->m_ddev_targp)
759  		xfs_free_buftarg(mp->m_ddev_targp);
760  
761  	debugfs_remove(mp->m_debugfs);
762  	kfree(mp->m_rtname);
763  	kfree(mp->m_logname);
764  	kfree(mp);
765  }
766  
767  STATIC int
xfs_fs_sync_fs(struct super_block * sb,int wait)768  xfs_fs_sync_fs(
769  	struct super_block	*sb,
770  	int			wait)
771  {
772  	struct xfs_mount	*mp = XFS_M(sb);
773  	int			error;
774  
775  	trace_xfs_fs_sync_fs(mp, __return_address);
776  
777  	/*
778  	 * Doing anything during the async pass would be counterproductive.
779  	 */
780  	if (!wait)
781  		return 0;
782  
783  	error = xfs_log_force(mp, XFS_LOG_SYNC);
784  	if (error)
785  		return error;
786  
787  	if (laptop_mode) {
788  		/*
789  		 * The disk must be active because we're syncing.
790  		 * We schedule log work now (now that the disk is
791  		 * active) instead of later (when it might not be).
792  		 */
793  		flush_delayed_work(&mp->m_log->l_work);
794  	}
795  
796  	/*
797  	 * If we are called with page faults frozen out, it means we are about
798  	 * to freeze the transaction subsystem. Take the opportunity to shut
799  	 * down inodegc because once SB_FREEZE_FS is set it's too late to
800  	 * prevent inactivation races with freeze. The fs doesn't get called
801  	 * again by the freezing process until after SB_FREEZE_FS has been set,
802  	 * so it's now or never.  Same logic applies to speculative allocation
803  	 * garbage collection.
804  	 *
805  	 * We don't care if this is a normal syncfs call that does this or
806  	 * freeze that does this - we can run this multiple times without issue
807  	 * and we won't race with a restart because a restart can only occur
808  	 * when the state is either SB_FREEZE_FS or SB_FREEZE_COMPLETE.
809  	 */
810  	if (sb->s_writers.frozen == SB_FREEZE_PAGEFAULT) {
811  		xfs_inodegc_stop(mp);
812  		xfs_blockgc_stop(mp);
813  	}
814  
815  	return 0;
816  }
817  
818  STATIC int
xfs_fs_statfs(struct dentry * dentry,struct kstatfs * statp)819  xfs_fs_statfs(
820  	struct dentry		*dentry,
821  	struct kstatfs		*statp)
822  {
823  	struct xfs_mount	*mp = XFS_M(dentry->d_sb);
824  	xfs_sb_t		*sbp = &mp->m_sb;
825  	struct xfs_inode	*ip = XFS_I(d_inode(dentry));
826  	uint64_t		fakeinos, id;
827  	uint64_t		icount;
828  	uint64_t		ifree;
829  	uint64_t		fdblocks;
830  	xfs_extlen_t		lsize;
831  	int64_t			ffree;
832  
833  	/*
834  	 * Expedite background inodegc but don't wait. We do not want to block
835  	 * here waiting hours for a billion extent file to be truncated.
836  	 */
837  	xfs_inodegc_push(mp);
838  
839  	statp->f_type = XFS_SUPER_MAGIC;
840  	statp->f_namelen = MAXNAMELEN - 1;
841  
842  	id = huge_encode_dev(mp->m_ddev_targp->bt_dev);
843  	statp->f_fsid = u64_to_fsid(id);
844  
845  	icount = percpu_counter_sum(&mp->m_icount);
846  	ifree = percpu_counter_sum(&mp->m_ifree);
847  	fdblocks = percpu_counter_sum(&mp->m_fdblocks);
848  
849  	spin_lock(&mp->m_sb_lock);
850  	statp->f_bsize = sbp->sb_blocksize;
851  	lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0;
852  	statp->f_blocks = sbp->sb_dblocks - lsize;
853  	spin_unlock(&mp->m_sb_lock);
854  
855  	/* make sure statp->f_bfree does not underflow */
856  	statp->f_bfree = max_t(int64_t, 0,
857  				fdblocks - xfs_fdblocks_unavailable(mp));
858  	statp->f_bavail = statp->f_bfree;
859  
860  	fakeinos = XFS_FSB_TO_INO(mp, statp->f_bfree);
861  	statp->f_files = min(icount + fakeinos, (uint64_t)XFS_MAXINUMBER);
862  	if (M_IGEO(mp)->maxicount)
863  		statp->f_files = min_t(typeof(statp->f_files),
864  					statp->f_files,
865  					M_IGEO(mp)->maxicount);
866  
867  	/* If sb_icount overshot maxicount, report actual allocation */
868  	statp->f_files = max_t(typeof(statp->f_files),
869  					statp->f_files,
870  					sbp->sb_icount);
871  
872  	/* make sure statp->f_ffree does not underflow */
873  	ffree = statp->f_files - (icount - ifree);
874  	statp->f_ffree = max_t(int64_t, ffree, 0);
875  
876  
877  	if ((ip->i_diflags & XFS_DIFLAG_PROJINHERIT) &&
878  	    ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))) ==
879  			      (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))
880  		xfs_qm_statvfs(ip, statp);
881  
882  	if (XFS_IS_REALTIME_MOUNT(mp) &&
883  	    (ip->i_diflags & (XFS_DIFLAG_RTINHERIT | XFS_DIFLAG_REALTIME))) {
884  		s64	freertx;
885  
886  		statp->f_blocks = sbp->sb_rblocks;
887  		freertx = percpu_counter_sum_positive(&mp->m_frextents);
888  		statp->f_bavail = statp->f_bfree = xfs_rtx_to_rtb(mp, freertx);
889  	}
890  
891  	return 0;
892  }
893  
894  STATIC void
xfs_save_resvblks(struct xfs_mount * mp)895  xfs_save_resvblks(struct xfs_mount *mp)
896  {
897  	mp->m_resblks_save = mp->m_resblks;
898  	xfs_reserve_blocks(mp, 0);
899  }
900  
901  STATIC void
xfs_restore_resvblks(struct xfs_mount * mp)902  xfs_restore_resvblks(struct xfs_mount *mp)
903  {
904  	uint64_t resblks;
905  
906  	if (mp->m_resblks_save) {
907  		resblks = mp->m_resblks_save;
908  		mp->m_resblks_save = 0;
909  	} else
910  		resblks = xfs_default_resblks(mp);
911  
912  	xfs_reserve_blocks(mp, resblks);
913  }
914  
915  /*
916   * Second stage of a freeze. The data is already frozen so we only
917   * need to take care of the metadata. Once that's done sync the superblock
918   * to the log to dirty it in case of a crash while frozen. This ensures that we
919   * will recover the unlinked inode lists on the next mount.
920   */
921  STATIC int
xfs_fs_freeze(struct super_block * sb)922  xfs_fs_freeze(
923  	struct super_block	*sb)
924  {
925  	struct xfs_mount	*mp = XFS_M(sb);
926  	unsigned int		flags;
927  	int			ret;
928  
929  	/*
930  	 * The filesystem is now frozen far enough that memory reclaim
931  	 * cannot safely operate on the filesystem. Hence we need to
932  	 * set a GFP_NOFS context here to avoid recursion deadlocks.
933  	 */
934  	flags = memalloc_nofs_save();
935  	xfs_save_resvblks(mp);
936  	ret = xfs_log_quiesce(mp);
937  	memalloc_nofs_restore(flags);
938  
939  	/*
940  	 * For read-write filesystems, we need to restart the inodegc on error
941  	 * because we stopped it at SB_FREEZE_PAGEFAULT level and a thaw is not
942  	 * going to be run to restart it now.  We are at SB_FREEZE_FS level
943  	 * here, so we can restart safely without racing with a stop in
944  	 * xfs_fs_sync_fs().
945  	 */
946  	if (ret && !xfs_is_readonly(mp)) {
947  		xfs_blockgc_start(mp);
948  		xfs_inodegc_start(mp);
949  	}
950  
951  	return ret;
952  }
953  
954  STATIC int
xfs_fs_unfreeze(struct super_block * sb)955  xfs_fs_unfreeze(
956  	struct super_block	*sb)
957  {
958  	struct xfs_mount	*mp = XFS_M(sb);
959  
960  	xfs_restore_resvblks(mp);
961  	xfs_log_work_queue(mp);
962  
963  	/*
964  	 * Don't reactivate the inodegc worker on a readonly filesystem because
965  	 * inodes are sent directly to reclaim.  Don't reactivate the blockgc
966  	 * worker because there are no speculative preallocations on a readonly
967  	 * filesystem.
968  	 */
969  	if (!xfs_is_readonly(mp)) {
970  		xfs_blockgc_start(mp);
971  		xfs_inodegc_start(mp);
972  	}
973  
974  	return 0;
975  }
976  
977  /*
978   * This function fills in xfs_mount_t fields based on mount args.
979   * Note: the superblock _has_ now been read in.
980   */
981  STATIC int
xfs_finish_flags(struct xfs_mount * mp)982  xfs_finish_flags(
983  	struct xfs_mount	*mp)
984  {
985  	/* Fail a mount where the logbuf is smaller than the log stripe */
986  	if (xfs_has_logv2(mp)) {
987  		if (mp->m_logbsize <= 0 &&
988  		    mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE) {
989  			mp->m_logbsize = mp->m_sb.sb_logsunit;
990  		} else if (mp->m_logbsize > 0 &&
991  			   mp->m_logbsize < mp->m_sb.sb_logsunit) {
992  			xfs_warn(mp,
993  		"logbuf size must be greater than or equal to log stripe size");
994  			return -EINVAL;
995  		}
996  	} else {
997  		/* Fail a mount if the logbuf is larger than 32K */
998  		if (mp->m_logbsize > XLOG_BIG_RECORD_BSIZE) {
999  			xfs_warn(mp,
1000  		"logbuf size for version 1 logs must be 16K or 32K");
1001  			return -EINVAL;
1002  		}
1003  	}
1004  
1005  	/*
1006  	 * V5 filesystems always use attr2 format for attributes.
1007  	 */
1008  	if (xfs_has_crc(mp) && xfs_has_noattr2(mp)) {
1009  		xfs_warn(mp, "Cannot mount a V5 filesystem as noattr2. "
1010  			     "attr2 is always enabled for V5 filesystems.");
1011  		return -EINVAL;
1012  	}
1013  
1014  	/*
1015  	 * prohibit r/w mounts of read-only filesystems
1016  	 */
1017  	if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !xfs_is_readonly(mp)) {
1018  		xfs_warn(mp,
1019  			"cannot mount a read-only filesystem as read-write");
1020  		return -EROFS;
1021  	}
1022  
1023  	if ((mp->m_qflags & XFS_GQUOTA_ACCT) &&
1024  	    (mp->m_qflags & XFS_PQUOTA_ACCT) &&
1025  	    !xfs_has_pquotino(mp)) {
1026  		xfs_warn(mp,
1027  		  "Super block does not support project and group quota together");
1028  		return -EINVAL;
1029  	}
1030  
1031  	return 0;
1032  }
1033  
1034  static int
xfs_init_percpu_counters(struct xfs_mount * mp)1035  xfs_init_percpu_counters(
1036  	struct xfs_mount	*mp)
1037  {
1038  	int		error;
1039  
1040  	error = percpu_counter_init(&mp->m_icount, 0, GFP_KERNEL);
1041  	if (error)
1042  		return -ENOMEM;
1043  
1044  	error = percpu_counter_init(&mp->m_ifree, 0, GFP_KERNEL);
1045  	if (error)
1046  		goto free_icount;
1047  
1048  	error = percpu_counter_init(&mp->m_fdblocks, 0, GFP_KERNEL);
1049  	if (error)
1050  		goto free_ifree;
1051  
1052  	error = percpu_counter_init(&mp->m_delalloc_blks, 0, GFP_KERNEL);
1053  	if (error)
1054  		goto free_fdblocks;
1055  
1056  	error = percpu_counter_init(&mp->m_delalloc_rtextents, 0, GFP_KERNEL);
1057  	if (error)
1058  		goto free_delalloc;
1059  
1060  	error = percpu_counter_init(&mp->m_frextents, 0, GFP_KERNEL);
1061  	if (error)
1062  		goto free_delalloc_rt;
1063  
1064  	return 0;
1065  
1066  free_delalloc_rt:
1067  	percpu_counter_destroy(&mp->m_delalloc_rtextents);
1068  free_delalloc:
1069  	percpu_counter_destroy(&mp->m_delalloc_blks);
1070  free_fdblocks:
1071  	percpu_counter_destroy(&mp->m_fdblocks);
1072  free_ifree:
1073  	percpu_counter_destroy(&mp->m_ifree);
1074  free_icount:
1075  	percpu_counter_destroy(&mp->m_icount);
1076  	return -ENOMEM;
1077  }
1078  
1079  void
xfs_reinit_percpu_counters(struct xfs_mount * mp)1080  xfs_reinit_percpu_counters(
1081  	struct xfs_mount	*mp)
1082  {
1083  	percpu_counter_set(&mp->m_icount, mp->m_sb.sb_icount);
1084  	percpu_counter_set(&mp->m_ifree, mp->m_sb.sb_ifree);
1085  	percpu_counter_set(&mp->m_fdblocks, mp->m_sb.sb_fdblocks);
1086  	percpu_counter_set(&mp->m_frextents, mp->m_sb.sb_frextents);
1087  }
1088  
1089  static void
xfs_destroy_percpu_counters(struct xfs_mount * mp)1090  xfs_destroy_percpu_counters(
1091  	struct xfs_mount	*mp)
1092  {
1093  	percpu_counter_destroy(&mp->m_icount);
1094  	percpu_counter_destroy(&mp->m_ifree);
1095  	percpu_counter_destroy(&mp->m_fdblocks);
1096  	ASSERT(xfs_is_shutdown(mp) ||
1097  	       percpu_counter_sum(&mp->m_delalloc_rtextents) == 0);
1098  	percpu_counter_destroy(&mp->m_delalloc_rtextents);
1099  	ASSERT(xfs_is_shutdown(mp) ||
1100  	       percpu_counter_sum(&mp->m_delalloc_blks) == 0);
1101  	percpu_counter_destroy(&mp->m_delalloc_blks);
1102  	percpu_counter_destroy(&mp->m_frextents);
1103  }
1104  
1105  static int
xfs_inodegc_init_percpu(struct xfs_mount * mp)1106  xfs_inodegc_init_percpu(
1107  	struct xfs_mount	*mp)
1108  {
1109  	struct xfs_inodegc	*gc;
1110  	int			cpu;
1111  
1112  	mp->m_inodegc = alloc_percpu(struct xfs_inodegc);
1113  	if (!mp->m_inodegc)
1114  		return -ENOMEM;
1115  
1116  	for_each_possible_cpu(cpu) {
1117  		gc = per_cpu_ptr(mp->m_inodegc, cpu);
1118  		gc->cpu = cpu;
1119  		gc->mp = mp;
1120  		init_llist_head(&gc->list);
1121  		gc->items = 0;
1122  		gc->error = 0;
1123  		INIT_DELAYED_WORK(&gc->work, xfs_inodegc_worker);
1124  	}
1125  	return 0;
1126  }
1127  
1128  static void
xfs_inodegc_free_percpu(struct xfs_mount * mp)1129  xfs_inodegc_free_percpu(
1130  	struct xfs_mount	*mp)
1131  {
1132  	if (!mp->m_inodegc)
1133  		return;
1134  	free_percpu(mp->m_inodegc);
1135  }
1136  
/*
 * ->put_super handler: unmount the filesystem and release the per-mount
 * resources.  The release sequence after xfs_unmountfs() is the same one the
 * error-unwind path at the end of xfs_fs_fill_super() uses.
 */
static void
xfs_fs_put_super(
	struct super_block	*sb)
{
	struct xfs_mount	*mp = XFS_M(sb);

	xfs_notice(mp, "Unmounting Filesystem %pU", &mp->m_sb.sb_uuid);
	xfs_filestream_unmount(mp);
	xfs_unmountfs(mp);

	/* Release per-mount resources once the filesystem is unmounted. */
	xfs_freesb(mp);
	xchk_mount_stats_free(mp);
	free_percpu(mp->m_stats.xs_stats);
	xfs_inodegc_free_percpu(mp);
	xfs_destroy_percpu_counters(mp);
	xfs_destroy_mount_workqueues(mp);
	xfs_shutdown_devices(mp);
}
1155  
1156  static long
xfs_fs_nr_cached_objects(struct super_block * sb,struct shrink_control * sc)1157  xfs_fs_nr_cached_objects(
1158  	struct super_block	*sb,
1159  	struct shrink_control	*sc)
1160  {
1161  	/* Paranoia: catch incorrect calls during mount setup or teardown */
1162  	if (WARN_ON_ONCE(!sb->s_fs_info))
1163  		return 0;
1164  	return xfs_reclaim_inodes_count(XFS_M(sb));
1165  }
1166  
1167  static long
xfs_fs_free_cached_objects(struct super_block * sb,struct shrink_control * sc)1168  xfs_fs_free_cached_objects(
1169  	struct super_block	*sb,
1170  	struct shrink_control	*sc)
1171  {
1172  	return xfs_reclaim_inodes_nr(XFS_M(sb), sc->nr_to_scan);
1173  }
1174  
1175  static void
xfs_fs_shutdown(struct super_block * sb)1176  xfs_fs_shutdown(
1177  	struct super_block	*sb)
1178  {
1179  	xfs_force_shutdown(XFS_M(sb), SHUTDOWN_DEVICE_REMOVED);
1180  }
1181  
/*
 * Superblock operations table; installed as sb->s_op by xfs_fs_fill_super().
 */
static const struct super_operations xfs_super_operations = {
	/* inode lifecycle */
	.alloc_inode		= xfs_fs_alloc_inode,
	.destroy_inode		= xfs_fs_destroy_inode,
	.dirty_inode		= xfs_fs_dirty_inode,
	.drop_inode		= xfs_fs_drop_inode,
	/* unmount, sync and freeze/thaw */
	.put_super		= xfs_fs_put_super,
	.sync_fs		= xfs_fs_sync_fs,
	.freeze_fs		= xfs_fs_freeze,
	.unfreeze_fs		= xfs_fs_unfreeze,
	/* reporting */
	.statfs			= xfs_fs_statfs,
	.show_options		= xfs_fs_show_options,
	/* cached inode accounting and reclaim */
	.nr_cached_objects	= xfs_fs_nr_cached_objects,
	.free_cached_objects	= xfs_fs_free_cached_objects,
	/* forced shutdown (uses SHUTDOWN_DEVICE_REMOVED) */
	.shutdown		= xfs_fs_shutdown,
};
1197  
1198  static int
suffix_kstrtoint(const char * s,unsigned int base,int * res)1199  suffix_kstrtoint(
1200  	const char	*s,
1201  	unsigned int	base,
1202  	int		*res)
1203  {
1204  	int		last, shift_left_factor = 0, _res;
1205  	char		*value;
1206  	int		ret = 0;
1207  
1208  	value = kstrdup(s, GFP_KERNEL);
1209  	if (!value)
1210  		return -ENOMEM;
1211  
1212  	last = strlen(value) - 1;
1213  	if (value[last] == 'K' || value[last] == 'k') {
1214  		shift_left_factor = 10;
1215  		value[last] = '\0';
1216  	}
1217  	if (value[last] == 'M' || value[last] == 'm') {
1218  		shift_left_factor = 20;
1219  		value[last] = '\0';
1220  	}
1221  	if (value[last] == 'G' || value[last] == 'g') {
1222  		shift_left_factor = 30;
1223  		value[last] = '\0';
1224  	}
1225  
1226  	if (kstrtoint(value, base, &_res))
1227  		ret = -EINVAL;
1228  	kfree(value);
1229  	*res = _res << shift_left_factor;
1230  	return ret;
1231  }
1232  
1233  static inline void
xfs_fs_warn_deprecated(struct fs_context * fc,struct fs_parameter * param,uint64_t flag,bool value)1234  xfs_fs_warn_deprecated(
1235  	struct fs_context	*fc,
1236  	struct fs_parameter	*param,
1237  	uint64_t		flag,
1238  	bool			value)
1239  {
1240  	/* Don't print the warning if reconfiguring and current mount point
1241  	 * already had the flag set
1242  	 */
1243  	if ((fc->purpose & FS_CONTEXT_FOR_RECONFIGURE) &&
1244              !!(XFS_M(fc->root->d_sb)->m_features & flag) == value)
1245  		return;
1246  	xfs_warn(fc->s_fs_info, "%s mount option is deprecated.", param->key);
1247  }
1248  
1249  /*
1250   * Set mount state from a mount option.
1251   *
1252   * NOTE: mp->m_super is NULL here!
1253   */
1254  static int
xfs_fs_parse_param(struct fs_context * fc,struct fs_parameter * param)1255  xfs_fs_parse_param(
1256  	struct fs_context	*fc,
1257  	struct fs_parameter	*param)
1258  {
1259  	struct xfs_mount	*parsing_mp = fc->s_fs_info;
1260  	struct fs_parse_result	result;
1261  	int			size = 0;
1262  	int			opt;
1263  
1264  	opt = fs_parse(fc, xfs_fs_parameters, param, &result);
1265  	if (opt < 0)
1266  		return opt;
1267  
1268  	switch (opt) {
1269  	case Opt_logbufs:
1270  		parsing_mp->m_logbufs = result.uint_32;
1271  		return 0;
1272  	case Opt_logbsize:
1273  		if (suffix_kstrtoint(param->string, 10, &parsing_mp->m_logbsize))
1274  			return -EINVAL;
1275  		return 0;
1276  	case Opt_logdev:
1277  		kfree(parsing_mp->m_logname);
1278  		parsing_mp->m_logname = kstrdup(param->string, GFP_KERNEL);
1279  		if (!parsing_mp->m_logname)
1280  			return -ENOMEM;
1281  		return 0;
1282  	case Opt_rtdev:
1283  		kfree(parsing_mp->m_rtname);
1284  		parsing_mp->m_rtname = kstrdup(param->string, GFP_KERNEL);
1285  		if (!parsing_mp->m_rtname)
1286  			return -ENOMEM;
1287  		return 0;
1288  	case Opt_allocsize:
1289  		if (suffix_kstrtoint(param->string, 10, &size))
1290  			return -EINVAL;
1291  		parsing_mp->m_allocsize_log = ffs(size) - 1;
1292  		parsing_mp->m_features |= XFS_FEAT_ALLOCSIZE;
1293  		return 0;
1294  	case Opt_grpid:
1295  	case Opt_bsdgroups:
1296  		parsing_mp->m_features |= XFS_FEAT_GRPID;
1297  		return 0;
1298  	case Opt_nogrpid:
1299  	case Opt_sysvgroups:
1300  		parsing_mp->m_features &= ~XFS_FEAT_GRPID;
1301  		return 0;
1302  	case Opt_wsync:
1303  		parsing_mp->m_features |= XFS_FEAT_WSYNC;
1304  		return 0;
1305  	case Opt_norecovery:
1306  		parsing_mp->m_features |= XFS_FEAT_NORECOVERY;
1307  		return 0;
1308  	case Opt_noalign:
1309  		parsing_mp->m_features |= XFS_FEAT_NOALIGN;
1310  		return 0;
1311  	case Opt_swalloc:
1312  		parsing_mp->m_features |= XFS_FEAT_SWALLOC;
1313  		return 0;
1314  	case Opt_sunit:
1315  		parsing_mp->m_dalign = result.uint_32;
1316  		return 0;
1317  	case Opt_swidth:
1318  		parsing_mp->m_swidth = result.uint_32;
1319  		return 0;
1320  	case Opt_inode32:
1321  		parsing_mp->m_features |= XFS_FEAT_SMALL_INUMS;
1322  		return 0;
1323  	case Opt_inode64:
1324  		parsing_mp->m_features &= ~XFS_FEAT_SMALL_INUMS;
1325  		return 0;
1326  	case Opt_nouuid:
1327  		parsing_mp->m_features |= XFS_FEAT_NOUUID;
1328  		return 0;
1329  	case Opt_largeio:
1330  		parsing_mp->m_features |= XFS_FEAT_LARGE_IOSIZE;
1331  		return 0;
1332  	case Opt_nolargeio:
1333  		parsing_mp->m_features &= ~XFS_FEAT_LARGE_IOSIZE;
1334  		return 0;
1335  	case Opt_filestreams:
1336  		parsing_mp->m_features |= XFS_FEAT_FILESTREAMS;
1337  		return 0;
1338  	case Opt_noquota:
1339  		parsing_mp->m_qflags &= ~XFS_ALL_QUOTA_ACCT;
1340  		parsing_mp->m_qflags &= ~XFS_ALL_QUOTA_ENFD;
1341  		return 0;
1342  	case Opt_quota:
1343  	case Opt_uquota:
1344  	case Opt_usrquota:
1345  		parsing_mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ENFD);
1346  		return 0;
1347  	case Opt_qnoenforce:
1348  	case Opt_uqnoenforce:
1349  		parsing_mp->m_qflags |= XFS_UQUOTA_ACCT;
1350  		parsing_mp->m_qflags &= ~XFS_UQUOTA_ENFD;
1351  		return 0;
1352  	case Opt_pquota:
1353  	case Opt_prjquota:
1354  		parsing_mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ENFD);
1355  		return 0;
1356  	case Opt_pqnoenforce:
1357  		parsing_mp->m_qflags |= XFS_PQUOTA_ACCT;
1358  		parsing_mp->m_qflags &= ~XFS_PQUOTA_ENFD;
1359  		return 0;
1360  	case Opt_gquota:
1361  	case Opt_grpquota:
1362  		parsing_mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ENFD);
1363  		return 0;
1364  	case Opt_gqnoenforce:
1365  		parsing_mp->m_qflags |= XFS_GQUOTA_ACCT;
1366  		parsing_mp->m_qflags &= ~XFS_GQUOTA_ENFD;
1367  		return 0;
1368  	case Opt_discard:
1369  		parsing_mp->m_features |= XFS_FEAT_DISCARD;
1370  		return 0;
1371  	case Opt_nodiscard:
1372  		parsing_mp->m_features &= ~XFS_FEAT_DISCARD;
1373  		return 0;
1374  #ifdef CONFIG_FS_DAX
1375  	case Opt_dax:
1376  		xfs_mount_set_dax_mode(parsing_mp, XFS_DAX_ALWAYS);
1377  		return 0;
1378  	case Opt_dax_enum:
1379  		xfs_mount_set_dax_mode(parsing_mp, result.uint_32);
1380  		return 0;
1381  #endif
1382  	/* Following mount options will be removed in September 2025 */
1383  	case Opt_ikeep:
1384  		xfs_fs_warn_deprecated(fc, param, XFS_FEAT_IKEEP, true);
1385  		parsing_mp->m_features |= XFS_FEAT_IKEEP;
1386  		return 0;
1387  	case Opt_noikeep:
1388  		xfs_fs_warn_deprecated(fc, param, XFS_FEAT_IKEEP, false);
1389  		parsing_mp->m_features &= ~XFS_FEAT_IKEEP;
1390  		return 0;
1391  	case Opt_attr2:
1392  		xfs_fs_warn_deprecated(fc, param, XFS_FEAT_ATTR2, true);
1393  		parsing_mp->m_features |= XFS_FEAT_ATTR2;
1394  		return 0;
1395  	case Opt_noattr2:
1396  		xfs_fs_warn_deprecated(fc, param, XFS_FEAT_NOATTR2, true);
1397  		parsing_mp->m_features |= XFS_FEAT_NOATTR2;
1398  		return 0;
1399  	default:
1400  		xfs_warn(parsing_mp, "unknown mount option [%s].", param->key);
1401  		return -EINVAL;
1402  	}
1403  
1404  	return 0;
1405  }
1406  
1407  static int
xfs_fs_validate_params(struct xfs_mount * mp)1408  xfs_fs_validate_params(
1409  	struct xfs_mount	*mp)
1410  {
1411  	/* No recovery flag requires a read-only mount */
1412  	if (xfs_has_norecovery(mp) && !xfs_is_readonly(mp)) {
1413  		xfs_warn(mp, "no-recovery mounts must be read-only.");
1414  		return -EINVAL;
1415  	}
1416  
1417  	/*
1418  	 * We have not read the superblock at this point, so only the attr2
1419  	 * mount option can set the attr2 feature by this stage.
1420  	 */
1421  	if (xfs_has_attr2(mp) && xfs_has_noattr2(mp)) {
1422  		xfs_warn(mp, "attr2 and noattr2 cannot both be specified.");
1423  		return -EINVAL;
1424  	}
1425  
1426  
1427  	if (xfs_has_noalign(mp) && (mp->m_dalign || mp->m_swidth)) {
1428  		xfs_warn(mp,
1429  	"sunit and swidth options incompatible with the noalign option");
1430  		return -EINVAL;
1431  	}
1432  
1433  	if (!IS_ENABLED(CONFIG_XFS_QUOTA) && mp->m_qflags != 0) {
1434  		xfs_warn(mp, "quota support not available in this kernel.");
1435  		return -EINVAL;
1436  	}
1437  
1438  	if ((mp->m_dalign && !mp->m_swidth) ||
1439  	    (!mp->m_dalign && mp->m_swidth)) {
1440  		xfs_warn(mp, "sunit and swidth must be specified together");
1441  		return -EINVAL;
1442  	}
1443  
1444  	if (mp->m_dalign && (mp->m_swidth % mp->m_dalign != 0)) {
1445  		xfs_warn(mp,
1446  	"stripe width (%d) must be a multiple of the stripe unit (%d)",
1447  			mp->m_swidth, mp->m_dalign);
1448  		return -EINVAL;
1449  	}
1450  
1451  	if (mp->m_logbufs != -1 &&
1452  	    mp->m_logbufs != 0 &&
1453  	    (mp->m_logbufs < XLOG_MIN_ICLOGS ||
1454  	     mp->m_logbufs > XLOG_MAX_ICLOGS)) {
1455  		xfs_warn(mp, "invalid logbufs value: %d [not %d-%d]",
1456  			mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS);
1457  		return -EINVAL;
1458  	}
1459  
1460  	if (mp->m_logbsize != -1 &&
1461  	    mp->m_logbsize !=  0 &&
1462  	    (mp->m_logbsize < XLOG_MIN_RECORD_BSIZE ||
1463  	     mp->m_logbsize > XLOG_MAX_RECORD_BSIZE ||
1464  	     !is_power_of_2(mp->m_logbsize))) {
1465  		xfs_warn(mp,
1466  			"invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]",
1467  			mp->m_logbsize);
1468  		return -EINVAL;
1469  	}
1470  
1471  	if (xfs_has_allocsize(mp) &&
1472  	    (mp->m_allocsize_log > XFS_MAX_IO_LOG ||
1473  	     mp->m_allocsize_log < XFS_MIN_IO_LOG)) {
1474  		xfs_warn(mp, "invalid log iosize: %d [not %d-%d]",
1475  			mp->m_allocsize_log, XFS_MIN_IO_LOG, XFS_MAX_IO_LOG);
1476  		return -EINVAL;
1477  	}
1478  
1479  	return 0;
1480  }
1481  
/*
 * Create debugfs directory @name under @parent.
 *
 * Returns the new dentry, or NULL if debugfs_create_dir() reported an error;
 * the error itself is deliberately dropped.
 */
struct dentry *
xfs_debugfs_mkdir(
	const char	*name,
	struct dentry	*parent)
{
	struct dentry	*dir = debugfs_create_dir(name, parent);

	/* Apparently we're expected to ignore error returns?? */
	return IS_ERR(dir) ? NULL : dir;
}
1496  
/*
 * Fill in a superblock for a new mount.
 *
 * Validates the parsed mount options, opens the devices, allocates the
 * per-mount workqueues, counters and stats, reads and sanity-checks the
 * on-disk superblock, configures the VFS superblock fields, runs the full
 * mount via xfs_mountfs() and finally instantiates the root dentry.
 *
 * Returns 0 on success or a negative errno.  On failure after the devices
 * have been opened, everything set up so far is released through the unwind
 * labels at the end of the function.
 */
static int
xfs_fs_fill_super(
	struct super_block	*sb,
	struct fs_context	*fc)
{
	struct xfs_mount	*mp = sb->s_fs_info;
	struct inode		*root;
	int			flags = 0, error;

	mp->m_super = sb;

	/*
	 * Copy VFS mount flags from the context now that all parameter parsing
	 * is guaranteed to have been completed by either the old mount API or
	 * the newer fsopen/fsconfig API.
	 */
	if (fc->sb_flags & SB_RDONLY)
		xfs_set_readonly(mp);
	if (fc->sb_flags & SB_DIRSYNC)
		mp->m_features |= XFS_FEAT_DIRSYNC;
	if (fc->sb_flags & SB_SYNCHRONOUS)
		mp->m_features |= XFS_FEAT_WSYNC;

	error = xfs_fs_validate_params(mp);
	if (error)
		return error;

	sb_min_blocksize(sb, BBSIZE);
	sb->s_xattr = xfs_xattr_handlers;
	sb->s_export_op = &xfs_export_operations;
#ifdef CONFIG_XFS_QUOTA
	sb->s_qcop = &xfs_quotactl_operations;
	sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
#endif
	sb->s_op = &xfs_super_operations;

	/*
	 * Delay mount work if the debug hook is set. This is debug
	 * instrumentation to coordinate simulation of xfs mount failures with
	 * VFS superblock operations
	 */
	if (xfs_globals.mount_delay) {
		xfs_notice(mp, "Delaying mount for %d seconds.",
			xfs_globals.mount_delay);
		msleep(xfs_globals.mount_delay * 1000);
	}

	if (fc->sb_flags & SB_SILENT)
		flags |= XFS_MFSI_QUIET;

	/* Nothing needs unwinding yet if opening the devices fails. */
	error = xfs_open_devices(mp);
	if (error)
		return error;

	/* The per-mount debugfs directory is optional; NULL is tolerated. */
	if (xfs_debugfs) {
		mp->m_debugfs = xfs_debugfs_mkdir(mp->m_super->s_id,
						  xfs_debugfs);
	} else {
		mp->m_debugfs = NULL;
	}

	error = xfs_init_mount_workqueues(mp);
	if (error)
		goto out_shutdown_devices;

	error = xfs_init_percpu_counters(mp);
	if (error)
		goto out_destroy_workqueues;

	error = xfs_inodegc_init_percpu(mp);
	if (error)
		goto out_destroy_counters;

	/* Allocate stats memory before we do operations that might use it */
	mp->m_stats.xs_stats = alloc_percpu(struct xfsstats);
	if (!mp->m_stats.xs_stats) {
		error = -ENOMEM;
		goto out_destroy_inodegc;
	}

	error = xchk_mount_stats_alloc(mp);
	if (error)
		goto out_free_stats;

	error = xfs_readsb(mp, flags);
	if (error)
		goto out_free_scrub_stats;

	error = xfs_finish_flags(mp);
	if (error)
		goto out_free_sb;

	error = xfs_setup_devices(mp);
	if (error)
		goto out_free_sb;

	/*
	 * V4 support is undergoing deprecation.
	 *
	 * Note: this has to use an open coded m_features check as xfs_has_crc
	 * always returns false for !CONFIG_XFS_SUPPORT_V4.
	 */
	if (!(mp->m_features & XFS_FEAT_CRC)) {
		if (!IS_ENABLED(CONFIG_XFS_SUPPORT_V4)) {
			xfs_warn(mp,
	"Deprecated V4 format (crc=0) not supported by kernel.");
			error = -EINVAL;
			goto out_free_sb;
		}
		xfs_warn_once(mp,
	"Deprecated V4 format (crc=0) will not be supported after September 2030.");
	}

	/* ASCII case insensitivity is undergoing deprecation. */
	if (xfs_has_asciici(mp)) {
#ifdef CONFIG_XFS_SUPPORT_ASCII_CI
		xfs_warn_once(mp,
	"Deprecated ASCII case-insensitivity feature (ascii-ci=1) will not be supported after September 2030.");
#else
		xfs_warn(mp,
	"Deprecated ASCII case-insensitivity feature (ascii-ci=1) not supported by kernel.");
		error = -EINVAL;
		goto out_free_sb;
#endif
	}

	/* Filesystem claims it needs repair, so refuse the mount. */
	if (xfs_has_needsrepair(mp)) {
		xfs_warn(mp, "Filesystem needs repair.  Please run xfs_repair.");
		error = -EFSCORRUPTED;
		goto out_free_sb;
	}

	/*
	 * Don't touch the filesystem if a user tool thinks it owns the primary
	 * superblock.  mkfs doesn't clear the flag from secondary supers, so
	 * we don't check them at all.
	 */
	if (mp->m_sb.sb_inprogress) {
		xfs_warn(mp, "Offline file system operation in progress!");
		error = -EFSCORRUPTED;
		goto out_free_sb;
	}

	/*
	 * Block sizes larger than the page size are only accepted on V5
	 * filesystems, and only up to the maximum folio size reported by
	 * mapping_max_folio_size_supported().
	 */
	if (mp->m_sb.sb_blocksize > PAGE_SIZE) {
		size_t max_folio_size = mapping_max_folio_size_supported();

		if (!xfs_has_crc(mp)) {
			xfs_warn(mp,
"V4 Filesystem with blocksize %d bytes. Only pagesize (%ld) or less is supported.",
				mp->m_sb.sb_blocksize, PAGE_SIZE);
			error = -ENOSYS;
			goto out_free_sb;
		}

		if (mp->m_sb.sb_blocksize > max_folio_size) {
			xfs_warn(mp,
"block size (%u bytes) not supported; Only block size (%zu) or less is supported",
				mp->m_sb.sb_blocksize, max_folio_size);
			error = -ENOSYS;
			goto out_free_sb;
		}

		xfs_warn(mp,
"EXPERIMENTAL: V5 Filesystem with Large Block Size (%d bytes) enabled.",
			mp->m_sb.sb_blocksize);
	}

	/* Ensure this filesystem fits in the page cache limits */
	if (xfs_sb_validate_fsb_count(&mp->m_sb, mp->m_sb.sb_dblocks) ||
	    xfs_sb_validate_fsb_count(&mp->m_sb, mp->m_sb.sb_rblocks)) {
		xfs_warn(mp,
		"file system too large to be mounted on this system.");
		error = -EFBIG;
		goto out_free_sb;
	}

	/*
	 * XFS block mappings use 54 bits to store the logical block offset.
	 * This should suffice to handle the maximum file size that the VFS
	 * supports (currently 2^63 bytes on 64-bit and ULONG_MAX << PAGE_SHIFT
	 * bytes on 32-bit), but as XFS and VFS have gotten the s_maxbytes
	 * calculation wrong on 32-bit kernels in the past, we'll add a WARN_ON
	 * to check this assertion.
	 *
	 * Avoid integer overflow by comparing the maximum bmbt offset to the
	 * maximum pagecache offset in units of fs blocks.
	 */
	if (!xfs_verify_fileoff(mp, XFS_B_TO_FSBT(mp, MAX_LFS_FILESIZE))) {
		xfs_warn(mp,
"MAX_LFS_FILESIZE block offset (%llu) exceeds extent map maximum (%llu)!",
			 XFS_B_TO_FSBT(mp, MAX_LFS_FILESIZE),
			 XFS_MAX_FILEOFF);
		error = -EINVAL;
		goto out_free_sb;
	}

	error = xfs_filestream_mount(mp);
	if (error)
		goto out_free_sb;

	/*
	 * we must configure the block size in the superblock before we run the
	 * full mount process as the mount process can lookup and cache inodes.
	 */
	sb->s_magic = XFS_SUPER_MAGIC;
	sb->s_blocksize = mp->m_sb.sb_blocksize;
	sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1;
	sb->s_maxbytes = MAX_LFS_FILESIZE;
	sb->s_max_links = XFS_MAXLINK;
	sb->s_time_gran = 1;
	if (xfs_has_bigtime(mp)) {
		sb->s_time_min = xfs_bigtime_to_unix(XFS_BIGTIME_TIME_MIN);
		sb->s_time_max = xfs_bigtime_to_unix(XFS_BIGTIME_TIME_MAX);
	} else {
		sb->s_time_min = XFS_LEGACY_TIME_MIN;
		sb->s_time_max = XFS_LEGACY_TIME_MAX;
	}
	trace_xfs_inode_timestamp_range(mp, sb->s_time_min, sb->s_time_max);
	sb->s_iflags |= SB_I_CGROUPWB;

	set_posix_acl_flag(sb);

	/* version 5 superblocks support inode version counters. */
	if (xfs_has_crc(mp))
		sb->s_flags |= SB_I_VERSION;

	if (xfs_has_dax_always(mp)) {
		error = xfs_setup_dax_always(mp);
		if (error)
			goto out_filestream_unmount;
	}

	/* An unsupported discard request is downgraded, not rejected. */
	if (xfs_has_discard(mp) && !bdev_max_discard_sectors(sb->s_bdev)) {
		xfs_warn(mp,
	"mounting with \"discard\" option, but the device does not support discard");
		mp->m_features &= ~XFS_FEAT_DISCARD;
	}

	if (xfs_has_reflink(mp)) {
		if (mp->m_sb.sb_rblocks) {
			xfs_alert(mp,
	"reflink not compatible with realtime device!");
			error = -EINVAL;
			goto out_filestream_unmount;
		}

		if (xfs_globals.always_cow) {
			xfs_info(mp, "using DEBUG-only always_cow mode.");
			mp->m_always_cow = true;
		}
	}

	if (xfs_has_rmapbt(mp) && mp->m_sb.sb_rblocks) {
		xfs_alert(mp,
	"reverse mapping btree not compatible with realtime device!");
		error = -EINVAL;
		goto out_filestream_unmount;
	}

	if (xfs_has_exchange_range(mp))
		xfs_warn(mp,
	"EXPERIMENTAL exchange-range feature enabled. Use at your own risk!");

	if (xfs_has_parent(mp))
		xfs_warn(mp,
	"EXPERIMENTAL parent pointer feature enabled. Use at your own risk!");

	error = xfs_mountfs(mp);
	if (error)
		goto out_filestream_unmount;

	root = igrab(VFS_I(mp->m_rootip));
	if (!root) {
		error = -ENOENT;
		goto out_unmount;
	}
	sb->s_root = d_make_root(root);
	if (!sb->s_root) {
		error = -ENOMEM;
		goto out_unmount;
	}

	return 0;

	/*
	 * Error unwind: each label releases what was set up before the
	 * corresponding failure point, in reverse order of setup.
	 */
 out_filestream_unmount:
	xfs_filestream_unmount(mp);
 out_free_sb:
	xfs_freesb(mp);
 out_free_scrub_stats:
	xchk_mount_stats_free(mp);
 out_free_stats:
	free_percpu(mp->m_stats.xs_stats);
 out_destroy_inodegc:
	xfs_inodegc_free_percpu(mp);
 out_destroy_counters:
	xfs_destroy_percpu_counters(mp);
 out_destroy_workqueues:
	xfs_destroy_mount_workqueues(mp);
 out_shutdown_devices:
	xfs_shutdown_devices(mp);
	return error;

	/*
	 * Failures after xfs_mountfs() succeeded must undo the mount first,
	 * then rejoin the common unwind path above.
	 */
 out_unmount:
	xfs_filestream_unmount(mp);
	xfs_unmountfs(mp);
	goto out_free_sb;
}
1805  
1806  static int
xfs_fs_get_tree(struct fs_context * fc)1807  xfs_fs_get_tree(
1808  	struct fs_context	*fc)
1809  {
1810  	return get_tree_bdev(fc, xfs_fs_fill_super);
1811  }
1812  
/*
 * Transition a read-only mount to read-write.
 *
 * Refused with -EINVAL on norecovery mounts and on V5 filesystems that carry
 * unknown ro-compat feature bits.  Otherwise the readonly state is cleared,
 * pending superblock changes are written, the reserve pool is refilled and
 * the background log, blockgc and inodegc work is restarted.
 *
 * Returns 0 on success or a negative errno.  Note that the readonly state
 * has already been cleared by the time the later steps can fail.
 */
static int
xfs_remount_rw(
	struct xfs_mount	*mp)
{
	struct xfs_sb		*sbp = &mp->m_sb;
	int error;

	if (xfs_has_norecovery(mp)) {
		xfs_warn(mp,
			"ro->rw transition prohibited on norecovery mount");
		return -EINVAL;
	}

	if (xfs_sb_is_v5(sbp) &&
	    xfs_sb_has_ro_compat_feature(sbp, XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) {
		xfs_warn(mp,
	"ro->rw transition prohibited on unknown (0x%x) ro-compat filesystem",
			(sbp->sb_features_ro_compat &
				XFS_SB_FEAT_RO_COMPAT_UNKNOWN));
		return -EINVAL;
	}

	xfs_clear_readonly(mp);

	/*
	 * If this is the first remount to writeable state we might have some
	 * superblock changes to update.
	 */
	if (mp->m_update_sb) {
		error = xfs_sync_sb(mp, false);
		if (error) {
			xfs_warn(mp, "failed to write sb changes");
			return error;
		}
		mp->m_update_sb = false;
	}

	/*
	 * Fill out the reserve pool if it is empty. Use the stashed value if
	 * it is non-zero, otherwise go with the default.
	 */
	xfs_restore_resvblks(mp);
	xfs_log_work_queue(mp);
	xfs_blockgc_start(mp);

	/*
	 * Create the per-AG metadata reservation pool.  -ENOSPC from the
	 * reservation is tolerated; any other error fails the remount.
	 */
	error = xfs_fs_reserve_ag_blocks(mp);
	if (error && error != -ENOSPC)
		return error;

	/* Re-enable the background inode inactivation worker. */
	xfs_inodegc_start(mp);

	return 0;
}
1868  
/*
 * Transition a read-write mount to read-only.
 *
 * Syncs the filesystem, stops the background gc workers, frees the per-AG
 * and global block reservations, cleans the log and finally marks the mount
 * readonly.
 *
 * Returns 0 on success or a negative errno.  A failure of the synchronous
 * blockgc scan also forces a shutdown of the filesystem.
 */
static int
xfs_remount_ro(
	struct xfs_mount	*mp)
{
	struct xfs_icwalk	icw = {
		.icw_flags	= XFS_ICWALK_FLAG_SYNC,
	};
	int			error;

	/* Flush all the dirty data to disk. */
	error = sync_filesystem(mp->m_super);
	if (error)
		return error;

	/*
	 * Cancel background eofb scanning so it cannot race with the final
	 * log force+buftarg wait and deadlock the remount.
	 */
	xfs_blockgc_stop(mp);

	/*
	 * Clear out all remaining COW staging extents and speculative post-EOF
	 * preallocations so that we don't leave inodes requiring inactivation
	 * cleanups during reclaim on a read-only mount.  We must process every
	 * cached inode, so this requires a synchronous cache scan.
	 */
	error = xfs_blockgc_free_space(mp, &icw);
	if (error) {
		xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
		return error;
	}

	/*
	 * Stop the inodegc background worker.  xfs_fs_reconfigure already
	 * flushed all pending inodegc work when it sync'd the filesystem.
	 * The VFS holds s_umount, so we know that inodes cannot enter
	 * xfs_fs_destroy_inode during a remount operation.  In readonly mode
	 * we send inodes straight to reclaim, so no inodes will be queued.
	 */
	xfs_inodegc_stop(mp);

	/* Free the per-AG metadata reservation pool. */
	xfs_fs_unreserve_ag_blocks(mp);

	/*
	 * Before we sync the metadata, we need to free up the reserve block
	 * pool so that the used block count in the superblock on disk is
	 * correct at the end of the remount. Stash the current reserve pool
	 * size so that if we get remounted rw, we can return it to the same
	 * size.
	 */
	xfs_save_resvblks(mp);

	xfs_log_clean(mp);
	xfs_set_readonly(mp);

	return 0;
}
1927  
1928  /*
1929   * Logically we would return an error here to prevent users from believing
1930   * they might have changed mount options using remount which can't be changed.
1931   *
1932   * But unfortunately mount(8) adds all options from mtab and fstab to the mount
1933   * arguments in some cases so we can't blindly reject options, but have to
1934   * check for each specified option if it actually differs from the currently
1935   * set option and only reject it if that's the case.
1936   *
1937   * Until that is implemented we return success for every remount request, and
1938   * silently ignore all options that we can't actually change.
1939   */
1940  static int
xfs_fs_reconfigure(struct fs_context * fc)1941  xfs_fs_reconfigure(
1942  	struct fs_context *fc)
1943  {
1944  	struct xfs_mount	*mp = XFS_M(fc->root->d_sb);
1945  	struct xfs_mount        *new_mp = fc->s_fs_info;
1946  	int			flags = fc->sb_flags;
1947  	int			error;
1948  
1949  	/* version 5 superblocks always support version counters. */
1950  	if (xfs_has_crc(mp))
1951  		fc->sb_flags |= SB_I_VERSION;
1952  
1953  	error = xfs_fs_validate_params(new_mp);
1954  	if (error)
1955  		return error;
1956  
1957  	/* inode32 -> inode64 */
1958  	if (xfs_has_small_inums(mp) && !xfs_has_small_inums(new_mp)) {
1959  		mp->m_features &= ~XFS_FEAT_SMALL_INUMS;
1960  		mp->m_maxagi = xfs_set_inode_alloc(mp, mp->m_sb.sb_agcount);
1961  	}
1962  
1963  	/* inode64 -> inode32 */
1964  	if (!xfs_has_small_inums(mp) && xfs_has_small_inums(new_mp)) {
1965  		mp->m_features |= XFS_FEAT_SMALL_INUMS;
1966  		mp->m_maxagi = xfs_set_inode_alloc(mp, mp->m_sb.sb_agcount);
1967  	}
1968  
1969  	/* ro -> rw */
1970  	if (xfs_is_readonly(mp) && !(flags & SB_RDONLY)) {
1971  		error = xfs_remount_rw(mp);
1972  		if (error)
1973  			return error;
1974  	}
1975  
1976  	/* rw -> ro */
1977  	if (!xfs_is_readonly(mp) && (flags & SB_RDONLY)) {
1978  		error = xfs_remount_ro(mp);
1979  		if (error)
1980  			return error;
1981  	}
1982  
1983  	return 0;
1984  }
1985  
1986  static void
xfs_fs_free(struct fs_context * fc)1987  xfs_fs_free(
1988  	struct fs_context	*fc)
1989  {
1990  	struct xfs_mount	*mp = fc->s_fs_info;
1991  
1992  	/*
1993  	 * mp is stored in the fs_context when it is initialized.
1994  	 * mp is transferred to the superblock on a successful mount,
1995  	 * but if an error occurs before the transfer we have to free
1996  	 * it here.
1997  	 */
1998  	if (mp)
1999  		xfs_mount_free(mp);
2000  }
2001  
2002  static const struct fs_context_operations xfs_context_ops = {
2003  	.parse_param = xfs_fs_parse_param,
2004  	.get_tree    = xfs_fs_get_tree,
2005  	.reconfigure = xfs_fs_reconfigure,
2006  	.free        = xfs_fs_free,
2007  };
2008  
2009  /*
2010   * WARNING: do not initialise any parameters in this function that depend on
2011   * mount option parsing having already been performed as this can be called from
2012   * fsopen() before any parameters have been set.
2013   */
xfs_init_fs_context(struct fs_context * fc)2014  static int xfs_init_fs_context(
2015  	struct fs_context	*fc)
2016  {
2017  	struct xfs_mount	*mp;
2018  
2019  	mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL | __GFP_NOFAIL);
2020  	if (!mp)
2021  		return -ENOMEM;
2022  
2023  	spin_lock_init(&mp->m_sb_lock);
2024  	xa_init(&mp->m_perags);
2025  	mutex_init(&mp->m_growlock);
2026  	INIT_WORK(&mp->m_flush_inodes_work, xfs_flush_inodes_worker);
2027  	INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
2028  	mp->m_kobj.kobject.kset = xfs_kset;
2029  	/*
2030  	 * We don't create the finobt per-ag space reservation until after log
2031  	 * recovery, so we must set this to true so that an ifree transaction
2032  	 * started during log recovery will not depend on space reservations
2033  	 * for finobt expansion.
2034  	 */
2035  	mp->m_finobt_nores = true;
2036  
2037  	/*
2038  	 * These can be overridden by the mount option parsing.
2039  	 */
2040  	mp->m_logbufs = -1;
2041  	mp->m_logbsize = -1;
2042  	mp->m_allocsize_log = 16; /* 64k */
2043  
2044  	xfs_hooks_init(&mp->m_dir_update_hooks);
2045  
2046  	fc->s_fs_info = mp;
2047  	fc->ops = &xfs_context_ops;
2048  
2049  	return 0;
2050  }
2051  
/*
 * Tear down a superblock through the generic block-device helper and then
 * free the xfs_mount attached to it.
 */
static void
xfs_kill_sb(
	struct super_block	*sb)
{
	struct xfs_mount	*mp;

	kill_block_super(sb);

	/* The mount is looked up only once the generic teardown has returned. */
	mp = XFS_M(sb);
	xfs_mount_free(mp);
}
2059  
2060  static struct file_system_type xfs_fs_type = {
2061  	.owner			= THIS_MODULE,
2062  	.name			= "xfs",
2063  	.init_fs_context	= xfs_init_fs_context,
2064  	.parameters		= xfs_fs_parameters,
2065  	.kill_sb		= xfs_kill_sb,
2066  	.fs_flags		= FS_REQUIRES_DEV | FS_ALLOW_IDMAP,
2067  };
2068  MODULE_ALIAS_FS("xfs");
2069  
2070  STATIC int __init
xfs_init_caches(void)2071  xfs_init_caches(void)
2072  {
2073  	int		error;
2074  
2075  	xfs_buf_cache = kmem_cache_create("xfs_buf", sizeof(struct xfs_buf), 0,
2076  					 SLAB_HWCACHE_ALIGN |
2077  					 SLAB_RECLAIM_ACCOUNT,
2078  					 NULL);
2079  	if (!xfs_buf_cache)
2080  		goto out;
2081  
2082  	xfs_log_ticket_cache = kmem_cache_create("xfs_log_ticket",
2083  						sizeof(struct xlog_ticket),
2084  						0, 0, NULL);
2085  	if (!xfs_log_ticket_cache)
2086  		goto out_destroy_buf_cache;
2087  
2088  	error = xfs_btree_init_cur_caches();
2089  	if (error)
2090  		goto out_destroy_log_ticket_cache;
2091  
2092  	error = rcbagbt_init_cur_cache();
2093  	if (error)
2094  		goto out_destroy_btree_cur_cache;
2095  
2096  	error = xfs_defer_init_item_caches();
2097  	if (error)
2098  		goto out_destroy_rcbagbt_cur_cache;
2099  
2100  	xfs_da_state_cache = kmem_cache_create("xfs_da_state",
2101  					      sizeof(struct xfs_da_state),
2102  					      0, 0, NULL);
2103  	if (!xfs_da_state_cache)
2104  		goto out_destroy_defer_item_cache;
2105  
2106  	xfs_ifork_cache = kmem_cache_create("xfs_ifork",
2107  					   sizeof(struct xfs_ifork),
2108  					   0, 0, NULL);
2109  	if (!xfs_ifork_cache)
2110  		goto out_destroy_da_state_cache;
2111  
2112  	xfs_trans_cache = kmem_cache_create("xfs_trans",
2113  					   sizeof(struct xfs_trans),
2114  					   0, 0, NULL);
2115  	if (!xfs_trans_cache)
2116  		goto out_destroy_ifork_cache;
2117  
2118  
2119  	/*
2120  	 * The size of the cache-allocated buf log item is the maximum
2121  	 * size possible under XFS.  This wastes a little bit of memory,
2122  	 * but it is much faster.
2123  	 */
2124  	xfs_buf_item_cache = kmem_cache_create("xfs_buf_item",
2125  					      sizeof(struct xfs_buf_log_item),
2126  					      0, 0, NULL);
2127  	if (!xfs_buf_item_cache)
2128  		goto out_destroy_trans_cache;
2129  
2130  	xfs_efd_cache = kmem_cache_create("xfs_efd_item",
2131  			xfs_efd_log_item_sizeof(XFS_EFD_MAX_FAST_EXTENTS),
2132  			0, 0, NULL);
2133  	if (!xfs_efd_cache)
2134  		goto out_destroy_buf_item_cache;
2135  
2136  	xfs_efi_cache = kmem_cache_create("xfs_efi_item",
2137  			xfs_efi_log_item_sizeof(XFS_EFI_MAX_FAST_EXTENTS),
2138  			0, 0, NULL);
2139  	if (!xfs_efi_cache)
2140  		goto out_destroy_efd_cache;
2141  
2142  	xfs_inode_cache = kmem_cache_create("xfs_inode",
2143  					   sizeof(struct xfs_inode), 0,
2144  					   (SLAB_HWCACHE_ALIGN |
2145  					    SLAB_RECLAIM_ACCOUNT |
2146  					    SLAB_ACCOUNT),
2147  					   xfs_fs_inode_init_once);
2148  	if (!xfs_inode_cache)
2149  		goto out_destroy_efi_cache;
2150  
2151  	xfs_ili_cache = kmem_cache_create("xfs_ili",
2152  					 sizeof(struct xfs_inode_log_item), 0,
2153  					 SLAB_RECLAIM_ACCOUNT,
2154  					 NULL);
2155  	if (!xfs_ili_cache)
2156  		goto out_destroy_inode_cache;
2157  
2158  	xfs_icreate_cache = kmem_cache_create("xfs_icr",
2159  					     sizeof(struct xfs_icreate_item),
2160  					     0, 0, NULL);
2161  	if (!xfs_icreate_cache)
2162  		goto out_destroy_ili_cache;
2163  
2164  	xfs_rud_cache = kmem_cache_create("xfs_rud_item",
2165  					 sizeof(struct xfs_rud_log_item),
2166  					 0, 0, NULL);
2167  	if (!xfs_rud_cache)
2168  		goto out_destroy_icreate_cache;
2169  
2170  	xfs_rui_cache = kmem_cache_create("xfs_rui_item",
2171  			xfs_rui_log_item_sizeof(XFS_RUI_MAX_FAST_EXTENTS),
2172  			0, 0, NULL);
2173  	if (!xfs_rui_cache)
2174  		goto out_destroy_rud_cache;
2175  
2176  	xfs_cud_cache = kmem_cache_create("xfs_cud_item",
2177  					 sizeof(struct xfs_cud_log_item),
2178  					 0, 0, NULL);
2179  	if (!xfs_cud_cache)
2180  		goto out_destroy_rui_cache;
2181  
2182  	xfs_cui_cache = kmem_cache_create("xfs_cui_item",
2183  			xfs_cui_log_item_sizeof(XFS_CUI_MAX_FAST_EXTENTS),
2184  			0, 0, NULL);
2185  	if (!xfs_cui_cache)
2186  		goto out_destroy_cud_cache;
2187  
2188  	xfs_bud_cache = kmem_cache_create("xfs_bud_item",
2189  					 sizeof(struct xfs_bud_log_item),
2190  					 0, 0, NULL);
2191  	if (!xfs_bud_cache)
2192  		goto out_destroy_cui_cache;
2193  
2194  	xfs_bui_cache = kmem_cache_create("xfs_bui_item",
2195  			xfs_bui_log_item_sizeof(XFS_BUI_MAX_FAST_EXTENTS),
2196  			0, 0, NULL);
2197  	if (!xfs_bui_cache)
2198  		goto out_destroy_bud_cache;
2199  
2200  	xfs_attrd_cache = kmem_cache_create("xfs_attrd_item",
2201  					    sizeof(struct xfs_attrd_log_item),
2202  					    0, 0, NULL);
2203  	if (!xfs_attrd_cache)
2204  		goto out_destroy_bui_cache;
2205  
2206  	xfs_attri_cache = kmem_cache_create("xfs_attri_item",
2207  					    sizeof(struct xfs_attri_log_item),
2208  					    0, 0, NULL);
2209  	if (!xfs_attri_cache)
2210  		goto out_destroy_attrd_cache;
2211  
2212  	xfs_iunlink_cache = kmem_cache_create("xfs_iul_item",
2213  					     sizeof(struct xfs_iunlink_item),
2214  					     0, 0, NULL);
2215  	if (!xfs_iunlink_cache)
2216  		goto out_destroy_attri_cache;
2217  
2218  	xfs_xmd_cache = kmem_cache_create("xfs_xmd_item",
2219  					 sizeof(struct xfs_xmd_log_item),
2220  					 0, 0, NULL);
2221  	if (!xfs_xmd_cache)
2222  		goto out_destroy_iul_cache;
2223  
2224  	xfs_xmi_cache = kmem_cache_create("xfs_xmi_item",
2225  					 sizeof(struct xfs_xmi_log_item),
2226  					 0, 0, NULL);
2227  	if (!xfs_xmi_cache)
2228  		goto out_destroy_xmd_cache;
2229  
2230  	xfs_parent_args_cache = kmem_cache_create("xfs_parent_args",
2231  					     sizeof(struct xfs_parent_args),
2232  					     0, 0, NULL);
2233  	if (!xfs_parent_args_cache)
2234  		goto out_destroy_xmi_cache;
2235  
2236  	return 0;
2237  
2238   out_destroy_xmi_cache:
2239  	kmem_cache_destroy(xfs_xmi_cache);
2240   out_destroy_xmd_cache:
2241  	kmem_cache_destroy(xfs_xmd_cache);
2242   out_destroy_iul_cache:
2243  	kmem_cache_destroy(xfs_iunlink_cache);
2244   out_destroy_attri_cache:
2245  	kmem_cache_destroy(xfs_attri_cache);
2246   out_destroy_attrd_cache:
2247  	kmem_cache_destroy(xfs_attrd_cache);
2248   out_destroy_bui_cache:
2249  	kmem_cache_destroy(xfs_bui_cache);
2250   out_destroy_bud_cache:
2251  	kmem_cache_destroy(xfs_bud_cache);
2252   out_destroy_cui_cache:
2253  	kmem_cache_destroy(xfs_cui_cache);
2254   out_destroy_cud_cache:
2255  	kmem_cache_destroy(xfs_cud_cache);
2256   out_destroy_rui_cache:
2257  	kmem_cache_destroy(xfs_rui_cache);
2258   out_destroy_rud_cache:
2259  	kmem_cache_destroy(xfs_rud_cache);
2260   out_destroy_icreate_cache:
2261  	kmem_cache_destroy(xfs_icreate_cache);
2262   out_destroy_ili_cache:
2263  	kmem_cache_destroy(xfs_ili_cache);
2264   out_destroy_inode_cache:
2265  	kmem_cache_destroy(xfs_inode_cache);
2266   out_destroy_efi_cache:
2267  	kmem_cache_destroy(xfs_efi_cache);
2268   out_destroy_efd_cache:
2269  	kmem_cache_destroy(xfs_efd_cache);
2270   out_destroy_buf_item_cache:
2271  	kmem_cache_destroy(xfs_buf_item_cache);
2272   out_destroy_trans_cache:
2273  	kmem_cache_destroy(xfs_trans_cache);
2274   out_destroy_ifork_cache:
2275  	kmem_cache_destroy(xfs_ifork_cache);
2276   out_destroy_da_state_cache:
2277  	kmem_cache_destroy(xfs_da_state_cache);
2278   out_destroy_defer_item_cache:
2279  	xfs_defer_destroy_item_caches();
2280   out_destroy_rcbagbt_cur_cache:
2281  	rcbagbt_destroy_cur_cache();
2282   out_destroy_btree_cur_cache:
2283  	xfs_btree_destroy_cur_caches();
2284   out_destroy_log_ticket_cache:
2285  	kmem_cache_destroy(xfs_log_ticket_cache);
2286   out_destroy_buf_cache:
2287  	kmem_cache_destroy(xfs_buf_cache);
2288   out:
2289  	return -ENOMEM;
2290  }
2291  
2292  STATIC void
xfs_destroy_caches(void)2293  xfs_destroy_caches(void)
2294  {
2295  	/*
2296  	 * Make sure all delayed rcu free are flushed before we
2297  	 * destroy caches.
2298  	 */
2299  	rcu_barrier();
2300  	kmem_cache_destroy(xfs_parent_args_cache);
2301  	kmem_cache_destroy(xfs_xmd_cache);
2302  	kmem_cache_destroy(xfs_xmi_cache);
2303  	kmem_cache_destroy(xfs_iunlink_cache);
2304  	kmem_cache_destroy(xfs_attri_cache);
2305  	kmem_cache_destroy(xfs_attrd_cache);
2306  	kmem_cache_destroy(xfs_bui_cache);
2307  	kmem_cache_destroy(xfs_bud_cache);
2308  	kmem_cache_destroy(xfs_cui_cache);
2309  	kmem_cache_destroy(xfs_cud_cache);
2310  	kmem_cache_destroy(xfs_rui_cache);
2311  	kmem_cache_destroy(xfs_rud_cache);
2312  	kmem_cache_destroy(xfs_icreate_cache);
2313  	kmem_cache_destroy(xfs_ili_cache);
2314  	kmem_cache_destroy(xfs_inode_cache);
2315  	kmem_cache_destroy(xfs_efi_cache);
2316  	kmem_cache_destroy(xfs_efd_cache);
2317  	kmem_cache_destroy(xfs_buf_item_cache);
2318  	kmem_cache_destroy(xfs_trans_cache);
2319  	kmem_cache_destroy(xfs_ifork_cache);
2320  	kmem_cache_destroy(xfs_da_state_cache);
2321  	xfs_defer_destroy_item_caches();
2322  	rcbagbt_destroy_cur_cache();
2323  	xfs_btree_destroy_cur_caches();
2324  	kmem_cache_destroy(xfs_log_ticket_cache);
2325  	kmem_cache_destroy(xfs_buf_cache);
2326  }
2327  
2328  STATIC int __init
xfs_init_workqueues(void)2329  xfs_init_workqueues(void)
2330  {
2331  	/*
2332  	 * The allocation workqueue can be used in memory reclaim situations
2333  	 * (writepage path), and parallelism is only limited by the number of
2334  	 * AGs in all the filesystems mounted. Hence use the default large
2335  	 * max_active value for this workqueue.
2336  	 */
2337  	xfs_alloc_wq = alloc_workqueue("xfsalloc",
2338  			XFS_WQFLAGS(WQ_MEM_RECLAIM | WQ_FREEZABLE), 0);
2339  	if (!xfs_alloc_wq)
2340  		return -ENOMEM;
2341  
2342  	xfs_discard_wq = alloc_workqueue("xfsdiscard", XFS_WQFLAGS(WQ_UNBOUND),
2343  			0);
2344  	if (!xfs_discard_wq)
2345  		goto out_free_alloc_wq;
2346  
2347  	return 0;
2348  out_free_alloc_wq:
2349  	destroy_workqueue(xfs_alloc_wq);
2350  	return -ENOMEM;
2351  }
2352  
2353  STATIC void
xfs_destroy_workqueues(void)2354  xfs_destroy_workqueues(void)
2355  {
2356  	destroy_workqueue(xfs_discard_wq);
2357  	destroy_workqueue(xfs_alloc_wq);
2358  }
2359  
2360  STATIC int __init
init_xfs_fs(void)2361  init_xfs_fs(void)
2362  {
2363  	int			error;
2364  
2365  	xfs_check_ondisk_structs();
2366  
2367  	error = xfs_dahash_test();
2368  	if (error)
2369  		return error;
2370  
2371  	printk(KERN_INFO XFS_VERSION_STRING " with "
2372  			 XFS_BUILD_OPTIONS " enabled\n");
2373  
2374  	xfs_dir_startup();
2375  
2376  	error = xfs_init_caches();
2377  	if (error)
2378  		goto out;
2379  
2380  	error = xfs_init_workqueues();
2381  	if (error)
2382  		goto out_destroy_caches;
2383  
2384  	error = xfs_mru_cache_init();
2385  	if (error)
2386  		goto out_destroy_wq;
2387  
2388  	error = xfs_init_procfs();
2389  	if (error)
2390  		goto out_mru_cache_uninit;
2391  
2392  	error = xfs_sysctl_register();
2393  	if (error)
2394  		goto out_cleanup_procfs;
2395  
2396  	xfs_debugfs = xfs_debugfs_mkdir("xfs", NULL);
2397  
2398  	xfs_kset = kset_create_and_add("xfs", NULL, fs_kobj);
2399  	if (!xfs_kset) {
2400  		error = -ENOMEM;
2401  		goto out_debugfs_unregister;
2402  	}
2403  
2404  	xfsstats.xs_kobj.kobject.kset = xfs_kset;
2405  
2406  	xfsstats.xs_stats = alloc_percpu(struct xfsstats);
2407  	if (!xfsstats.xs_stats) {
2408  		error = -ENOMEM;
2409  		goto out_kset_unregister;
2410  	}
2411  
2412  	error = xfs_sysfs_init(&xfsstats.xs_kobj, &xfs_stats_ktype, NULL,
2413  			       "stats");
2414  	if (error)
2415  		goto out_free_stats;
2416  
2417  	error = xchk_global_stats_setup(xfs_debugfs);
2418  	if (error)
2419  		goto out_remove_stats_kobj;
2420  
2421  #ifdef DEBUG
2422  	xfs_dbg_kobj.kobject.kset = xfs_kset;
2423  	error = xfs_sysfs_init(&xfs_dbg_kobj, &xfs_dbg_ktype, NULL, "debug");
2424  	if (error)
2425  		goto out_remove_scrub_stats;
2426  #endif
2427  
2428  	error = xfs_qm_init();
2429  	if (error)
2430  		goto out_remove_dbg_kobj;
2431  
2432  	error = register_filesystem(&xfs_fs_type);
2433  	if (error)
2434  		goto out_qm_exit;
2435  	return 0;
2436  
2437   out_qm_exit:
2438  	xfs_qm_exit();
2439   out_remove_dbg_kobj:
2440  #ifdef DEBUG
2441  	xfs_sysfs_del(&xfs_dbg_kobj);
2442   out_remove_scrub_stats:
2443  #endif
2444  	xchk_global_stats_teardown();
2445   out_remove_stats_kobj:
2446  	xfs_sysfs_del(&xfsstats.xs_kobj);
2447   out_free_stats:
2448  	free_percpu(xfsstats.xs_stats);
2449   out_kset_unregister:
2450  	kset_unregister(xfs_kset);
2451   out_debugfs_unregister:
2452  	debugfs_remove(xfs_debugfs);
2453  	xfs_sysctl_unregister();
2454   out_cleanup_procfs:
2455  	xfs_cleanup_procfs();
2456   out_mru_cache_uninit:
2457  	xfs_mru_cache_uninit();
2458   out_destroy_wq:
2459  	xfs_destroy_workqueues();
2460   out_destroy_caches:
2461  	xfs_destroy_caches();
2462   out:
2463  	return error;
2464  }
2465  
2466  STATIC void __exit
exit_xfs_fs(void)2467  exit_xfs_fs(void)
2468  {
2469  	xfs_qm_exit();
2470  	unregister_filesystem(&xfs_fs_type);
2471  #ifdef DEBUG
2472  	xfs_sysfs_del(&xfs_dbg_kobj);
2473  #endif
2474  	xchk_global_stats_teardown();
2475  	xfs_sysfs_del(&xfsstats.xs_kobj);
2476  	free_percpu(xfsstats.xs_stats);
2477  	kset_unregister(xfs_kset);
2478  	debugfs_remove(xfs_debugfs);
2479  	xfs_sysctl_unregister();
2480  	xfs_cleanup_procfs();
2481  	xfs_mru_cache_uninit();
2482  	xfs_destroy_workqueues();
2483  	xfs_destroy_caches();
2484  	xfs_uuid_table_free();
2485  }
2486  
2487  module_init(init_xfs_fs);
2488  module_exit(exit_xfs_fs);
2489  
2490  MODULE_AUTHOR("Silicon Graphics, Inc.");
2491  MODULE_DESCRIPTION(XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled");
2492  MODULE_LICENSE("GPL");
2493