1  // SPDX-License-Identifier: GPL-2.0-or-later
2  /*
3   * Copyright (C) 2018-2023 Oracle.  All Rights Reserved.
4   * Author: Darrick J. Wong <djwong@kernel.org>
5   */
6  #include "xfs.h"
7  #include "xfs_fs.h"
8  #include "xfs_shared.h"
9  #include "xfs_format.h"
10  #include "xfs_trans_resv.h"
11  #include "xfs_mount.h"
12  #include "xfs_defer.h"
13  #include "xfs_btree.h"
14  #include "xfs_bit.h"
15  #include "xfs_log_format.h"
16  #include "xfs_trans.h"
17  #include "xfs_sb.h"
18  #include "xfs_inode.h"
19  #include "xfs_icache.h"
20  #include "xfs_inode_buf.h"
21  #include "xfs_inode_fork.h"
22  #include "xfs_ialloc.h"
23  #include "xfs_da_format.h"
24  #include "xfs_reflink.h"
25  #include "xfs_alloc.h"
26  #include "xfs_rmap.h"
27  #include "xfs_rmap_btree.h"
28  #include "xfs_bmap.h"
29  #include "xfs_bmap_btree.h"
30  #include "xfs_bmap_util.h"
31  #include "xfs_dir2.h"
32  #include "xfs_dir2_priv.h"
33  #include "xfs_quota_defs.h"
34  #include "xfs_quota.h"
35  #include "xfs_ag.h"
36  #include "xfs_rtbitmap.h"
37  #include "xfs_attr_leaf.h"
38  #include "xfs_log_priv.h"
39  #include "xfs_health.h"
40  #include "xfs_symlink_remote.h"
41  #include "scrub/xfs_scrub.h"
42  #include "scrub/scrub.h"
43  #include "scrub/common.h"
44  #include "scrub/btree.h"
45  #include "scrub/trace.h"
46  #include "scrub/repair.h"
47  #include "scrub/iscan.h"
48  #include "scrub/readdir.h"
49  #include "scrub/tempfile.h"
50  
51  /*
52   * Inode Record Repair
53   * ===================
54   *
55   * Roughly speaking, inode problems can be classified based on whether or not
56   * they trip the dinode verifiers.  If those trip, then we won't be able to
57   * xfs_iget ourselves the inode.
58   *
 * Therefore, the xrep_dinode_* functions fix anything that will cause the
 * inode buffer verifier or the dinode verifier to fail.  The xrep_inode_*
 * functions fix things on live incore inodes.  The inode repair functions
 * make decisions with security and usability implications when reviving a
 * file:
63   *
 * - Files with zero di_mode or a garbage di_mode are converted to a regular
 *   file that only root can read.  This file may not actually contain user
 *   data, if the file was not previously a regular file.  Setuid and setgid
 *   bits are cleared.
68   *
69   * - Zero-size directories can be truncated to look empty.  It is necessary to
70   *   run the bmapbtd and directory repair functions to fully rebuild the
71   *   directory.
72   *
73   * - Zero-size symbolic link targets can be truncated to '?'.  It is necessary
74   *   to run the bmapbtd and symlink repair functions to salvage the symlink.
75   *
76   * - Invalid extent size hints will be removed.
77   *
78   * - Quotacheck will be scheduled if we repaired an inode that was so badly
79   *   damaged that the ondisk inode had to be rebuilt.
80   *
81   * - Invalid user, group, or project IDs (aka -1U) will be reset to zero.
82   *   Setuid and setgid bits are cleared.
83   *
84   * - Data and attr forks are reset to extents format with zero extents if the
85   *   fork data is inconsistent.  It is necessary to run the bmapbtd or bmapbta
86   *   repair functions to recover the space mapping.
87   *
88   * - ACLs will not be recovered if the attr fork is zapped or the extended
89   *   attribute structure itself requires salvaging.
90   *
91   * - If the attr fork is zapped, the user and group ids are reset to root and
92   *   the setuid and setgid bits are removed.
93   */
94  
/*
 * All the information we need to repair the ondisk inode if we can't iget the
 * incore inode.  We don't allocate this buffer unless we're going to perform
 * a repair to the ondisk inode cluster buffer.
 *
 * The structure is allocated zeroed by xrep_setup_inode() and attached to the
 * scrub context through sc->buf.
 */
struct xrep_inode {
	/* Inode mapping that we saved from the initial lookup attempt. */
	struct xfs_imap		imap;

	/* Scrub context that this repair state belongs to. */
	struct xfs_scrub	*sc;

	/* Blocks in use on the data device by data extents or bmbt blocks. */
	xfs_rfsblock_t		data_blocks;

	/* Blocks in use on the rt device. */
	xfs_rfsblock_t		rt_blocks;

	/* Blocks in use by the attr fork. */
	xfs_rfsblock_t		attr_blocks;

	/* Number of data device extents for the data fork. */
	xfs_extnum_t		data_extents;

	/*
	 * Number of realtime device extents for the data fork.  If
	 * data_extents and rt_extents indicate that the data fork has extents
	 * on both devices, we'll just back away slowly.
	 */
	xfs_extnum_t		rt_extents;

	/* Number of (data device) extents for the attr fork. */
	xfs_aextnum_t		attr_extents;

	/*
	 * Sick state to set after zapping parts of the inode.  The zap
	 * helpers OR XFS_SICK_INO_*_ZAPPED bits into this mask.
	 */
	unsigned int		ino_sick_mask;

	/* Must we remove all access from this file? */
	bool			zap_acls;

	/* Inode scanner to see if we can find the ftype from dirents */
	struct xchk_iscan	ftype_iscan;

	/*
	 * File type (XFS_DIR3_FT_*) claimed by the directory entries found so
	 * far that point at the inode being repaired.  Reset to
	 * XFS_DIR3_FT_UNKNOWN when the directory scan starts.
	 */
	uint8_t			alleged_ftype;
};
138  
139  /*
140   * Setup function for inode repair.  @imap contains the ondisk inode mapping
141   * information so that we can correct the ondisk inode cluster buffer if
142   * necessary to make iget work.
143   */
144  int
xrep_setup_inode(struct xfs_scrub * sc,const struct xfs_imap * imap)145  xrep_setup_inode(
146  	struct xfs_scrub	*sc,
147  	const struct xfs_imap	*imap)
148  {
149  	struct xrep_inode	*ri;
150  
151  	sc->buf = kzalloc(sizeof(struct xrep_inode), XCHK_GFP_FLAGS);
152  	if (!sc->buf)
153  		return -ENOMEM;
154  
155  	ri = sc->buf;
156  	memcpy(&ri->imap, imap, sizeof(struct xfs_imap));
157  	ri->sc = sc;
158  	return 0;
159  }
160  
161  /*
162   * Make sure this ondisk inode can pass the inode buffer verifier.  This is
163   * not the same as the dinode verifier.
164   */
165  STATIC void
xrep_dinode_buf_core(struct xfs_scrub * sc,struct xfs_buf * bp,unsigned int ioffset)166  xrep_dinode_buf_core(
167  	struct xfs_scrub	*sc,
168  	struct xfs_buf		*bp,
169  	unsigned int		ioffset)
170  {
171  	struct xfs_dinode	*dip = xfs_buf_offset(bp, ioffset);
172  	struct xfs_trans	*tp = sc->tp;
173  	struct xfs_mount	*mp = sc->mp;
174  	xfs_agino_t		agino;
175  	bool			crc_ok = false;
176  	bool			magic_ok = false;
177  	bool			unlinked_ok = false;
178  
179  	agino = be32_to_cpu(dip->di_next_unlinked);
180  
181  	if (xfs_verify_agino_or_null(bp->b_pag, agino))
182  		unlinked_ok = true;
183  
184  	if (dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) &&
185  	    xfs_dinode_good_version(mp, dip->di_version))
186  		magic_ok = true;
187  
188  	if (xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize,
189  			XFS_DINODE_CRC_OFF))
190  		crc_ok = true;
191  
192  	if (magic_ok && unlinked_ok && crc_ok)
193  		return;
194  
195  	if (!magic_ok) {
196  		dip->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
197  		dip->di_version = 3;
198  	}
199  	if (!unlinked_ok)
200  		dip->di_next_unlinked = cpu_to_be32(NULLAGINO);
201  	xfs_dinode_calc_crc(mp, dip);
202  	xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DINO_BUF);
203  	xfs_trans_log_buf(tp, bp, ioffset,
204  				  ioffset + sizeof(struct xfs_dinode) - 1);
205  }
206  
207  /* Make sure this inode cluster buffer can pass the inode buffer verifier. */
208  STATIC void
xrep_dinode_buf(struct xfs_scrub * sc,struct xfs_buf * bp)209  xrep_dinode_buf(
210  	struct xfs_scrub	*sc,
211  	struct xfs_buf		*bp)
212  {
213  	struct xfs_mount	*mp = sc->mp;
214  	int			i;
215  	int			ni;
216  
217  	ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock;
218  	for (i = 0; i < ni; i++)
219  		xrep_dinode_buf_core(sc, bp, i << mp->m_sb.sb_inodelog);
220  }
221  
222  /* Reinitialize things that never change in an inode. */
223  STATIC void
xrep_dinode_header(struct xfs_scrub * sc,struct xfs_dinode * dip)224  xrep_dinode_header(
225  	struct xfs_scrub	*sc,
226  	struct xfs_dinode	*dip)
227  {
228  	trace_xrep_dinode_header(sc, dip);
229  
230  	dip->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
231  	if (!xfs_dinode_good_version(sc->mp, dip->di_version))
232  		dip->di_version = 3;
233  	dip->di_ino = cpu_to_be64(sc->sm->sm_ino);
234  	uuid_copy(&dip->di_uuid, &sc->mp->m_sb.sb_meta_uuid);
235  	dip->di_gen = cpu_to_be32(sc->sm->sm_gen);
236  }
237  
238  /*
239   * If this directory entry points to the scrub target inode, then the directory
240   * we're scanning is the parent of the scrub target inode.
241   */
242  STATIC int
xrep_dinode_findmode_dirent(struct xfs_scrub * sc,struct xfs_inode * dp,xfs_dir2_dataptr_t dapos,const struct xfs_name * name,xfs_ino_t ino,void * priv)243  xrep_dinode_findmode_dirent(
244  	struct xfs_scrub		*sc,
245  	struct xfs_inode		*dp,
246  	xfs_dir2_dataptr_t		dapos,
247  	const struct xfs_name		*name,
248  	xfs_ino_t			ino,
249  	void				*priv)
250  {
251  	struct xrep_inode		*ri = priv;
252  	int				error = 0;
253  
254  	if (xchk_should_terminate(ri->sc, &error))
255  		return error;
256  
257  	if (ino != sc->sm->sm_ino)
258  		return 0;
259  
260  	/* Ignore garbage directory entry names. */
261  	if (name->len == 0 || !xfs_dir2_namecheck(name->name, name->len))
262  		return -EFSCORRUPTED;
263  
264  	/* Don't pick up dot or dotdot entries; we only want child dirents. */
265  	if (xfs_dir2_samename(name, &xfs_name_dotdot) ||
266  	    xfs_dir2_samename(name, &xfs_name_dot))
267  		return 0;
268  
269  	/*
270  	 * Uhoh, more than one parent for this inode and they don't agree on
271  	 * the file type?
272  	 */
273  	if (ri->alleged_ftype != XFS_DIR3_FT_UNKNOWN &&
274  	    ri->alleged_ftype != name->type) {
275  		trace_xrep_dinode_findmode_dirent_inval(ri->sc, dp, name->type,
276  				ri->alleged_ftype);
277  		return -EFSCORRUPTED;
278  	}
279  
280  	/* We found a potential parent; remember the ftype. */
281  	trace_xrep_dinode_findmode_dirent(ri->sc, dp, name->type);
282  	ri->alleged_ftype = name->type;
283  	return 0;
284  }
285  
/*
 * Try to lock a directory, or wait a jiffy.
 *
 * Returns true if the ILOCK was taken in @lock_mode without blocking.  If the
 * lock is contended, sleep killably for one jiffy so that a caller polling in
 * a loop does not spin, then return false.
 */
static inline bool
xrep_dinode_ilock_nowait(
	struct xfs_inode	*dp,
	unsigned int		lock_mode)
{
	if (xfs_ilock_nowait(dp, lock_mode))
		return true;

	schedule_timeout_killable(1);
	return false;
}
298  
299  /*
300   * Try to lock a directory to look for ftype hints.  Since we already hold the
301   * AGI buffer, we cannot block waiting for the ILOCK because rename can take
302   * the ILOCK and then try to lock AGIs.
303   */
304  STATIC int
xrep_dinode_trylock_directory(struct xrep_inode * ri,struct xfs_inode * dp,unsigned int * lock_modep)305  xrep_dinode_trylock_directory(
306  	struct xrep_inode	*ri,
307  	struct xfs_inode	*dp,
308  	unsigned int		*lock_modep)
309  {
310  	unsigned long		deadline = jiffies + msecs_to_jiffies(30000);
311  	unsigned int		lock_mode;
312  	int			error = 0;
313  
314  	do {
315  		if (xchk_should_terminate(ri->sc, &error))
316  			return error;
317  
318  		if (xfs_need_iread_extents(&dp->i_df))
319  			lock_mode = XFS_ILOCK_EXCL;
320  		else
321  			lock_mode = XFS_ILOCK_SHARED;
322  
323  		if (xrep_dinode_ilock_nowait(dp, lock_mode)) {
324  			*lock_modep = lock_mode;
325  			return 0;
326  		}
327  	} while (!time_is_before_jiffies(deadline));
328  	return -EBUSY;
329  }
330  
331  /*
332   * If this is a directory, walk the dirents looking for any that point to the
333   * scrub target inode.
334   */
335  STATIC int
xrep_dinode_findmode_walk_directory(struct xrep_inode * ri,struct xfs_inode * dp)336  xrep_dinode_findmode_walk_directory(
337  	struct xrep_inode	*ri,
338  	struct xfs_inode	*dp)
339  {
340  	struct xfs_scrub	*sc = ri->sc;
341  	unsigned int		lock_mode;
342  	int			error = 0;
343  
344  	/* Ignore temporary repair directories. */
345  	if (xrep_is_tempfile(dp))
346  		return 0;
347  
348  	/*
349  	 * Scan the directory to see if there it contains an entry pointing to
350  	 * the directory that we are repairing.
351  	 */
352  	error = xrep_dinode_trylock_directory(ri, dp, &lock_mode);
353  	if (error)
354  		return error;
355  
356  	/*
357  	 * If this directory is known to be sick, we cannot scan it reliably
358  	 * and must abort.
359  	 */
360  	if (xfs_inode_has_sickness(dp, XFS_SICK_INO_CORE |
361  				       XFS_SICK_INO_BMBTD |
362  				       XFS_SICK_INO_DIR)) {
363  		error = -EFSCORRUPTED;
364  		goto out_unlock;
365  	}
366  
367  	/*
368  	 * We cannot complete our parent pointer scan if a directory looks as
369  	 * though it has been zapped by the inode record repair code.
370  	 */
371  	if (xchk_dir_looks_zapped(dp)) {
372  		error = -EBUSY;
373  		goto out_unlock;
374  	}
375  
376  	error = xchk_dir_walk(sc, dp, xrep_dinode_findmode_dirent, ri);
377  	if (error)
378  		goto out_unlock;
379  
380  out_unlock:
381  	xfs_iunlock(dp, lock_mode);
382  	return error;
383  }
384  
385  /*
386   * Try to find the mode of the inode being repaired by looking for directories
387   * that point down to this file.
388   */
389  STATIC int
xrep_dinode_find_mode(struct xrep_inode * ri,uint16_t * mode)390  xrep_dinode_find_mode(
391  	struct xrep_inode	*ri,
392  	uint16_t		*mode)
393  {
394  	struct xfs_scrub	*sc = ri->sc;
395  	struct xfs_inode	*dp;
396  	int			error;
397  
398  	/* No ftype means we have no other metadata to consult. */
399  	if (!xfs_has_ftype(sc->mp)) {
400  		*mode = S_IFREG;
401  		return 0;
402  	}
403  
404  	/*
405  	 * Scan all directories for parents that might point down to this
406  	 * inode.  Skip the inode being repaired during the scan since it
407  	 * cannot be its own parent.  Note that we still hold the AGI locked
408  	 * so there's a real possibility that _iscan_iter can return EBUSY.
409  	 */
410  	xchk_iscan_start(sc, 5000, 100, &ri->ftype_iscan);
411  	xchk_iscan_set_agi_trylock(&ri->ftype_iscan);
412  	ri->ftype_iscan.skip_ino = sc->sm->sm_ino;
413  	ri->alleged_ftype = XFS_DIR3_FT_UNKNOWN;
414  	while ((error = xchk_iscan_iter(&ri->ftype_iscan, &dp)) == 1) {
415  		if (S_ISDIR(VFS_I(dp)->i_mode))
416  			error = xrep_dinode_findmode_walk_directory(ri, dp);
417  		xchk_iscan_mark_visited(&ri->ftype_iscan, dp);
418  		xchk_irele(sc, dp);
419  		if (error < 0)
420  			break;
421  		if (xchk_should_terminate(sc, &error))
422  			break;
423  	}
424  	xchk_iscan_iter_finish(&ri->ftype_iscan);
425  	xchk_iscan_teardown(&ri->ftype_iscan);
426  
427  	if (error == -EBUSY) {
428  		if (ri->alleged_ftype != XFS_DIR3_FT_UNKNOWN) {
429  			/*
430  			 * If we got an EBUSY after finding at least one
431  			 * dirent, that means the scan found an inode on the
432  			 * inactivation list and could not open it.  Accept the
433  			 * alleged ftype and install a new mode below.
434  			 */
435  			error = 0;
436  		} else if (!(sc->flags & XCHK_TRY_HARDER)) {
437  			/*
438  			 * Otherwise, retry the operation one time to see if
439  			 * the reason for the delay is an inode from the same
440  			 * cluster buffer waiting on the inactivation list.
441  			 */
442  			error = -EDEADLOCK;
443  		}
444  	}
445  	if (error)
446  		return error;
447  
448  	/*
449  	 * Convert the discovered ftype into the file mode.  If all else fails,
450  	 * return S_IFREG.
451  	 */
452  	switch (ri->alleged_ftype) {
453  	case XFS_DIR3_FT_DIR:
454  		*mode = S_IFDIR;
455  		break;
456  	case XFS_DIR3_FT_WHT:
457  	case XFS_DIR3_FT_CHRDEV:
458  		*mode = S_IFCHR;
459  		break;
460  	case XFS_DIR3_FT_BLKDEV:
461  		*mode = S_IFBLK;
462  		break;
463  	case XFS_DIR3_FT_FIFO:
464  		*mode = S_IFIFO;
465  		break;
466  	case XFS_DIR3_FT_SOCK:
467  		*mode = S_IFSOCK;
468  		break;
469  	case XFS_DIR3_FT_SYMLINK:
470  		*mode = S_IFLNK;
471  		break;
472  	default:
473  		*mode = S_IFREG;
474  		break;
475  	}
476  	return 0;
477  }
478  
479  /* Turn di_mode into /something/ recognizable.  Returns true if we succeed. */
480  STATIC int
xrep_dinode_mode(struct xrep_inode * ri,struct xfs_dinode * dip)481  xrep_dinode_mode(
482  	struct xrep_inode	*ri,
483  	struct xfs_dinode	*dip)
484  {
485  	struct xfs_scrub	*sc = ri->sc;
486  	uint16_t		mode = be16_to_cpu(dip->di_mode);
487  	int			error;
488  
489  	trace_xrep_dinode_mode(sc, dip);
490  
491  	if (mode == 0 || xfs_mode_to_ftype(mode) != XFS_DIR3_FT_UNKNOWN)
492  		return 0;
493  
494  	/* Try to fix the mode.  If we cannot, then leave everything alone. */
495  	error = xrep_dinode_find_mode(ri, &mode);
496  	switch (error) {
497  	case -EINTR:
498  	case -EBUSY:
499  	case -EDEADLOCK:
500  		/* temporary failure or fatal signal */
501  		return error;
502  	case 0:
503  		/* found mode */
504  		break;
505  	default:
506  		/* some other error, assume S_IFREG */
507  		mode = S_IFREG;
508  		break;
509  	}
510  
511  	/* bad mode, so we set it to a file that only root can read */
512  	dip->di_mode = cpu_to_be16(mode);
513  	dip->di_uid = 0;
514  	dip->di_gid = 0;
515  	ri->zap_acls = true;
516  	return 0;
517  }
518  
519  /* Fix unused link count fields having nonzero values. */
520  STATIC void
xrep_dinode_nlinks(struct xfs_dinode * dip)521  xrep_dinode_nlinks(
522  	struct xfs_dinode	*dip)
523  {
524  	if (dip->di_version > 1)
525  		dip->di_onlink = 0;
526  	else
527  		dip->di_nlink = 0;
528  }
529  
530  /* Fix any conflicting flags that the verifiers complain about. */
531  STATIC void
xrep_dinode_flags(struct xfs_scrub * sc,struct xfs_dinode * dip,bool isrt)532  xrep_dinode_flags(
533  	struct xfs_scrub	*sc,
534  	struct xfs_dinode	*dip,
535  	bool			isrt)
536  {
537  	struct xfs_mount	*mp = sc->mp;
538  	uint64_t		flags2 = be64_to_cpu(dip->di_flags2);
539  	uint16_t		flags = be16_to_cpu(dip->di_flags);
540  	uint16_t		mode = be16_to_cpu(dip->di_mode);
541  
542  	trace_xrep_dinode_flags(sc, dip);
543  
544  	if (isrt)
545  		flags |= XFS_DIFLAG_REALTIME;
546  	else
547  		flags &= ~XFS_DIFLAG_REALTIME;
548  
549  	/*
550  	 * For regular files on a reflink filesystem, set the REFLINK flag to
551  	 * protect shared extents.  A later stage will actually check those
552  	 * extents and clear the flag if possible.
553  	 */
554  	if (xfs_has_reflink(mp) && S_ISREG(mode))
555  		flags2 |= XFS_DIFLAG2_REFLINK;
556  	else
557  		flags2 &= ~(XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE);
558  	if (flags & XFS_DIFLAG_REALTIME)
559  		flags2 &= ~XFS_DIFLAG2_REFLINK;
560  	if (!xfs_has_bigtime(mp))
561  		flags2 &= ~XFS_DIFLAG2_BIGTIME;
562  	if (!xfs_has_large_extent_counts(mp))
563  		flags2 &= ~XFS_DIFLAG2_NREXT64;
564  	if (flags2 & XFS_DIFLAG2_NREXT64)
565  		dip->di_nrext64_pad = 0;
566  	else if (dip->di_version >= 3)
567  		dip->di_v3_pad = 0;
568  	dip->di_flags = cpu_to_be16(flags);
569  	dip->di_flags2 = cpu_to_be64(flags2);
570  }
571  
572  /*
573   * Blow out symlink; now it points nowhere.  We don't have to worry about
574   * incore state because this inode is failing the verifiers.
575   */
576  STATIC void
xrep_dinode_zap_symlink(struct xrep_inode * ri,struct xfs_dinode * dip)577  xrep_dinode_zap_symlink(
578  	struct xrep_inode	*ri,
579  	struct xfs_dinode	*dip)
580  {
581  	struct xfs_scrub	*sc = ri->sc;
582  	char			*p;
583  
584  	trace_xrep_dinode_zap_symlink(sc, dip);
585  
586  	dip->di_format = XFS_DINODE_FMT_LOCAL;
587  	dip->di_size = cpu_to_be64(1);
588  	p = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
589  	*p = '?';
590  	ri->ino_sick_mask |= XFS_SICK_INO_SYMLINK_ZAPPED;
591  }
592  
593  /*
594   * Blow out dir, make the parent point to the root.  In the future repair will
595   * reconstruct this directory for us.  Note that there's no in-core directory
596   * inode because the sf verifier tripped, so we don't have to worry about the
597   * dentry cache.
598   */
599  STATIC void
xrep_dinode_zap_dir(struct xrep_inode * ri,struct xfs_dinode * dip)600  xrep_dinode_zap_dir(
601  	struct xrep_inode	*ri,
602  	struct xfs_dinode	*dip)
603  {
604  	struct xfs_scrub	*sc = ri->sc;
605  	struct xfs_mount	*mp = sc->mp;
606  	struct xfs_dir2_sf_hdr	*sfp;
607  	int			i8count;
608  
609  	trace_xrep_dinode_zap_dir(sc, dip);
610  
611  	dip->di_format = XFS_DINODE_FMT_LOCAL;
612  	i8count = mp->m_sb.sb_rootino > XFS_DIR2_MAX_SHORT_INUM;
613  	sfp = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
614  	sfp->count = 0;
615  	sfp->i8count = i8count;
616  	xfs_dir2_sf_put_parent_ino(sfp, mp->m_sb.sb_rootino);
617  	dip->di_size = cpu_to_be64(xfs_dir2_sf_hdr_size(i8count));
618  	ri->ino_sick_mask |= XFS_SICK_INO_DIR_ZAPPED;
619  }
620  
621  /* Make sure we don't have a garbage file size. */
622  STATIC void
xrep_dinode_size(struct xrep_inode * ri,struct xfs_dinode * dip)623  xrep_dinode_size(
624  	struct xrep_inode	*ri,
625  	struct xfs_dinode	*dip)
626  {
627  	struct xfs_scrub	*sc = ri->sc;
628  	uint64_t		size = be64_to_cpu(dip->di_size);
629  	uint16_t		mode = be16_to_cpu(dip->di_mode);
630  
631  	trace_xrep_dinode_size(sc, dip);
632  
633  	switch (mode & S_IFMT) {
634  	case S_IFIFO:
635  	case S_IFCHR:
636  	case S_IFBLK:
637  	case S_IFSOCK:
638  		/* di_size can't be nonzero for special files */
639  		dip->di_size = 0;
640  		break;
641  	case S_IFREG:
642  		/* Regular files can't be larger than 2^63-1 bytes. */
643  		dip->di_size = cpu_to_be64(size & ~(1ULL << 63));
644  		break;
645  	case S_IFLNK:
646  		/*
647  		 * Truncate ridiculously oversized symlinks.  If the size is
648  		 * zero, reset it to point to the current directory.  Both of
649  		 * these conditions trigger dinode verifier errors, so there
650  		 * is no in-core state to reset.
651  		 */
652  		if (size > XFS_SYMLINK_MAXLEN)
653  			dip->di_size = cpu_to_be64(XFS_SYMLINK_MAXLEN);
654  		else if (size == 0)
655  			xrep_dinode_zap_symlink(ri, dip);
656  		break;
657  	case S_IFDIR:
658  		/*
659  		 * Directories can't have a size larger than 32G.  If the size
660  		 * is zero, reset it to an empty directory.  Both of these
661  		 * conditions trigger dinode verifier errors, so there is no
662  		 * in-core state to reset.
663  		 */
664  		if (size > XFS_DIR2_SPACE_SIZE)
665  			dip->di_size = cpu_to_be64(XFS_DIR2_SPACE_SIZE);
666  		else if (size == 0)
667  			xrep_dinode_zap_dir(ri, dip);
668  		break;
669  	}
670  }
671  
672  /* Fix extent size hints. */
673  STATIC void
xrep_dinode_extsize_hints(struct xfs_scrub * sc,struct xfs_dinode * dip)674  xrep_dinode_extsize_hints(
675  	struct xfs_scrub	*sc,
676  	struct xfs_dinode	*dip)
677  {
678  	struct xfs_mount	*mp = sc->mp;
679  	uint64_t		flags2 = be64_to_cpu(dip->di_flags2);
680  	uint16_t		flags = be16_to_cpu(dip->di_flags);
681  	uint16_t		mode = be16_to_cpu(dip->di_mode);
682  
683  	xfs_failaddr_t		fa;
684  
685  	trace_xrep_dinode_extsize_hints(sc, dip);
686  
687  	fa = xfs_inode_validate_extsize(mp, be32_to_cpu(dip->di_extsize),
688  			mode, flags);
689  	if (fa) {
690  		dip->di_extsize = 0;
691  		dip->di_flags &= ~cpu_to_be16(XFS_DIFLAG_EXTSIZE |
692  					      XFS_DIFLAG_EXTSZINHERIT);
693  	}
694  
695  	if (dip->di_version < 3)
696  		return;
697  
698  	fa = xfs_inode_validate_cowextsize(mp, be32_to_cpu(dip->di_cowextsize),
699  			mode, flags, flags2);
700  	if (fa) {
701  		dip->di_cowextsize = 0;
702  		dip->di_flags2 &= ~cpu_to_be64(XFS_DIFLAG2_COWEXTSIZE);
703  	}
704  }
705  
706  /* Count extents and blocks for an inode given an rmap. */
707  STATIC int
xrep_dinode_walk_rmap(struct xfs_btree_cur * cur,const struct xfs_rmap_irec * rec,void * priv)708  xrep_dinode_walk_rmap(
709  	struct xfs_btree_cur		*cur,
710  	const struct xfs_rmap_irec	*rec,
711  	void				*priv)
712  {
713  	struct xrep_inode		*ri = priv;
714  	int				error = 0;
715  
716  	if (xchk_should_terminate(ri->sc, &error))
717  		return error;
718  
719  	/* We only care about this inode. */
720  	if (rec->rm_owner != ri->sc->sm->sm_ino)
721  		return 0;
722  
723  	if (rec->rm_flags & XFS_RMAP_ATTR_FORK) {
724  		ri->attr_blocks += rec->rm_blockcount;
725  		if (!(rec->rm_flags & XFS_RMAP_BMBT_BLOCK))
726  			ri->attr_extents++;
727  
728  		return 0;
729  	}
730  
731  	ri->data_blocks += rec->rm_blockcount;
732  	if (!(rec->rm_flags & XFS_RMAP_BMBT_BLOCK))
733  		ri->data_extents++;
734  
735  	return 0;
736  }
737  
738  /* Count extents and blocks for an inode from all AG rmap data. */
739  STATIC int
xrep_dinode_count_ag_rmaps(struct xrep_inode * ri,struct xfs_perag * pag)740  xrep_dinode_count_ag_rmaps(
741  	struct xrep_inode	*ri,
742  	struct xfs_perag	*pag)
743  {
744  	struct xfs_btree_cur	*cur;
745  	struct xfs_buf		*agf;
746  	int			error;
747  
748  	error = xfs_alloc_read_agf(pag, ri->sc->tp, 0, &agf);
749  	if (error)
750  		return error;
751  
752  	cur = xfs_rmapbt_init_cursor(ri->sc->mp, ri->sc->tp, agf, pag);
753  	error = xfs_rmap_query_all(cur, xrep_dinode_walk_rmap, ri);
754  	xfs_btree_del_cursor(cur, error);
755  	xfs_trans_brelse(ri->sc->tp, agf);
756  	return error;
757  }
758  
759  /* Count extents and blocks for a given inode from all rmap data. */
760  STATIC int
xrep_dinode_count_rmaps(struct xrep_inode * ri)761  xrep_dinode_count_rmaps(
762  	struct xrep_inode	*ri)
763  {
764  	struct xfs_perag	*pag;
765  	xfs_agnumber_t		agno;
766  	int			error;
767  
768  	if (!xfs_has_rmapbt(ri->sc->mp) || xfs_has_realtime(ri->sc->mp))
769  		return -EOPNOTSUPP;
770  
771  	for_each_perag(ri->sc->mp, agno, pag) {
772  		error = xrep_dinode_count_ag_rmaps(ri, pag);
773  		if (error) {
774  			xfs_perag_rele(pag);
775  			return error;
776  		}
777  	}
778  
779  	/* Can't have extents on both the rt and the data device. */
780  	if (ri->data_extents && ri->rt_extents)
781  		return -EFSCORRUPTED;
782  
783  	trace_xrep_dinode_count_rmaps(ri->sc,
784  			ri->data_blocks, ri->rt_blocks, ri->attr_blocks,
785  			ri->data_extents, ri->rt_extents, ri->attr_extents);
786  	return 0;
787  }
788  
789  /* Return true if this extents-format ifork looks like garbage. */
790  STATIC bool
xrep_dinode_bad_extents_fork(struct xfs_scrub * sc,struct xfs_dinode * dip,unsigned int dfork_size,int whichfork)791  xrep_dinode_bad_extents_fork(
792  	struct xfs_scrub	*sc,
793  	struct xfs_dinode	*dip,
794  	unsigned int		dfork_size,
795  	int			whichfork)
796  {
797  	struct xfs_bmbt_irec	new;
798  	struct xfs_bmbt_rec	*dp;
799  	xfs_extnum_t		nex;
800  	bool			isrt;
801  	unsigned int		i;
802  
803  	nex = xfs_dfork_nextents(dip, whichfork);
804  	if (nex > dfork_size / sizeof(struct xfs_bmbt_rec))
805  		return true;
806  
807  	dp = XFS_DFORK_PTR(dip, whichfork);
808  
809  	isrt = dip->di_flags & cpu_to_be16(XFS_DIFLAG_REALTIME);
810  	for (i = 0; i < nex; i++, dp++) {
811  		xfs_failaddr_t	fa;
812  
813  		xfs_bmbt_disk_get_all(dp, &new);
814  		fa = xfs_bmap_validate_extent_raw(sc->mp, isrt, whichfork,
815  				&new);
816  		if (fa)
817  			return true;
818  	}
819  
820  	return false;
821  }
822  
823  /* Return true if this btree-format ifork looks like garbage. */
824  STATIC bool
xrep_dinode_bad_bmbt_fork(struct xfs_scrub * sc,struct xfs_dinode * dip,unsigned int dfork_size,int whichfork)825  xrep_dinode_bad_bmbt_fork(
826  	struct xfs_scrub	*sc,
827  	struct xfs_dinode	*dip,
828  	unsigned int		dfork_size,
829  	int			whichfork)
830  {
831  	struct xfs_bmdr_block	*dfp;
832  	xfs_extnum_t		nex;
833  	unsigned int		i;
834  	unsigned int		dmxr;
835  	unsigned int		nrecs;
836  	unsigned int		level;
837  
838  	nex = xfs_dfork_nextents(dip, whichfork);
839  	if (nex <= dfork_size / sizeof(struct xfs_bmbt_rec))
840  		return true;
841  
842  	if (dfork_size < sizeof(struct xfs_bmdr_block))
843  		return true;
844  
845  	dfp = XFS_DFORK_PTR(dip, whichfork);
846  	nrecs = be16_to_cpu(dfp->bb_numrecs);
847  	level = be16_to_cpu(dfp->bb_level);
848  
849  	if (nrecs == 0 || xfs_bmdr_space_calc(nrecs) > dfork_size)
850  		return true;
851  	if (level == 0 || level >= XFS_BM_MAXLEVELS(sc->mp, whichfork))
852  		return true;
853  
854  	dmxr = xfs_bmdr_maxrecs(dfork_size, 0);
855  	for (i = 1; i <= nrecs; i++) {
856  		struct xfs_bmbt_key	*fkp;
857  		xfs_bmbt_ptr_t		*fpp;
858  		xfs_fileoff_t		fileoff;
859  		xfs_fsblock_t		fsbno;
860  
861  		fkp = xfs_bmdr_key_addr(dfp, i);
862  		fileoff = be64_to_cpu(fkp->br_startoff);
863  		if (!xfs_verify_fileoff(sc->mp, fileoff))
864  			return true;
865  
866  		fpp = xfs_bmdr_ptr_addr(dfp, i, dmxr);
867  		fsbno = be64_to_cpu(*fpp);
868  		if (!xfs_verify_fsbno(sc->mp, fsbno))
869  			return true;
870  	}
871  
872  	return false;
873  }
874  
875  /*
876   * Check the data fork for things that will fail the ifork verifiers or the
877   * ifork formatters.
878   */
879  STATIC bool
xrep_dinode_check_dfork(struct xfs_scrub * sc,struct xfs_dinode * dip,uint16_t mode)880  xrep_dinode_check_dfork(
881  	struct xfs_scrub	*sc,
882  	struct xfs_dinode	*dip,
883  	uint16_t		mode)
884  {
885  	void			*dfork_ptr;
886  	int64_t			data_size;
887  	unsigned int		fmt;
888  	unsigned int		dfork_size;
889  
890  	/*
891  	 * Verifier functions take signed int64_t, so check for bogus negative
892  	 * values first.
893  	 */
894  	data_size = be64_to_cpu(dip->di_size);
895  	if (data_size < 0)
896  		return true;
897  
898  	fmt = XFS_DFORK_FORMAT(dip, XFS_DATA_FORK);
899  	switch (mode & S_IFMT) {
900  	case S_IFIFO:
901  	case S_IFCHR:
902  	case S_IFBLK:
903  	case S_IFSOCK:
904  		if (fmt != XFS_DINODE_FMT_DEV)
905  			return true;
906  		break;
907  	case S_IFREG:
908  		if (fmt == XFS_DINODE_FMT_LOCAL)
909  			return true;
910  		fallthrough;
911  	case S_IFLNK:
912  	case S_IFDIR:
913  		switch (fmt) {
914  		case XFS_DINODE_FMT_LOCAL:
915  		case XFS_DINODE_FMT_EXTENTS:
916  		case XFS_DINODE_FMT_BTREE:
917  			break;
918  		default:
919  			return true;
920  		}
921  		break;
922  	default:
923  		return true;
924  	}
925  
926  	dfork_size = XFS_DFORK_SIZE(dip, sc->mp, XFS_DATA_FORK);
927  	dfork_ptr = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
928  
929  	switch (fmt) {
930  	case XFS_DINODE_FMT_DEV:
931  		break;
932  	case XFS_DINODE_FMT_LOCAL:
933  		/* dir/symlink structure cannot be larger than the fork */
934  		if (data_size > dfork_size)
935  			return true;
936  		/* directory structure must pass verification. */
937  		if (S_ISDIR(mode) &&
938  		    xfs_dir2_sf_verify(sc->mp, dfork_ptr, data_size) != NULL)
939  			return true;
940  		/* symlink structure must pass verification. */
941  		if (S_ISLNK(mode) &&
942  		    xfs_symlink_shortform_verify(dfork_ptr, data_size) != NULL)
943  			return true;
944  		break;
945  	case XFS_DINODE_FMT_EXTENTS:
946  		if (xrep_dinode_bad_extents_fork(sc, dip, dfork_size,
947  				XFS_DATA_FORK))
948  			return true;
949  		break;
950  	case XFS_DINODE_FMT_BTREE:
951  		if (xrep_dinode_bad_bmbt_fork(sc, dip, dfork_size,
952  				XFS_DATA_FORK))
953  			return true;
954  		break;
955  	default:
956  		return true;
957  	}
958  
959  	return false;
960  }
961  
962  static void
xrep_dinode_set_data_nextents(struct xfs_dinode * dip,xfs_extnum_t nextents)963  xrep_dinode_set_data_nextents(
964  	struct xfs_dinode	*dip,
965  	xfs_extnum_t		nextents)
966  {
967  	if (xfs_dinode_has_large_extent_counts(dip))
968  		dip->di_big_nextents = cpu_to_be64(nextents);
969  	else
970  		dip->di_nextents = cpu_to_be32(nextents);
971  }
972  
973  static void
xrep_dinode_set_attr_nextents(struct xfs_dinode * dip,xfs_extnum_t nextents)974  xrep_dinode_set_attr_nextents(
975  	struct xfs_dinode	*dip,
976  	xfs_extnum_t		nextents)
977  {
978  	if (xfs_dinode_has_large_extent_counts(dip))
979  		dip->di_big_anextents = cpu_to_be32(nextents);
980  	else
981  		dip->di_anextents = cpu_to_be16(nextents);
982  }
983  
984  /* Reset the data fork to something sane. */
985  STATIC void
xrep_dinode_zap_dfork(struct xrep_inode * ri,struct xfs_dinode * dip,uint16_t mode)986  xrep_dinode_zap_dfork(
987  	struct xrep_inode	*ri,
988  	struct xfs_dinode	*dip,
989  	uint16_t		mode)
990  {
991  	struct xfs_scrub	*sc = ri->sc;
992  
993  	trace_xrep_dinode_zap_dfork(sc, dip);
994  
995  	ri->ino_sick_mask |= XFS_SICK_INO_BMBTD_ZAPPED;
996  
997  	xrep_dinode_set_data_nextents(dip, 0);
998  	ri->data_blocks = 0;
999  	ri->rt_blocks = 0;
1000  
1001  	/* Special files always get reset to DEV */
1002  	switch (mode & S_IFMT) {
1003  	case S_IFIFO:
1004  	case S_IFCHR:
1005  	case S_IFBLK:
1006  	case S_IFSOCK:
1007  		dip->di_format = XFS_DINODE_FMT_DEV;
1008  		dip->di_size = 0;
1009  		return;
1010  	}
1011  
1012  	/*
1013  	 * If we have data extents, reset to an empty map and hope the user
1014  	 * will run the bmapbtd checker next.
1015  	 */
1016  	if (ri->data_extents || ri->rt_extents || S_ISREG(mode)) {
1017  		dip->di_format = XFS_DINODE_FMT_EXTENTS;
1018  		return;
1019  	}
1020  
1021  	/* Otherwise, reset the local format to the minimum. */
1022  	switch (mode & S_IFMT) {
1023  	case S_IFLNK:
1024  		xrep_dinode_zap_symlink(ri, dip);
1025  		break;
1026  	case S_IFDIR:
1027  		xrep_dinode_zap_dir(ri, dip);
1028  		break;
1029  	}
1030  }
1031  
1032  /*
1033   * Check the attr fork for things that will fail the ifork verifiers or the
1034   * ifork formatters.
1035   */
1036  STATIC bool
xrep_dinode_check_afork(struct xfs_scrub * sc,struct xfs_dinode * dip)1037  xrep_dinode_check_afork(
1038  	struct xfs_scrub		*sc,
1039  	struct xfs_dinode		*dip)
1040  {
1041  	struct xfs_attr_sf_hdr		*afork_ptr;
1042  	size_t				attr_size;
1043  	unsigned int			afork_size;
1044  
1045  	if (XFS_DFORK_BOFF(dip) == 0)
1046  		return dip->di_aformat != XFS_DINODE_FMT_EXTENTS ||
1047  		       xfs_dfork_attr_extents(dip) != 0;
1048  
1049  	afork_size = XFS_DFORK_SIZE(dip, sc->mp, XFS_ATTR_FORK);
1050  	afork_ptr = XFS_DFORK_PTR(dip, XFS_ATTR_FORK);
1051  
1052  	switch (XFS_DFORK_FORMAT(dip, XFS_ATTR_FORK)) {
1053  	case XFS_DINODE_FMT_LOCAL:
1054  		/* Fork has to be large enough to extract the xattr size. */
1055  		if (afork_size < sizeof(struct xfs_attr_sf_hdr))
1056  			return true;
1057  
1058  		/* xattr structure cannot be larger than the fork */
1059  		attr_size = be16_to_cpu(afork_ptr->totsize);
1060  		if (attr_size > afork_size)
1061  			return true;
1062  
1063  		/* xattr structure must pass verification. */
1064  		return xfs_attr_shortform_verify(afork_ptr, attr_size) != NULL;
1065  	case XFS_DINODE_FMT_EXTENTS:
1066  		if (xrep_dinode_bad_extents_fork(sc, dip, afork_size,
1067  					XFS_ATTR_FORK))
1068  			return true;
1069  		break;
1070  	case XFS_DINODE_FMT_BTREE:
1071  		if (xrep_dinode_bad_bmbt_fork(sc, dip, afork_size,
1072  					XFS_ATTR_FORK))
1073  			return true;
1074  		break;
1075  	default:
1076  		return true;
1077  	}
1078  
1079  	return false;
1080  }
1081  
1082  /*
1083   * Reset the attr fork to empty.  Since the attr fork could have contained
1084   * ACLs, make the file readable only by root.
1085   */
1086  STATIC void
xrep_dinode_zap_afork(struct xrep_inode * ri,struct xfs_dinode * dip,uint16_t mode)1087  xrep_dinode_zap_afork(
1088  	struct xrep_inode	*ri,
1089  	struct xfs_dinode	*dip,
1090  	uint16_t		mode)
1091  {
1092  	struct xfs_scrub	*sc = ri->sc;
1093  
1094  	trace_xrep_dinode_zap_afork(sc, dip);
1095  
1096  	ri->ino_sick_mask |= XFS_SICK_INO_BMBTA_ZAPPED;
1097  
1098  	dip->di_aformat = XFS_DINODE_FMT_EXTENTS;
1099  	xrep_dinode_set_attr_nextents(dip, 0);
1100  	ri->attr_blocks = 0;
1101  
1102  	/*
1103  	 * If the data fork is in btree format, removing the attr fork entirely
1104  	 * might cause verifier failures if the next level down in the bmbt
1105  	 * could now fit in the data fork area.
1106  	 */
1107  	if (dip->di_format != XFS_DINODE_FMT_BTREE)
1108  		dip->di_forkoff = 0;
1109  	dip->di_mode = cpu_to_be16(mode & ~0777);
1110  	dip->di_uid = 0;
1111  	dip->di_gid = 0;
1112  }
1113  
/*
 * Make sure the fork offset is a sensible value.
 *
 * Compute the minimum amount of literal area that each fork needs, then
 * adjust di_forkoff (or zap the attr fork) if a fork that was reset to zero
 * extents does not have room for the stub that the bmap repair code needs to
 * rebuild mappings found by the rmapbt scan.  @mode is only used if the attr
 * fork has to be zapped.
 */
STATIC void
xrep_dinode_ensure_forkoff(
	struct xrep_inode	*ri,
	struct xfs_dinode	*dip,
	uint16_t		mode)
{
	struct xfs_bmdr_block	*bmdr;
	struct xfs_scrub	*sc = ri->sc;
	xfs_extnum_t		attr_extents, data_extents;
	size_t			bmdr_minsz = xfs_bmdr_space_calc(1);
	unsigned int		lit_sz = XFS_LITINO(sc->mp);
	unsigned int		afork_min, dfork_min;

	trace_xrep_dinode_ensure_forkoff(sc, dip);

	/*
	 * Before calling this function, xrep_dinode_core ensured that both
	 * forks actually fit inside their respective literal areas.  If this
	 * was not the case, the fork was reset to FMT_EXTENTS with zero
	 * records.  If the rmapbt scan found attr or data fork blocks, this
	 * will be noted in the dinode_stats, and we must leave enough room
	 * for the bmap repair code to reconstruct the mapping structure.
	 *
	 * First, compute the minimum space required for the attr fork.
	 */
	switch (dip->di_aformat) {
	case XFS_DINODE_FMT_LOCAL:
		/*
		 * If we still have a shortform xattr structure at all, that
		 * means the attr fork area was exactly large enough to fit
		 * the sf structure.
		 */
		afork_min = XFS_DFORK_SIZE(dip, sc->mp, XFS_ATTR_FORK);
		break;
	case XFS_DINODE_FMT_EXTENTS:
		attr_extents = xfs_dfork_attr_extents(dip);
		if (attr_extents) {
			/*
			 * We must maintain sufficient space to hold the entire
			 * extent map array in the attr fork.  Note that we
			 * previously zapped the fork if it had no chance of
			 * fitting in the inode.
			 */
			afork_min = sizeof(struct xfs_bmbt_rec) * attr_extents;
		} else if (ri->attr_extents > 0) {
			/*
			 * The attr fork thinks it has zero extents, but we
			 * found some xattr extents.  We need to leave enough
			 * empty space here so that the incore attr fork will
			 * get created (and hence trigger the attr fork bmap
			 * repairer).
			 */
			afork_min = bmdr_minsz;
		} else {
			/* No extents on disk or found in rmapbt. */
			afork_min = 0;
		}
		break;
	case XFS_DINODE_FMT_BTREE:
		/* Must have space for btree header and key/pointers. */
		bmdr = XFS_DFORK_PTR(dip, XFS_ATTR_FORK);
		afork_min = xfs_bmap_broot_space(sc->mp, bmdr);
		break;
	default:
		/* We should never see any other formats. */
		afork_min = 0;
		break;
	}

	/* Compute the minimum space required for the data fork. */
	switch (dip->di_format) {
	case XFS_DINODE_FMT_DEV:
		dfork_min = sizeof(__be32);
		break;
	case XFS_DINODE_FMT_UUID:
		dfork_min = sizeof(uuid_t);
		break;
	case XFS_DINODE_FMT_LOCAL:
		/*
		 * If we still have a shortform data fork at all, that means
		 * the data fork area was large enough to fit whatever was in
		 * there.
		 */
		dfork_min = be64_to_cpu(dip->di_size);
		break;
	case XFS_DINODE_FMT_EXTENTS:
		data_extents = xfs_dfork_data_extents(dip);
		if (data_extents) {
			/*
			 * We must maintain sufficient space to hold the entire
			 * extent map array in the data fork.  Note that we
			 * previously zapped the fork if it had no chance of
			 * fitting in the inode.
			 */
			dfork_min = sizeof(struct xfs_bmbt_rec) * data_extents;
		} else if (ri->data_extents > 0 || ri->rt_extents > 0) {
			/*
			 * The data fork thinks it has zero extents, but we
			 * found some data extents.  We need to leave enough
			 * empty space here so that the data fork bmap repair
			 * will recover the mappings.
			 */
			dfork_min = bmdr_minsz;
		} else {
			/* No extents on disk or found in rmapbt. */
			dfork_min = 0;
		}
		break;
	case XFS_DINODE_FMT_BTREE:
		/* Must have space for btree header and key/pointers. */
		bmdr = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
		dfork_min = xfs_bmap_broot_space(sc->mp, bmdr);
		break;
	default:
		/* Any other format needs no reserved space. */
		dfork_min = 0;
		break;
	}

	/*
	 * Round all values up to the nearest 8 bytes, because that is the
	 * precision of di_forkoff.
	 */
	afork_min = roundup(afork_min, 8);
	dfork_min = roundup(dfork_min, 8);
	bmdr_minsz = roundup(bmdr_minsz, 8);

	ASSERT(dfork_min <= lit_sz);
	ASSERT(afork_min <= lit_sz);

	/*
	 * If the data fork was zapped and we don't have enough space for the
	 * recovery fork, move the attr fork up.
	 */
	if (dip->di_format == XFS_DINODE_FMT_EXTENTS &&
	    xfs_dfork_data_extents(dip) == 0 &&
	    (ri->data_extents > 0 || ri->rt_extents > 0) &&
	    bmdr_minsz > XFS_DFORK_DSIZE(dip, sc->mp)) {
		if (bmdr_minsz + afork_min > lit_sz) {
			/*
			 * The attr fork and the stub fork we need to recover
			 * the data fork won't both fit.  Zap the attr fork.
			 */
			xrep_dinode_zap_afork(ri, dip, mode);
			afork_min = bmdr_minsz;
		} else {
			void	*before, *after;

			/*
			 * Otherwise, just slide the attr fork up.  The old
			 * fork address must be sampled before di_forkoff
			 * changes.  NOTE(review): the copy length is taken
			 * after the update, so presumably it is the size of
			 * the relocated attr fork area -- confirm.
			 */
			before = XFS_DFORK_APTR(dip);
			dip->di_forkoff = bmdr_minsz >> 3;
			after = XFS_DFORK_APTR(dip);
			memmove(after, before, XFS_DFORK_ASIZE(dip, sc->mp));
		}
	}

	/*
	 * If the attr fork was zapped and we don't have enough space for the
	 * recovery fork, move the attr fork down.
	 */
	if (dip->di_aformat == XFS_DINODE_FMT_EXTENTS &&
	    xfs_dfork_attr_extents(dip) == 0 &&
	    ri->attr_extents > 0 &&
	    bmdr_minsz > XFS_DFORK_ASIZE(dip, sc->mp)) {
		if (dip->di_format == XFS_DINODE_FMT_BTREE) {
			/*
			 * If the data fork is in btree format then we can't
			 * adjust forkoff because that runs the risk of
			 * violating the extents/btree format transition rules.
			 */
		} else if (bmdr_minsz + dfork_min > lit_sz) {
			/*
			 * If we can't move the attr fork, too bad, we lose the
			 * attr fork and leak its blocks.
			 */
			xrep_dinode_zap_afork(ri, dip, mode);
		} else {
			/*
			 * Otherwise, just slide the attr fork down.  The attr
			 * fork is empty, so we don't have any old contents to
			 * move here.
			 */
			dip->di_forkoff = (lit_sz - bmdr_minsz) >> 3;
		}
	}
}
1300  
1301  /*
1302   * Zap the data/attr forks if we spot anything that isn't going to pass the
1303   * ifork verifiers or the ifork formatters, because we need to get the inode
1304   * into good enough shape that the higher level repair functions can run.
1305   */
1306  STATIC void
xrep_dinode_zap_forks(struct xrep_inode * ri,struct xfs_dinode * dip)1307  xrep_dinode_zap_forks(
1308  	struct xrep_inode	*ri,
1309  	struct xfs_dinode	*dip)
1310  {
1311  	struct xfs_scrub	*sc = ri->sc;
1312  	xfs_extnum_t		data_extents;
1313  	xfs_extnum_t		attr_extents;
1314  	xfs_filblks_t		nblocks;
1315  	uint16_t		mode;
1316  	bool			zap_datafork = false;
1317  	bool			zap_attrfork = ri->zap_acls;
1318  
1319  	trace_xrep_dinode_zap_forks(sc, dip);
1320  
1321  	mode = be16_to_cpu(dip->di_mode);
1322  
1323  	data_extents = xfs_dfork_data_extents(dip);
1324  	attr_extents = xfs_dfork_attr_extents(dip);
1325  	nblocks = be64_to_cpu(dip->di_nblocks);
1326  
1327  	/* Inode counters don't make sense? */
1328  	if (data_extents > nblocks)
1329  		zap_datafork = true;
1330  	if (attr_extents > nblocks)
1331  		zap_attrfork = true;
1332  	if (data_extents + attr_extents > nblocks)
1333  		zap_datafork = zap_attrfork = true;
1334  
1335  	if (!zap_datafork)
1336  		zap_datafork = xrep_dinode_check_dfork(sc, dip, mode);
1337  	if (!zap_attrfork)
1338  		zap_attrfork = xrep_dinode_check_afork(sc, dip);
1339  
1340  	/* Zap whatever's bad. */
1341  	if (zap_attrfork)
1342  		xrep_dinode_zap_afork(ri, dip, mode);
1343  	if (zap_datafork)
1344  		xrep_dinode_zap_dfork(ri, dip, mode);
1345  	xrep_dinode_ensure_forkoff(ri, dip, mode);
1346  
1347  	/*
1348  	 * Zero di_nblocks if we don't have any extents at all to satisfy the
1349  	 * buffer verifier.
1350  	 */
1351  	data_extents = xfs_dfork_data_extents(dip);
1352  	attr_extents = xfs_dfork_attr_extents(dip);
1353  	if (data_extents + attr_extents == 0)
1354  		dip->di_nblocks = 0;
1355  }
1356  
/*
 * Inode didn't pass dinode verifiers, so fix the raw buffer and retry iget.
 *
 * Counts the rmaps for this inode, reads the inode cluster buffer, repairs
 * the ondisk inode in place, logs it, and then tries to iget the inode
 * before committing the transaction.  On success, returns 0 with sc->ip set,
 * a fresh transaction allocated, dquots attached, and xchk_ilock having been
 * called for both XFS_IOLOCK_EXCL and XFS_ILOCK_EXCL.  Otherwise returns a
 * negative errno; note that the buffer changes are still committed if only
 * the mode repair or the iget failed.
 */
STATIC int
xrep_dinode_core(
	struct xrep_inode	*ri)
{
	struct xfs_scrub	*sc = ri->sc;
	struct xfs_buf		*bp;
	struct xfs_dinode	*dip;
	xfs_ino_t		ino = sc->sm->sm_ino;
	int			error;
	int			iget_error;

	/* Figure out what this inode had mapped in both forks. */
	error = xrep_dinode_count_rmaps(ri);
	if (error)
		return error;

	/* Read the inode cluster buffer. */
	error = xfs_trans_read_buf(sc->mp, sc->tp, sc->mp->m_ddev_targp,
			ri->imap.im_blkno, ri->imap.im_len, XBF_UNMAPPED, &bp,
			NULL);
	if (error)
		return error;

	/* Make sure we can pass the inode buffer verifier. */
	xrep_dinode_buf(sc, bp);
	bp->b_ops = &xfs_inode_buf_ops;

	/*
	 * Fix everything the verifier will complain about.  If the mode
	 * repair fails, skip the remaining fixes but still write out what we
	 * have; the failure is reported after the commit below.
	 */
	dip = xfs_buf_offset(bp, ri->imap.im_boffset);
	xrep_dinode_header(sc, dip);
	iget_error = xrep_dinode_mode(ri, dip);
	if (iget_error)
		goto write;
	xrep_dinode_nlinks(dip);
	xrep_dinode_flags(sc, dip, ri->rt_extents > 0);
	xrep_dinode_size(ri, dip);
	xrep_dinode_extsize_hints(sc, dip);
	xrep_dinode_zap_forks(ri, dip);

write:
	/* Write out the inode: recompute the CRC and log this inode's bytes. */
	trace_xrep_dinode_fixed(sc, dip);
	xfs_dinode_calc_crc(sc->mp, dip);
	xfs_trans_buf_set_type(sc->tp, bp, XFS_BLFT_DINO_BUF);
	xfs_trans_log_buf(sc->tp, bp, ri->imap.im_boffset,
			ri->imap.im_boffset + sc->mp->m_sb.sb_inodesize - 1);

	/*
	 * In theory, we've fixed the ondisk inode record enough that we should
	 * be able to load the inode into the cache.  Try to iget that inode
	 * now while we hold the AGI and the inode cluster buffer and take the
	 * IOLOCK so that we can continue with repairs without anyone else
	 * accessing the inode.  If iget fails, we still need to commit the
	 * changes.
	 */
	if (!iget_error)
		iget_error = xchk_iget(sc, ino, &sc->ip);
	if (!iget_error)
		xchk_ilock(sc, XFS_IOLOCK_EXCL);

	/*
	 * Commit the inode cluster buffer updates and drop the AGI buffer that
	 * we've been holding since scrub setup.  From here on out, repairs
	 * deal only with the cached inode.
	 */
	error = xrep_trans_commit(sc);
	if (error)
		return error;

	/* A commit failure takes precedence over a mode or iget failure. */
	if (iget_error)
		return iget_error;

	/* Start a new transaction for the incore inode repairs. */
	error = xchk_trans_alloc(sc, 0);
	if (error)
		return error;

	error = xrep_ino_dqattach(sc);
	if (error)
		return error;

	/* Pass any zapped-fork state recorded during repair to the inode. */
	xchk_ilock(sc, XFS_ILOCK_EXCL);
	if (ri->ino_sick_mask)
		xfs_inode_mark_sick(sc->ip, ri->ino_sick_mask);
	return 0;
}
1443  
1444  /* Fix everything xfs_dinode_verify cares about. */
1445  STATIC int
xrep_dinode_problems(struct xrep_inode * ri)1446  xrep_dinode_problems(
1447  	struct xrep_inode	*ri)
1448  {
1449  	struct xfs_scrub	*sc = ri->sc;
1450  	int			error;
1451  
1452  	error = xrep_dinode_core(ri);
1453  	if (error)
1454  		return error;
1455  
1456  	/* We had to fix a totally busted inode, schedule quotacheck. */
1457  	if (XFS_IS_UQUOTA_ON(sc->mp))
1458  		xrep_force_quotacheck(sc, XFS_DQTYPE_USER);
1459  	if (XFS_IS_GQUOTA_ON(sc->mp))
1460  		xrep_force_quotacheck(sc, XFS_DQTYPE_GROUP);
1461  	if (XFS_IS_PQUOTA_ON(sc->mp))
1462  		xrep_force_quotacheck(sc, XFS_DQTYPE_PROJ);
1463  
1464  	return 0;
1465  }
1466  
1467  /*
1468   * Fix problems that the verifiers don't care about.  In general these are
1469   * errors that don't cause problems elsewhere in the kernel that we can easily
1470   * detect, so we don't check them all that rigorously.
1471   */
1472  
1473  /* Make sure block and extent counts are ok. */
1474  STATIC int
xrep_inode_blockcounts(struct xfs_scrub * sc)1475  xrep_inode_blockcounts(
1476  	struct xfs_scrub	*sc)
1477  {
1478  	struct xfs_ifork	*ifp;
1479  	xfs_filblks_t		count;
1480  	xfs_filblks_t		acount;
1481  	xfs_extnum_t		nextents;
1482  	int			error;
1483  
1484  	trace_xrep_inode_blockcounts(sc);
1485  
1486  	/* Set data fork counters from the data fork mappings. */
1487  	error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_DATA_FORK,
1488  			&nextents, &count);
1489  	if (error)
1490  		return error;
1491  	if (xfs_is_reflink_inode(sc->ip)) {
1492  		/*
1493  		 * data fork blockcount can exceed physical storage if a user
1494  		 * reflinks the same block over and over again.
1495  		 */
1496  		;
1497  	} else if (XFS_IS_REALTIME_INODE(sc->ip)) {
1498  		if (count >= sc->mp->m_sb.sb_rblocks)
1499  			return -EFSCORRUPTED;
1500  	} else {
1501  		if (count >= sc->mp->m_sb.sb_dblocks)
1502  			return -EFSCORRUPTED;
1503  	}
1504  	error = xrep_ino_ensure_extent_count(sc, XFS_DATA_FORK, nextents);
1505  	if (error)
1506  		return error;
1507  	sc->ip->i_df.if_nextents = nextents;
1508  
1509  	/* Set attr fork counters from the attr fork mappings. */
1510  	ifp = xfs_ifork_ptr(sc->ip, XFS_ATTR_FORK);
1511  	if (ifp) {
1512  		error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_ATTR_FORK,
1513  				&nextents, &acount);
1514  		if (error)
1515  			return error;
1516  		if (count >= sc->mp->m_sb.sb_dblocks)
1517  			return -EFSCORRUPTED;
1518  		error = xrep_ino_ensure_extent_count(sc, XFS_ATTR_FORK,
1519  				nextents);
1520  		if (error)
1521  			return error;
1522  		ifp->if_nextents = nextents;
1523  	} else {
1524  		acount = 0;
1525  	}
1526  
1527  	sc->ip->i_nblocks = count + acount;
1528  	return 0;
1529  }
1530  
1531  /* Check for invalid uid/gid/prid. */
1532  STATIC void
xrep_inode_ids(struct xfs_scrub * sc)1533  xrep_inode_ids(
1534  	struct xfs_scrub	*sc)
1535  {
1536  	bool			dirty = false;
1537  
1538  	trace_xrep_inode_ids(sc);
1539  
1540  	if (!uid_valid(VFS_I(sc->ip)->i_uid)) {
1541  		i_uid_write(VFS_I(sc->ip), 0);
1542  		dirty = true;
1543  		if (XFS_IS_UQUOTA_ON(sc->mp))
1544  			xrep_force_quotacheck(sc, XFS_DQTYPE_USER);
1545  	}
1546  
1547  	if (!gid_valid(VFS_I(sc->ip)->i_gid)) {
1548  		i_gid_write(VFS_I(sc->ip), 0);
1549  		dirty = true;
1550  		if (XFS_IS_GQUOTA_ON(sc->mp))
1551  			xrep_force_quotacheck(sc, XFS_DQTYPE_GROUP);
1552  	}
1553  
1554  	if (sc->ip->i_projid == -1U) {
1555  		sc->ip->i_projid = 0;
1556  		dirty = true;
1557  		if (XFS_IS_PQUOTA_ON(sc->mp))
1558  			xrep_force_quotacheck(sc, XFS_DQTYPE_PROJ);
1559  	}
1560  
1561  	/* strip setuid/setgid if we touched any of the ids */
1562  	if (dirty)
1563  		VFS_I(sc->ip)->i_mode &= ~(S_ISUID | S_ISGID);
1564  }
1565  
1566  static inline void
xrep_clamp_timestamp(struct xfs_inode * ip,struct timespec64 * ts)1567  xrep_clamp_timestamp(
1568  	struct xfs_inode	*ip,
1569  	struct timespec64	*ts)
1570  {
1571  	ts->tv_nsec = clamp_t(long, ts->tv_nsec, 0, NSEC_PER_SEC);
1572  	*ts = timestamp_truncate(*ts, VFS_I(ip));
1573  }
1574  
1575  /* Nanosecond counters can't have more than 1 billion. */
1576  STATIC void
xrep_inode_timestamps(struct xfs_inode * ip)1577  xrep_inode_timestamps(
1578  	struct xfs_inode	*ip)
1579  {
1580  	struct timespec64	tstamp;
1581  	struct inode		*inode = VFS_I(ip);
1582  
1583  	tstamp = inode_get_atime(inode);
1584  	xrep_clamp_timestamp(ip, &tstamp);
1585  	inode_set_atime_to_ts(inode, tstamp);
1586  
1587  	tstamp = inode_get_mtime(inode);
1588  	xrep_clamp_timestamp(ip, &tstamp);
1589  	inode_set_mtime_to_ts(inode, tstamp);
1590  
1591  	tstamp = inode_get_ctime(inode);
1592  	xrep_clamp_timestamp(ip, &tstamp);
1593  	inode_set_ctime_to_ts(inode, tstamp);
1594  
1595  	xrep_clamp_timestamp(ip, &ip->i_crtime);
1596  }
1597  
1598  /* Fix inode flags that don't make sense together. */
1599  STATIC void
xrep_inode_flags(struct xfs_scrub * sc)1600  xrep_inode_flags(
1601  	struct xfs_scrub	*sc)
1602  {
1603  	uint16_t		mode;
1604  
1605  	trace_xrep_inode_flags(sc);
1606  
1607  	mode = VFS_I(sc->ip)->i_mode;
1608  
1609  	/* Clear junk flags */
1610  	if (sc->ip->i_diflags & ~XFS_DIFLAG_ANY)
1611  		sc->ip->i_diflags &= ~XFS_DIFLAG_ANY;
1612  
1613  	/* NEWRTBM only applies to realtime bitmaps */
1614  	if (sc->ip->i_ino == sc->mp->m_sb.sb_rbmino)
1615  		sc->ip->i_diflags |= XFS_DIFLAG_NEWRTBM;
1616  	else
1617  		sc->ip->i_diflags &= ~XFS_DIFLAG_NEWRTBM;
1618  
1619  	/* These only make sense for directories. */
1620  	if (!S_ISDIR(mode))
1621  		sc->ip->i_diflags &= ~(XFS_DIFLAG_RTINHERIT |
1622  					  XFS_DIFLAG_EXTSZINHERIT |
1623  					  XFS_DIFLAG_PROJINHERIT |
1624  					  XFS_DIFLAG_NOSYMLINKS);
1625  
1626  	/* These only make sense for files. */
1627  	if (!S_ISREG(mode))
1628  		sc->ip->i_diflags &= ~(XFS_DIFLAG_REALTIME |
1629  					  XFS_DIFLAG_EXTSIZE);
1630  
1631  	/* These only make sense for non-rt files. */
1632  	if (sc->ip->i_diflags & XFS_DIFLAG_REALTIME)
1633  		sc->ip->i_diflags &= ~XFS_DIFLAG_FILESTREAM;
1634  
1635  	/* Immutable and append only?  Drop the append. */
1636  	if ((sc->ip->i_diflags & XFS_DIFLAG_IMMUTABLE) &&
1637  	    (sc->ip->i_diflags & XFS_DIFLAG_APPEND))
1638  		sc->ip->i_diflags &= ~XFS_DIFLAG_APPEND;
1639  
1640  	/* Clear junk flags. */
1641  	if (sc->ip->i_diflags2 & ~XFS_DIFLAG2_ANY)
1642  		sc->ip->i_diflags2 &= ~XFS_DIFLAG2_ANY;
1643  
1644  	/* No reflink flag unless we support it and it's a file. */
1645  	if (!xfs_has_reflink(sc->mp) || !S_ISREG(mode))
1646  		sc->ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK;
1647  
1648  	/* DAX only applies to files and dirs. */
1649  	if (!(S_ISREG(mode) || S_ISDIR(mode)))
1650  		sc->ip->i_diflags2 &= ~XFS_DIFLAG2_DAX;
1651  
1652  	/* No reflink files on the realtime device. */
1653  	if (sc->ip->i_diflags & XFS_DIFLAG_REALTIME)
1654  		sc->ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK;
1655  }
1656  
1657  /*
1658   * Fix size problems with block/node format directories.  If we fail to find
1659   * the extent list, just bail out and let the bmapbtd repair functions clean
1660   * up that mess.
1661   */
1662  STATIC void
xrep_inode_blockdir_size(struct xfs_scrub * sc)1663  xrep_inode_blockdir_size(
1664  	struct xfs_scrub	*sc)
1665  {
1666  	struct xfs_iext_cursor	icur;
1667  	struct xfs_bmbt_irec	got;
1668  	struct xfs_ifork	*ifp;
1669  	xfs_fileoff_t		off;
1670  	int			error;
1671  
1672  	trace_xrep_inode_blockdir_size(sc);
1673  
1674  	error = xfs_iread_extents(sc->tp, sc->ip, XFS_DATA_FORK);
1675  	if (error)
1676  		return;
1677  
1678  	/* Find the last block before 32G; this is the dir size. */
1679  	ifp = xfs_ifork_ptr(sc->ip, XFS_DATA_FORK);
1680  	off = XFS_B_TO_FSB(sc->mp, XFS_DIR2_SPACE_SIZE);
1681  	if (!xfs_iext_lookup_extent_before(sc->ip, ifp, &off, &icur, &got)) {
1682  		/* zero-extents directory? */
1683  		return;
1684  	}
1685  
1686  	off = got.br_startoff + got.br_blockcount;
1687  	sc->ip->i_disk_size = min_t(loff_t, XFS_DIR2_SPACE_SIZE,
1688  			XFS_FSB_TO_B(sc->mp, off));
1689  }
1690  
1691  /* Fix size problems with short format directories. */
1692  STATIC void
xrep_inode_sfdir_size(struct xfs_scrub * sc)1693  xrep_inode_sfdir_size(
1694  	struct xfs_scrub	*sc)
1695  {
1696  	struct xfs_ifork	*ifp;
1697  
1698  	trace_xrep_inode_sfdir_size(sc);
1699  
1700  	ifp = xfs_ifork_ptr(sc->ip, XFS_DATA_FORK);
1701  	sc->ip->i_disk_size = ifp->if_bytes;
1702  }
1703  
1704  /*
1705   * Fix any irregularities in a directory inode's size now that we can iterate
1706   * extent maps and access other regular inode data.
1707   */
1708  STATIC void
xrep_inode_dir_size(struct xfs_scrub * sc)1709  xrep_inode_dir_size(
1710  	struct xfs_scrub	*sc)
1711  {
1712  	trace_xrep_inode_dir_size(sc);
1713  
1714  	switch (sc->ip->i_df.if_format) {
1715  	case XFS_DINODE_FMT_EXTENTS:
1716  	case XFS_DINODE_FMT_BTREE:
1717  		xrep_inode_blockdir_size(sc);
1718  		break;
1719  	case XFS_DINODE_FMT_LOCAL:
1720  		xrep_inode_sfdir_size(sc);
1721  		break;
1722  	}
1723  }
1724  
1725  /* Fix extent size hint problems. */
1726  STATIC void
xrep_inode_extsize(struct xfs_scrub * sc)1727  xrep_inode_extsize(
1728  	struct xfs_scrub	*sc)
1729  {
1730  	/* Fix misaligned extent size hints on a directory. */
1731  	if ((sc->ip->i_diflags & XFS_DIFLAG_RTINHERIT) &&
1732  	    (sc->ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) &&
1733  	    xfs_extlen_to_rtxmod(sc->mp, sc->ip->i_extsize) > 0) {
1734  		sc->ip->i_extsize = 0;
1735  		sc->ip->i_diflags &= ~XFS_DIFLAG_EXTSZINHERIT;
1736  	}
1737  }
1738  
1739  /* Ensure this file has an attr fork if it needs to hold a parent pointer. */
1740  STATIC int
xrep_inode_pptr(struct xfs_scrub * sc)1741  xrep_inode_pptr(
1742  	struct xfs_scrub	*sc)
1743  {
1744  	struct xfs_mount	*mp = sc->mp;
1745  	struct xfs_inode	*ip = sc->ip;
1746  	struct inode		*inode = VFS_I(ip);
1747  
1748  	if (!xfs_has_parent(mp))
1749  		return 0;
1750  
1751  	/*
1752  	 * Unlinked inodes that cannot be added to the directory tree will not
1753  	 * have a parent pointer.
1754  	 */
1755  	if (inode->i_nlink == 0 && !(inode->i_state & I_LINKABLE))
1756  		return 0;
1757  
1758  	/* The root directory doesn't have a parent pointer. */
1759  	if (ip == mp->m_rootip)
1760  		return 0;
1761  
1762  	/*
1763  	 * Metadata inodes are rooted in the superblock and do not have any
1764  	 * parents.
1765  	 */
1766  	if (xfs_is_metadata_inode(ip))
1767  		return 0;
1768  
1769  	/* Inode already has an attr fork; no further work possible here. */
1770  	if (xfs_inode_has_attr_fork(ip))
1771  		return 0;
1772  
1773  	return xfs_bmap_add_attrfork(sc->tp, ip,
1774  			sizeof(struct xfs_attr_sf_hdr), true);
1775  }
1776  
1777  /* Fix any irregularities in an inode that the verifiers don't catch. */
1778  STATIC int
xrep_inode_problems(struct xfs_scrub * sc)1779  xrep_inode_problems(
1780  	struct xfs_scrub	*sc)
1781  {
1782  	int			error;
1783  
1784  	error = xrep_inode_blockcounts(sc);
1785  	if (error)
1786  		return error;
1787  	error = xrep_inode_pptr(sc);
1788  	if (error)
1789  		return error;
1790  	xrep_inode_timestamps(sc->ip);
1791  	xrep_inode_flags(sc);
1792  	xrep_inode_ids(sc);
1793  	/*
1794  	 * We can now do a better job fixing the size of a directory now that
1795  	 * we can scan the data fork extents than we could in xrep_dinode_size.
1796  	 */
1797  	if (S_ISDIR(VFS_I(sc->ip)->i_mode))
1798  		xrep_inode_dir_size(sc);
1799  	xrep_inode_extsize(sc);
1800  
1801  	trace_xrep_inode_fixed(sc);
1802  	xfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE);
1803  	return xrep_roll_trans(sc);
1804  }
1805  
1806  /*
1807   * Make sure this inode's unlinked list pointers are consistent with its
1808   * link count.
1809   */
1810  STATIC int
xrep_inode_unlinked(struct xfs_scrub * sc)1811  xrep_inode_unlinked(
1812  	struct xfs_scrub	*sc)
1813  {
1814  	unsigned int		nlink = VFS_I(sc->ip)->i_nlink;
1815  	int			error;
1816  
1817  	/*
1818  	 * If this inode is linked from the directory tree and on the unlinked
1819  	 * list, remove it from the unlinked list.
1820  	 */
1821  	if (nlink > 0 && xfs_inode_on_unlinked_list(sc->ip)) {
1822  		struct xfs_perag	*pag;
1823  		int			error;
1824  
1825  		pag = xfs_perag_get(sc->mp,
1826  				XFS_INO_TO_AGNO(sc->mp, sc->ip->i_ino));
1827  		error = xfs_iunlink_remove(sc->tp, pag, sc->ip);
1828  		xfs_perag_put(pag);
1829  		if (error)
1830  			return error;
1831  	}
1832  
1833  	/*
1834  	 * If this inode is not linked from the directory tree yet not on the
1835  	 * unlinked list, put it on the unlinked list.
1836  	 */
1837  	if (nlink == 0 && !xfs_inode_on_unlinked_list(sc->ip)) {
1838  		error = xfs_iunlink(sc->tp, sc->ip);
1839  		if (error)
1840  			return error;
1841  	}
1842  
1843  	return 0;
1844  }
1845  
1846  /* Repair an inode's fields. */
1847  int
xrep_inode(struct xfs_scrub * sc)1848  xrep_inode(
1849  	struct xfs_scrub	*sc)
1850  {
1851  	int			error = 0;
1852  
1853  	/*
1854  	 * No inode?  That means we failed the _iget verifiers.  Repair all
1855  	 * the things that the inode verifiers care about, then retry _iget.
1856  	 */
1857  	if (!sc->ip) {
1858  		struct xrep_inode	*ri = sc->buf;
1859  
1860  		ASSERT(ri != NULL);
1861  
1862  		error = xrep_dinode_problems(ri);
1863  		if (error == -EBUSY) {
1864  			/*
1865  			 * Directory scan to recover inode mode encountered a
1866  			 * busy inode, so we did not continue repairing things.
1867  			 */
1868  			return 0;
1869  		}
1870  		if (error)
1871  			return error;
1872  
1873  		/* By this point we had better have a working incore inode. */
1874  		if (!sc->ip)
1875  			return -EFSCORRUPTED;
1876  	}
1877  
1878  	xfs_trans_ijoin(sc->tp, sc->ip, 0);
1879  
1880  	/* If we found corruption of any kind, try to fix it. */
1881  	if ((sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) ||
1882  	    (sc->sm->sm_flags & XFS_SCRUB_OFLAG_XCORRUPT)) {
1883  		error = xrep_inode_problems(sc);
1884  		if (error)
1885  			return error;
1886  	}
1887  
1888  	/* See if we can clear the reflink flag. */
1889  	if (xfs_is_reflink_inode(sc->ip)) {
1890  		error = xfs_reflink_clear_inode_flag(sc->ip, &sc->tp);
1891  		if (error)
1892  			return error;
1893  	}
1894  
1895  	/* Reconnect incore unlinked list */
1896  	error = xrep_inode_unlinked(sc);
1897  	if (error)
1898  		return error;
1899  
1900  	return xrep_defer_finish(sc);
1901  }
1902