1  // SPDX-License-Identifier: GPL-2.0-or-later
2  /*
3   * Copyright (c) 2020-2024 Oracle.  All Rights Reserved.
4   * Author: Darrick J. Wong <djwong@kernel.org>
5   */
6  #include "xfs.h"
7  #include "xfs_fs.h"
8  #include "xfs_shared.h"
9  #include "xfs_format.h"
10  #include "xfs_trans_resv.h"
11  #include "xfs_mount.h"
12  #include "xfs_defer.h"
13  #include "xfs_bit.h"
14  #include "xfs_log_format.h"
15  #include "xfs_trans.h"
16  #include "xfs_sb.h"
17  #include "xfs_inode.h"
18  #include "xfs_icache.h"
19  #include "xfs_da_format.h"
20  #include "xfs_da_btree.h"
21  #include "xfs_dir2.h"
22  #include "xfs_dir2_priv.h"
23  #include "xfs_bmap.h"
24  #include "xfs_quota.h"
25  #include "xfs_bmap_btree.h"
26  #include "xfs_trans_space.h"
27  #include "xfs_bmap_util.h"
28  #include "xfs_exchmaps.h"
29  #include "xfs_exchrange.h"
30  #include "xfs_ag.h"
31  #include "xfs_parent.h"
32  #include "scrub/xfs_scrub.h"
33  #include "scrub/scrub.h"
34  #include "scrub/common.h"
35  #include "scrub/trace.h"
36  #include "scrub/repair.h"
37  #include "scrub/tempfile.h"
38  #include "scrub/tempexch.h"
39  #include "scrub/xfile.h"
40  #include "scrub/xfarray.h"
41  #include "scrub/xfblob.h"
42  #include "scrub/iscan.h"
43  #include "scrub/readdir.h"
44  #include "scrub/reap.h"
45  #include "scrub/findparent.h"
46  #include "scrub/orphanage.h"
47  #include "scrub/listxattr.h"
48  
49  /*
50   * Directory Repair
51   * ================
52   *
53   * We repair directories by reading the directory data blocks looking for
54   * directory entries that look salvageable (name passes verifiers, entry points
55   * to a valid allocated inode, etc).  Each entry worth salvaging is stashed in
56   * memory, and the stashed entries are periodically replayed into a temporary
57   * directory to constrain memory use.  Batching the construction of the
58   * temporary directory in this fashion reduces lock cycling of the directory
59   * being repaired and the temporary directory, and will later become important
60   * for parent pointer scanning.
61   *
62   * If parent pointers are enabled on this filesystem, we instead reconstruct
63   * the directory by visiting each parent pointer of each file in the filesystem
64   * and translating the relevant parent pointer records into dirents.  In this
65   * case, it is advantageous to stash all directory entries created from parent
66   * pointers for a single child file before replaying them into the temporary
67   * directory.  To save memory, the live filesystem scan reuses the findparent
68   * fields.  Directory repair chooses either parent pointer scanning or
69   * directory entry salvaging, but not both.
70   *
71   * Directory entries added to the temporary directory do not elevate the link
72   * counts of the inodes found.  When salvaging completes, the remaining stashed
73   * entries are replayed to the temporary directory.  An atomic mapping exchange
74   * is used to commit the new directory blocks to the directory being repaired.
75   * This will disrupt readdir cursors.
76   *
77   * Locking Issues
78   * --------------
79   *
80   * If /a, /a/b, and /c are all directories, the VFS does not take i_rwsem on
81   * /a/b for a "mv /a/b /c/" operation.  This means that only b's ILOCK protects
82   * b's dotdot update.  This is in contrast to every other dotdot update (link,
83   * remove, mkdir).  If the repair code drops the ILOCK, it must either
84   * revalidate the dotdot entry or use dirent hooks to capture updates from
85   * other threads.
86   */
87  
/*
 * Action codes stored in xrep_dirent.action and dispatched when a stashed
 * entry is replayed.
 */

/* Create a dirent in the tempdir. */
#define XREP_DIRENT_ADD		(1)

/* Remove a dirent from the tempdir. */
#define XREP_DIRENT_REMOVE	(2)
93  
/*
 * Directory entry to be restored in the new directory.  One of these records
 * is appended to the dir_entries xfarray for every stashed update; the name
 * itself lives in the dir_names xfblob and is found through @name_cookie.
 */
struct xrep_dirent {
	/* Cookie for retrieval of the dirent name. */
	xfblob_cookie		name_cookie;

	/* Target inode number. */
	xfs_ino_t		ino;

	/* Length of the dirent name. */
	uint8_t			namelen;

	/* File type of the dirent. */
	uint8_t			ftype;

	/* XREP_DIRENT_{ADD,REMOVE} */
	uint8_t			action;
};
111  
/*
 * Stash up to 8 pages of recovered dirent data in dir_entries and dir_names
 * before we write them to the temp dir.  The limit is compared against the
 * combined byte usage of both structures.
 */
#define XREP_DIR_MAX_STASH_BYTES	(PAGE_SIZE * 8)
117  
/*
 * In-memory context for one directory repair.  Allocated zeroed by
 * xrep_setup_directory() and attached to the scrub context as sc->buf.
 */
struct xrep_dir {
	/* Scrub context that owns this repair. */
	struct xfs_scrub	*sc;

	/* Fixed-size array of xrep_dirent structures. */
	struct xfarray		*dir_entries;

	/* Blobs containing directory entry names. */
	struct xfblob		*dir_names;

	/* Information for exchanging data forks at the end. */
	struct xrep_tempexch	tx;

	/* Preallocated args struct for performing dir operations */
	struct xfs_da_args	args;

	/*
	 * Information used to scan the filesystem to find the inumber of the
	 * dotdot entry for this directory.  For directory salvaging when
	 * parent pointers are not enabled, we use the findparent_* functions
	 * on this object and access only the parent_ino field directly.
	 *
	 * When parent pointers are enabled, however, the pptr scanner uses the
	 * iscan, hooks, lock, and parent_ino fields of this object directly.
	 * @pscan.lock coordinates access to dir_entries, dir_names,
	 * parent_ino, subdirs, dirents, and args.  This reduces the memory
	 * requirements of this structure.
	 */
	struct xrep_parent_scan_info pscan;

	/*
	 * Context information for attaching this directory to the lost+found
	 * if this directory does not have a parent.
	 */
	struct xrep_adoption	adoption;

	/*
	 * How many subdirectories did we find?  Adjusted as XFS_DIR3_FT_DIR
	 * entries are replayed into (or removed from) the tempdir.
	 */
	uint64_t		subdirs;

	/*
	 * How many dirents did we find?  Adjusted as entries are replayed
	 * into (or removed from) the tempdir.
	 */
	unsigned int		dirents;

	/* Should we move this directory to the orphanage? */
	bool			needs_adoption;

	/*
	 * Directory entry name, plus the trailing null.  @xname.name is
	 * pointed at @namebuf when the repair context is set up.
	 */
	struct xfs_name		xname;
	unsigned char		namebuf[MAXNAMELEN];
};
166  
167  /* Tear down all the incore stuff we created. */
168  static void
xrep_dir_teardown(struct xfs_scrub * sc)169  xrep_dir_teardown(
170  	struct xfs_scrub	*sc)
171  {
172  	struct xrep_dir		*rd = sc->buf;
173  
174  	xrep_findparent_scan_teardown(&rd->pscan);
175  	xfblob_destroy(rd->dir_names);
176  	xfarray_destroy(rd->dir_entries);
177  }
178  
179  /* Set up for a directory repair. */
180  int
xrep_setup_directory(struct xfs_scrub * sc)181  xrep_setup_directory(
182  	struct xfs_scrub	*sc)
183  {
184  	struct xrep_dir		*rd;
185  	int			error;
186  
187  	xchk_fsgates_enable(sc, XCHK_FSGATES_DIRENTS);
188  
189  	error = xrep_orphanage_try_create(sc);
190  	if (error)
191  		return error;
192  
193  	error = xrep_tempfile_create(sc, S_IFDIR);
194  	if (error)
195  		return error;
196  
197  	rd = kvzalloc(sizeof(struct xrep_dir), XCHK_GFP_FLAGS);
198  	if (!rd)
199  		return -ENOMEM;
200  	rd->sc = sc;
201  	rd->xname.name = rd->namebuf;
202  	sc->buf = rd;
203  
204  	return 0;
205  }
206  
207  /*
208   * Look up the dotdot entry and confirm that it's really the parent.
209   * Returns NULLFSINO if we don't know what to do.
210   */
211  static inline xfs_ino_t
xrep_dir_lookup_parent(struct xrep_dir * rd)212  xrep_dir_lookup_parent(
213  	struct xrep_dir		*rd)
214  {
215  	struct xfs_scrub	*sc = rd->sc;
216  	xfs_ino_t		ino;
217  	int			error;
218  
219  	error = xfs_dir_lookup(sc->tp, sc->ip, &xfs_name_dotdot, &ino, NULL);
220  	if (error)
221  		return NULLFSINO;
222  	if (!xfs_verify_dir_ino(sc->mp, ino))
223  		return NULLFSINO;
224  
225  	error = xrep_findparent_confirm(sc, &ino);
226  	if (error)
227  		return NULLFSINO;
228  
229  	return ino;
230  }
231  
232  /*
233   * Look up '..' in the dentry cache and confirm that it's really the parent.
234   * Returns NULLFSINO if the dcache misses or if the hit is implausible.
235   */
236  static inline xfs_ino_t
xrep_dir_dcache_parent(struct xrep_dir * rd)237  xrep_dir_dcache_parent(
238  	struct xrep_dir		*rd)
239  {
240  	struct xfs_scrub	*sc = rd->sc;
241  	xfs_ino_t		parent_ino;
242  	int			error;
243  
244  	parent_ino = xrep_findparent_from_dcache(sc);
245  	if (parent_ino == NULLFSINO)
246  		return parent_ino;
247  
248  	error = xrep_findparent_confirm(sc, &parent_ino);
249  	if (error)
250  		return NULLFSINO;
251  
252  	return parent_ino;
253  }
254  
255  /* Try to find the parent of the directory being repaired. */
256  STATIC int
xrep_dir_find_parent(struct xrep_dir * rd)257  xrep_dir_find_parent(
258  	struct xrep_dir		*rd)
259  {
260  	xfs_ino_t		ino;
261  
262  	ino = xrep_findparent_self_reference(rd->sc);
263  	if (ino != NULLFSINO) {
264  		xrep_findparent_scan_finish_early(&rd->pscan, ino);
265  		return 0;
266  	}
267  
268  	ino = xrep_dir_dcache_parent(rd);
269  	if (ino != NULLFSINO) {
270  		xrep_findparent_scan_finish_early(&rd->pscan, ino);
271  		return 0;
272  	}
273  
274  	ino = xrep_dir_lookup_parent(rd);
275  	if (ino != NULLFSINO) {
276  		xrep_findparent_scan_finish_early(&rd->pscan, ino);
277  		return 0;
278  	}
279  
280  	/*
281  	 * A full filesystem scan is the last resort.  On a busy filesystem,
282  	 * the scan can fail with -EBUSY if we cannot grab IOLOCKs.  That means
283  	 * that we don't know what who the parent is, so we should return to
284  	 * userspace.
285  	 */
286  	return xrep_findparent_scan(&rd->pscan);
287  }
288  
289  /*
290   * Decide if we want to salvage this entry.  We don't bother with oversized
291   * names or the dot entry.
292   */
293  STATIC int
xrep_dir_want_salvage(struct xrep_dir * rd,const char * name,int namelen,xfs_ino_t ino)294  xrep_dir_want_salvage(
295  	struct xrep_dir		*rd,
296  	const char		*name,
297  	int			namelen,
298  	xfs_ino_t		ino)
299  {
300  	struct xfs_mount	*mp = rd->sc->mp;
301  
302  	/* No pointers to ourselves or to garbage. */
303  	if (ino == rd->sc->ip->i_ino)
304  		return false;
305  	if (!xfs_verify_dir_ino(mp, ino))
306  		return false;
307  
308  	/* No weird looking names or dot entries. */
309  	if (namelen >= MAXNAMELEN || namelen <= 0)
310  		return false;
311  	if (namelen == 1 && name[0] == '.')
312  		return false;
313  	if (!xfs_dir2_namecheck(name, namelen))
314  		return false;
315  
316  	return true;
317  }
318  
319  /*
320   * Remember that we want to create a dirent in the tempdir.  These stashed
321   * actions will be replayed later.
322   */
323  STATIC int
xrep_dir_stash_createname(struct xrep_dir * rd,const struct xfs_name * name,xfs_ino_t ino)324  xrep_dir_stash_createname(
325  	struct xrep_dir		*rd,
326  	const struct xfs_name	*name,
327  	xfs_ino_t		ino)
328  {
329  	struct xrep_dirent	dirent = {
330  		.action		= XREP_DIRENT_ADD,
331  		.ino		= ino,
332  		.namelen	= name->len,
333  		.ftype		= name->type,
334  	};
335  	int			error;
336  
337  	trace_xrep_dir_stash_createname(rd->sc->tempip, name, ino);
338  
339  	error = xfblob_storename(rd->dir_names, &dirent.name_cookie, name);
340  	if (error)
341  		return error;
342  
343  	return xfarray_append(rd->dir_entries, &dirent);
344  }
345  
346  /*
347   * Remember that we want to remove a dirent from the tempdir.  These stashed
348   * actions will be replayed later.
349   */
350  STATIC int
xrep_dir_stash_removename(struct xrep_dir * rd,const struct xfs_name * name,xfs_ino_t ino)351  xrep_dir_stash_removename(
352  	struct xrep_dir		*rd,
353  	const struct xfs_name	*name,
354  	xfs_ino_t		ino)
355  {
356  	struct xrep_dirent	dirent = {
357  		.action		= XREP_DIRENT_REMOVE,
358  		.ino		= ino,
359  		.namelen	= name->len,
360  		.ftype		= name->type,
361  	};
362  	int			error;
363  
364  	trace_xrep_dir_stash_removename(rd->sc->tempip, name, ino);
365  
366  	error = xfblob_storename(rd->dir_names, &dirent.name_cookie, name);
367  	if (error)
368  		return error;
369  
370  	return xfarray_append(rd->dir_entries, &dirent);
371  }
372  
373  /* Allocate an in-core record to hold entries while we rebuild the dir data. */
374  STATIC int
xrep_dir_salvage_entry(struct xrep_dir * rd,unsigned char * name,unsigned int namelen,xfs_ino_t ino)375  xrep_dir_salvage_entry(
376  	struct xrep_dir		*rd,
377  	unsigned char		*name,
378  	unsigned int		namelen,
379  	xfs_ino_t		ino)
380  {
381  	struct xfs_name		xname = {
382  		.name		= name,
383  	};
384  	struct xfs_scrub	*sc = rd->sc;
385  	struct xfs_inode	*ip;
386  	unsigned int		i = 0;
387  	int			error = 0;
388  
389  	if (xchk_should_terminate(sc, &error))
390  		return error;
391  
392  	/*
393  	 * Truncate the name to the first character that would trip namecheck.
394  	 * If we no longer have a name after that, ignore this entry.
395  	 */
396  	while (i < namelen && name[i] != 0 && name[i] != '/')
397  		i++;
398  	if (i == 0)
399  		return 0;
400  	xname.len = i;
401  
402  	/* Ignore '..' entries; we already picked the new parent. */
403  	if (xname.len == 2 && name[0] == '.' && name[1] == '.') {
404  		trace_xrep_dir_salvaged_parent(sc->ip, ino);
405  		return 0;
406  	}
407  
408  	trace_xrep_dir_salvage_entry(sc->ip, &xname, ino);
409  
410  	/*
411  	 * Compute the ftype or dump the entry if we can't.  We don't lock the
412  	 * inode because inodes can't change type while we have a reference.
413  	 */
414  	error = xchk_iget(sc, ino, &ip);
415  	if (error)
416  		return 0;
417  
418  	xname.type = xfs_mode_to_ftype(VFS_I(ip)->i_mode);
419  	xchk_irele(sc, ip);
420  
421  	return xrep_dir_stash_createname(rd, &xname, ino);
422  }
423  
424  /* Record a shortform directory entry for later reinsertion. */
425  STATIC int
xrep_dir_salvage_sf_entry(struct xrep_dir * rd,struct xfs_dir2_sf_hdr * sfp,struct xfs_dir2_sf_entry * sfep)426  xrep_dir_salvage_sf_entry(
427  	struct xrep_dir			*rd,
428  	struct xfs_dir2_sf_hdr		*sfp,
429  	struct xfs_dir2_sf_entry	*sfep)
430  {
431  	xfs_ino_t			ino;
432  
433  	ino = xfs_dir2_sf_get_ino(rd->sc->mp, sfp, sfep);
434  	if (!xrep_dir_want_salvage(rd, sfep->name, sfep->namelen, ino))
435  		return 0;
436  
437  	return xrep_dir_salvage_entry(rd, sfep->name, sfep->namelen, ino);
438  }
439  
440  /* Record a regular directory entry for later reinsertion. */
441  STATIC int
xrep_dir_salvage_data_entry(struct xrep_dir * rd,struct xfs_dir2_data_entry * dep)442  xrep_dir_salvage_data_entry(
443  	struct xrep_dir			*rd,
444  	struct xfs_dir2_data_entry	*dep)
445  {
446  	xfs_ino_t			ino;
447  
448  	ino = be64_to_cpu(dep->inumber);
449  	if (!xrep_dir_want_salvage(rd, dep->name, dep->namelen, ino))
450  		return 0;
451  
452  	return xrep_dir_salvage_entry(rd, dep->name, dep->namelen, ino);
453  }
454  
455  /* Try to recover block/data format directory entries. */
456  STATIC int
xrep_dir_recover_data(struct xrep_dir * rd,struct xfs_buf * bp)457  xrep_dir_recover_data(
458  	struct xrep_dir		*rd,
459  	struct xfs_buf		*bp)
460  {
461  	struct xfs_da_geometry	*geo = rd->sc->mp->m_dir_geo;
462  	unsigned int		offset;
463  	unsigned int		end;
464  	int			error = 0;
465  
466  	/*
467  	 * Loop over the data portion of the block.
468  	 * Each object is a real entry (dep) or an unused one (dup).
469  	 */
470  	offset = geo->data_entry_offset;
471  	end = min_t(unsigned int, BBTOB(bp->b_length),
472  			xfs_dir3_data_end_offset(geo, bp->b_addr));
473  
474  	while (offset < end) {
475  		struct xfs_dir2_data_unused	*dup = bp->b_addr + offset;
476  		struct xfs_dir2_data_entry	*dep = bp->b_addr + offset;
477  
478  		if (xchk_should_terminate(rd->sc, &error))
479  			return error;
480  
481  		/* Skip unused entries. */
482  		if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
483  			offset += be16_to_cpu(dup->length);
484  			continue;
485  		}
486  
487  		/* Don't walk off the end of the block. */
488  		offset += xfs_dir2_data_entsize(rd->sc->mp, dep->namelen);
489  		if (offset > end)
490  			break;
491  
492  		/* Ok, let's save this entry. */
493  		error = xrep_dir_salvage_data_entry(rd, dep);
494  		if (error)
495  			return error;
496  
497  	}
498  
499  	return 0;
500  }
501  
502  /* Try to recover shortform directory entries. */
503  STATIC int
xrep_dir_recover_sf(struct xrep_dir * rd)504  xrep_dir_recover_sf(
505  	struct xrep_dir			*rd)
506  {
507  	struct xfs_dir2_sf_hdr		*hdr;
508  	struct xfs_dir2_sf_entry	*sfep;
509  	struct xfs_dir2_sf_entry	*next;
510  	struct xfs_ifork		*ifp;
511  	xfs_ino_t			ino;
512  	unsigned char			*end;
513  	int				error = 0;
514  
515  	ifp = xfs_ifork_ptr(rd->sc->ip, XFS_DATA_FORK);
516  	hdr = ifp->if_data;
517  	end = (unsigned char *)ifp->if_data + ifp->if_bytes;
518  
519  	ino = xfs_dir2_sf_get_parent_ino(hdr);
520  	trace_xrep_dir_salvaged_parent(rd->sc->ip, ino);
521  
522  	sfep = xfs_dir2_sf_firstentry(hdr);
523  	while ((unsigned char *)sfep < end) {
524  		if (xchk_should_terminate(rd->sc, &error))
525  			return error;
526  
527  		next = xfs_dir2_sf_nextentry(rd->sc->mp, hdr, sfep);
528  		if ((unsigned char *)next > end)
529  			break;
530  
531  		/* Ok, let's save this entry. */
532  		error = xrep_dir_salvage_sf_entry(rd, hdr, sfep);
533  		if (error)
534  			return error;
535  
536  		sfep = next;
537  	}
538  
539  	return 0;
540  }
541  
542  /*
543   * Try to figure out the format of this directory from the data fork mappings
544   * and the directory size.  If we can be reasonably sure of format, we can be
545   * more aggressive in salvaging directory entries.  On return, @magic_guess
546   * will be set to DIR3_BLOCK_MAGIC if we think this is a "block format"
547   * directory; DIR3_DATA_MAGIC if we think this is a "data format" directory,
548   * and 0 if we can't tell.
549   */
550  STATIC void
xrep_dir_guess_format(struct xrep_dir * rd,__be32 * magic_guess)551  xrep_dir_guess_format(
552  	struct xrep_dir		*rd,
553  	__be32			*magic_guess)
554  {
555  	struct xfs_inode	*dp = rd->sc->ip;
556  	struct xfs_mount	*mp = rd->sc->mp;
557  	struct xfs_da_geometry	*geo = mp->m_dir_geo;
558  	xfs_fileoff_t		last;
559  	int			error;
560  
561  	ASSERT(xfs_has_crc(mp));
562  
563  	*magic_guess = 0;
564  
565  	/*
566  	 * If there's a single directory block and the directory size is
567  	 * exactly one block, this has to be a single block format directory.
568  	 */
569  	error = xfs_bmap_last_offset(dp, &last, XFS_DATA_FORK);
570  	if (!error && XFS_FSB_TO_B(mp, last) == geo->blksize &&
571  	    dp->i_disk_size == geo->blksize) {
572  		*magic_guess = cpu_to_be32(XFS_DIR3_BLOCK_MAGIC);
573  		return;
574  	}
575  
576  	/*
577  	 * If the last extent before the leaf offset matches the directory
578  	 * size and the directory size is larger than 1 block, this is a
579  	 * data format directory.
580  	 */
581  	last = geo->leafblk;
582  	error = xfs_bmap_last_before(rd->sc->tp, dp, &last, XFS_DATA_FORK);
583  	if (!error &&
584  	    XFS_FSB_TO_B(mp, last) > geo->blksize &&
585  	    XFS_FSB_TO_B(mp, last) == dp->i_disk_size) {
586  		*magic_guess = cpu_to_be32(XFS_DIR3_DATA_MAGIC);
587  		return;
588  	}
589  }
590  
591  /* Recover directory entries from a specific directory block. */
592  STATIC int
xrep_dir_recover_dirblock(struct xrep_dir * rd,__be32 magic_guess,xfs_dablk_t dabno)593  xrep_dir_recover_dirblock(
594  	struct xrep_dir		*rd,
595  	__be32			magic_guess,
596  	xfs_dablk_t		dabno)
597  {
598  	struct xfs_dir2_data_hdr *hdr;
599  	struct xfs_buf		*bp;
600  	__be32			oldmagic;
601  	int			error;
602  
603  	/*
604  	 * Try to read buffer.  We invalidate them in the next step so we don't
605  	 * bother to set a buffer type or ops.
606  	 */
607  	error = xfs_da_read_buf(rd->sc->tp, rd->sc->ip, dabno,
608  			XFS_DABUF_MAP_HOLE_OK, &bp, XFS_DATA_FORK, NULL);
609  	if (error || !bp)
610  		return error;
611  
612  	hdr = bp->b_addr;
613  	oldmagic = hdr->magic;
614  
615  	trace_xrep_dir_recover_dirblock(rd->sc->ip, dabno,
616  			be32_to_cpu(hdr->magic), be32_to_cpu(magic_guess));
617  
618  	/*
619  	 * If we're sure of the block's format, proceed with the salvage
620  	 * operation using the specified magic number.
621  	 */
622  	if (magic_guess) {
623  		hdr->magic = magic_guess;
624  		goto recover;
625  	}
626  
627  	/*
628  	 * If we couldn't guess what type of directory this is, then we will
629  	 * only salvage entries from directory blocks that match the magic
630  	 * number and pass verifiers.
631  	 */
632  	switch (hdr->magic) {
633  	case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC):
634  	case cpu_to_be32(XFS_DIR3_BLOCK_MAGIC):
635  		if (!xrep_buf_verify_struct(bp, &xfs_dir3_block_buf_ops))
636  			goto out;
637  		if (xfs_dir3_block_header_check(bp, rd->sc->ip->i_ino) != NULL)
638  			goto out;
639  		break;
640  	case cpu_to_be32(XFS_DIR2_DATA_MAGIC):
641  	case cpu_to_be32(XFS_DIR3_DATA_MAGIC):
642  		if (!xrep_buf_verify_struct(bp, &xfs_dir3_data_buf_ops))
643  			goto out;
644  		if (xfs_dir3_data_header_check(bp, rd->sc->ip->i_ino) != NULL)
645  			goto out;
646  		break;
647  	default:
648  		goto out;
649  	}
650  
651  recover:
652  	error = xrep_dir_recover_data(rd, bp);
653  
654  out:
655  	hdr->magic = oldmagic;
656  	xfs_trans_brelse(rd->sc->tp, bp);
657  	return error;
658  }
659  
660  static inline void
xrep_dir_init_args(struct xrep_dir * rd,struct xfs_inode * dp,const struct xfs_name * name)661  xrep_dir_init_args(
662  	struct xrep_dir		*rd,
663  	struct xfs_inode	*dp,
664  	const struct xfs_name	*name)
665  {
666  	memset(&rd->args, 0, sizeof(struct xfs_da_args));
667  	rd->args.geo = rd->sc->mp->m_dir_geo;
668  	rd->args.whichfork = XFS_DATA_FORK;
669  	rd->args.owner = rd->sc->ip->i_ino;
670  	rd->args.trans = rd->sc->tp;
671  	rd->args.dp = dp;
672  	if (!name)
673  		return;
674  	rd->args.name = name->name;
675  	rd->args.namelen = name->len;
676  	rd->args.filetype = name->type;
677  	rd->args.hashval = xfs_dir2_hashname(rd->sc->mp, name);
678  }
679  
680  /* Replay a stashed createname into the temporary directory. */
681  STATIC int
xrep_dir_replay_createname(struct xrep_dir * rd,const struct xfs_name * name,xfs_ino_t inum,xfs_extlen_t total)682  xrep_dir_replay_createname(
683  	struct xrep_dir		*rd,
684  	const struct xfs_name	*name,
685  	xfs_ino_t		inum,
686  	xfs_extlen_t		total)
687  {
688  	struct xfs_scrub	*sc = rd->sc;
689  	struct xfs_inode	*dp = rd->sc->tempip;
690  	int			error;
691  
692  	ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
693  
694  	error = xfs_dir_ino_validate(sc->mp, inum);
695  	if (error)
696  		return error;
697  
698  	trace_xrep_dir_replay_createname(dp, name, inum);
699  
700  	xrep_dir_init_args(rd, dp, name);
701  	rd->args.inumber = inum;
702  	rd->args.total = total;
703  	rd->args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT;
704  	return xfs_dir_createname_args(&rd->args);
705  }
706  
707  /* Replay a stashed removename onto the temporary directory. */
708  STATIC int
xrep_dir_replay_removename(struct xrep_dir * rd,const struct xfs_name * name,xfs_extlen_t total)709  xrep_dir_replay_removename(
710  	struct xrep_dir		*rd,
711  	const struct xfs_name	*name,
712  	xfs_extlen_t		total)
713  {
714  	struct xfs_inode	*dp = rd->args.dp;
715  
716  	ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
717  
718  	xrep_dir_init_args(rd, dp, name);
719  	rd->args.op_flags = 0;
720  	rd->args.total = total;
721  
722  	trace_xrep_dir_replay_removename(dp, name, 0);
723  	return xfs_dir_removename_args(&rd->args);
724  }
725  
/*
 * Apply one stashed incore directory update (add or remove) to the temporary
 * directory.  The caller must hold the tempdir's IOLOCK, must not hold any
 * ILOCKs, and must not be in transaction context.
 *
 * @xname is the name of the entry and @dirent carries the target inode
 * number and the XREP_DIRENT_{ADD,REMOVE} action.  A transaction sized for
 * a link operation on a name of this length is allocated, the tempdir is
 * ILOCKed and joined, the update is made, and the transaction is committed.
 * The subdirs and dirents counters in @rd are adjusted to match.
 *
 * Returns 0 or a negative errno; -EIO for an unrecognized action.
 */
STATIC int
xrep_dir_replay_update(
	struct xrep_dir			*rd,
	const struct xfs_name		*xname,
	const struct xrep_dirent	*dirent)
{
	struct xfs_mount		*mp = rd->sc->mp;
#ifdef DEBUG
	xfs_ino_t			ino;
#endif
	uint				resblks;
	int				error;

	resblks = xfs_link_space_res(mp, xname->len);
	error = xchk_trans_alloc(rd->sc, resblks);
	if (error)
		return error;

	/* Lock the temporary directory and join it to the transaction */
	xrep_tempfile_ilock(rd->sc);
	xfs_trans_ijoin(rd->sc->tp, rd->sc->tempip, 0);

	switch (dirent->action) {
	case XREP_DIRENT_ADD:
		/*
		 * Create a replacement dirent in the temporary directory.
		 * Note that _createname doesn't check for existing entries.
		 * There shouldn't be any in the temporary dir, but we'll
		 * verify this in debug mode.
		 *
		 * NOTE(review): if the debug lookup finds an existing entry,
		 * error is 0 when we jump to out_cancel, so this function
		 * returns 0 without adding anything.  The ASSERT condition is
		 * always true inside this branch, so it never fires.
		 */
#ifdef DEBUG
		error = xchk_dir_lookup(rd->sc, rd->sc->tempip, xname, &ino);
		if (error != -ENOENT) {
			ASSERT(error != -ENOENT);
			goto out_cancel;
		}
#endif

		error = xrep_dir_replay_createname(rd, xname, dirent->ino,
				resblks);
		if (error)
			goto out_cancel;

		/* Account for the new entry. */
		if (xname->type == XFS_DIR3_FT_DIR)
			rd->subdirs++;
		rd->dirents++;
		break;
	case XREP_DIRENT_REMOVE:
		/*
		 * Remove a dirent from the temporary directory.  Note that
		 * _removename doesn't check the inode target of the existing
		 * entry.  There should be a perfect match in the temporary
		 * dir, but we'll verify this in debug mode.
		 *
		 * NOTE(review): ASSERT(error != 0) is always true inside its
		 * branch, so it never fires; the lookup error is simply
		 * returned to the caller.
		 */
#ifdef DEBUG
		error = xchk_dir_lookup(rd->sc, rd->sc->tempip, xname, &ino);
		if (error) {
			ASSERT(error != 0);
			goto out_cancel;
		}
		if (ino != dirent->ino) {
			ASSERT(ino == dirent->ino);
			error = -EIO;
			goto out_cancel;
		}
#endif

		error = xrep_dir_replay_removename(rd, xname, resblks);
		if (error)
			goto out_cancel;

		/* Account for the entry that went away. */
		if (xname->type == XFS_DIR3_FT_DIR)
			rd->subdirs--;
		rd->dirents--;
		break;
	default:
		/* Unknown action code in the stashed record. */
		ASSERT(0);
		error = -EIO;
		goto out_cancel;
	}

	/*
	 * Commit and unlock.
	 *
	 * NOTE(review): on commit failure we return without calling
	 * xrep_tempfile_iunlock() here; presumably scrub teardown drops the
	 * tempfile ILOCK -- confirm.
	 */
	error = xrep_trans_commit(rd->sc);
	if (error)
		return error;

	xrep_tempfile_iunlock(rd->sc);
	return 0;
out_cancel:
	/* Back out the transaction, then drop the tempdir ILOCK. */
	xchk_trans_cancel(rd->sc);
	xrep_tempfile_iunlock(rd->sc);
	return error;
}
824  
825  /*
826   * Flush stashed incore dirent updates that have been recorded by the scanner.
827   * This is done to reduce the memory requirements of the directory rebuild,
828   * since directories can contain up to 32GB of directory data.
829   *
830   * Caller must not hold transactions or ILOCKs.  Caller must hold the tempdir
831   * IOLOCK.
832   */
833  STATIC int
xrep_dir_replay_updates(struct xrep_dir * rd)834  xrep_dir_replay_updates(
835  	struct xrep_dir		*rd)
836  {
837  	xfarray_idx_t		array_cur;
838  	int			error;
839  
840  	/* Add all the salvaged dirents to the temporary directory. */
841  	mutex_lock(&rd->pscan.lock);
842  	foreach_xfarray_idx(rd->dir_entries, array_cur) {
843  		struct xrep_dirent	dirent;
844  
845  		error = xfarray_load(rd->dir_entries, array_cur, &dirent);
846  		if (error)
847  			goto out_unlock;
848  
849  		error = xfblob_loadname(rd->dir_names, dirent.name_cookie,
850  				&rd->xname, dirent.namelen);
851  		if (error)
852  			goto out_unlock;
853  		rd->xname.type = dirent.ftype;
854  		mutex_unlock(&rd->pscan.lock);
855  
856  		error = xrep_dir_replay_update(rd, &rd->xname, &dirent);
857  		if (error)
858  			return error;
859  		mutex_lock(&rd->pscan.lock);
860  	}
861  
862  	/* Empty out both arrays now that we've added the entries. */
863  	xfarray_truncate(rd->dir_entries);
864  	xfblob_truncate(rd->dir_names);
865  	mutex_unlock(&rd->pscan.lock);
866  	return 0;
867  out_unlock:
868  	mutex_unlock(&rd->pscan.lock);
869  	return error;
870  }
871  
/*
 * Periodically flush stashed directory entries to the temporary dir.  This
 * is done to reduce the memory requirements of the directory rebuild, since
 * directories can contain up to 32GB of directory data.
 *
 * On success the caller again has a scrub transaction and holds ILOCK_EXCL
 * via xchk_ilock().  On failure a negative errno is returned and neither is
 * re-established by this function.
 */
STATIC int
xrep_dir_flush_stashed(
	struct xrep_dir		*rd)
{
	int			error;

	/*
	 * Entering this function, the scrub context has a reference to the
	 * inode being repaired, the temporary file, and a scrub transaction
	 * that we use during dirent salvaging to avoid livelocking if there
	 * are cycles in the directory structures.  We hold ILOCK_EXCL on both
	 * the inode being repaired and the temporary file, though they are
	 * not ijoined to the scrub transaction.
	 *
	 * To constrain kernel memory use, we occasionally write salvaged
	 * dirents from the xfarray and xfblob structures into the temporary
	 * directory in preparation for exchanging the directory structures at
	 * the end.  Updating the temporary file requires a transaction, so we
	 * commit the scrub transaction and drop the two ILOCKs so that
	 * we can allocate whatever transaction we want.
	 *
	 * We still hold IOLOCK_EXCL on the inode being repaired, which
	 * prevents anyone from accessing the damaged directory data while we
	 * repair it.
	 *
	 * NOTE(review): only one unlock call, xchk_iunlock(), is visible
	 * below; confirm whether the temporary file's ILOCK is really held on
	 * entry as the text above says.
	 */
	error = xrep_trans_commit(rd->sc);
	if (error)
		return error;
	xchk_iunlock(rd->sc, XFS_ILOCK_EXCL);

	/*
	 * Take the IOLOCK of the temporary file while we modify dirents.  This
	 * isn't strictly required because the temporary file is never revealed
	 * to userspace, but we follow the same locking rules.  We still hold
	 * sc->ip's IOLOCK.
	 */
	error = xrep_tempfile_iolock_polled(rd->sc);
	if (error)
		return error;

	/*
	 * Write to the tempdir all the updates that we've stashed.  The
	 * tempfile IOLOCK is dropped whether or not the replay succeeded.
	 */
	error = xrep_dir_replay_updates(rd);
	xrep_tempfile_iounlock(rd->sc);
	if (error)
		return error;

	/*
	 * Recreate the salvage transaction and relock the dir we're salvaging.
	 */
	error = xchk_trans_alloc(rd->sc, 0);
	if (error)
		return error;
	xchk_ilock(rd->sc, XFS_ILOCK_EXCL);
	return 0;
}
932  
933  /* Decide if we've stashed too much dirent data in memory. */
934  static inline bool
xrep_dir_want_flush_stashed(struct xrep_dir * rd)935  xrep_dir_want_flush_stashed(
936  	struct xrep_dir		*rd)
937  {
938  	unsigned long long	bytes;
939  
940  	bytes = xfarray_bytes(rd->dir_entries) + xfblob_bytes(rd->dir_names);
941  	return bytes > XREP_DIR_MAX_STASH_BYTES;
942  }
943  
944  /* Extract as many directory entries as we can. */
945  STATIC int
xrep_dir_recover(struct xrep_dir * rd)946  xrep_dir_recover(
947  	struct xrep_dir		*rd)
948  {
949  	struct xfs_bmbt_irec	got;
950  	struct xfs_scrub	*sc = rd->sc;
951  	struct xfs_da_geometry	*geo = sc->mp->m_dir_geo;
952  	xfs_fileoff_t		offset;
953  	xfs_dablk_t		dabno;
954  	__be32			magic_guess;
955  	int			nmap;
956  	int			error;
957  
958  	xrep_dir_guess_format(rd, &magic_guess);
959  
960  	/* Iterate each directory data block in the data fork. */
961  	for (offset = 0;
962  	     offset < geo->leafblk;
963  	     offset = got.br_startoff + got.br_blockcount) {
964  		nmap = 1;
965  		error = xfs_bmapi_read(sc->ip, offset, geo->leafblk - offset,
966  				&got, &nmap, 0);
967  		if (error)
968  			return error;
969  		if (nmap != 1)
970  			return -EFSCORRUPTED;
971  		if (!xfs_bmap_is_written_extent(&got))
972  			continue;
973  
974  		for (dabno = round_up(got.br_startoff, geo->fsbcount);
975  		     dabno < got.br_startoff + got.br_blockcount;
976  		     dabno += geo->fsbcount) {
977  			if (xchk_should_terminate(rd->sc, &error))
978  				return error;
979  
980  			error = xrep_dir_recover_dirblock(rd,
981  					magic_guess, dabno);
982  			if (error)
983  				return error;
984  
985  			/* Flush dirents to constrain memory usage. */
986  			if (xrep_dir_want_flush_stashed(rd)) {
987  				error = xrep_dir_flush_stashed(rd);
988  				if (error)
989  					return error;
990  			}
991  		}
992  	}
993  
994  	return 0;
995  }
996  
997  /*
998   * Find all the directory entries for this inode by scraping them out of the
999   * directory leaf blocks by hand, and flushing them into the temp dir.
1000   */
1001  STATIC int
xrep_dir_find_entries(struct xrep_dir * rd)1002  xrep_dir_find_entries(
1003  	struct xrep_dir		*rd)
1004  {
1005  	struct xfs_inode	*dp = rd->sc->ip;
1006  	int			error;
1007  
1008  	/*
1009  	 * Salvage directory entries from the old directory, and write them to
1010  	 * the temporary directory.
1011  	 */
1012  	if (dp->i_df.if_format == XFS_DINODE_FMT_LOCAL) {
1013  		error = xrep_dir_recover_sf(rd);
1014  	} else {
1015  		error = xfs_iread_extents(rd->sc->tp, dp, XFS_DATA_FORK);
1016  		if (error)
1017  			return error;
1018  
1019  		error = xrep_dir_recover(rd);
1020  	}
1021  	if (error)
1022  		return error;
1023  
1024  	return xrep_dir_flush_stashed(rd);
1025  }
1026  
1027  /* Scan all files in the filesystem for dirents. */
1028  STATIC int
xrep_dir_salvage_entries(struct xrep_dir * rd)1029  xrep_dir_salvage_entries(
1030  	struct xrep_dir		*rd)
1031  {
1032  	struct xfs_scrub	*sc = rd->sc;
1033  	int			error;
1034  
1035  	/*
1036  	 * Drop the ILOCK on this directory so that we can scan for this
1037  	 * directory's parent.  Figure out who is going to be the parent of
1038  	 * this directory, then retake the ILOCK so that we can salvage
1039  	 * directory entries.
1040  	 */
1041  	xchk_iunlock(sc, XFS_ILOCK_EXCL);
1042  	error = xrep_dir_find_parent(rd);
1043  	xchk_ilock(sc, XFS_ILOCK_EXCL);
1044  	if (error)
1045  		return error;
1046  
1047  	/*
1048  	 * Collect directory entries by parsing raw leaf blocks to salvage
1049  	 * whatever we can.  When we're done, free the staging memory before
1050  	 * exchanging the directories to reduce memory usage.
1051  	 */
1052  	error = xrep_dir_find_entries(rd);
1053  	if (error)
1054  		return error;
1055  
1056  	/*
1057  	 * Cancel the repair transaction and drop the ILOCK so that we can
1058  	 * (later) use the atomic mapping exchange functions to compute the
1059  	 * correct block reservations and re-lock the inodes.
1060  	 *
1061  	 * We still hold IOLOCK_EXCL (aka i_rwsem) which will prevent directory
1062  	 * modifications, but there's nothing to prevent userspace from reading
1063  	 * the directory until we're ready for the exchange operation.  Reads
1064  	 * will return -EIO without shutting down the fs, so we're ok with
1065  	 * that.
1066  	 *
1067  	 * The VFS can change dotdot on us, but the findparent scan will keep
1068  	 * our incore parent inode up to date.  See the note on locking issues
1069  	 * for more details.
1070  	 */
1071  	error = xrep_trans_commit(sc);
1072  	if (error)
1073  		return error;
1074  
1075  	xchk_iunlock(sc, XFS_ILOCK_EXCL);
1076  	return 0;
1077  }
1078  
1079  
1080  /*
1081   * Examine a parent pointer of a file.  If it leads us back to the directory
1082   * that we're rebuilding, create an incore dirent from the parent pointer and
1083   * stash it.
1084   */
1085  STATIC int
xrep_dir_scan_pptr(struct xfs_scrub * sc,struct xfs_inode * ip,unsigned int attr_flags,const unsigned char * name,unsigned int namelen,const void * value,unsigned int valuelen,void * priv)1086  xrep_dir_scan_pptr(
1087  	struct xfs_scrub		*sc,
1088  	struct xfs_inode		*ip,
1089  	unsigned int			attr_flags,
1090  	const unsigned char		*name,
1091  	unsigned int			namelen,
1092  	const void			*value,
1093  	unsigned int			valuelen,
1094  	void				*priv)
1095  {
1096  	struct xfs_name			xname = {
1097  		.name			= name,
1098  		.len			= namelen,
1099  		.type			= xfs_mode_to_ftype(VFS_I(ip)->i_mode),
1100  	};
1101  	xfs_ino_t			parent_ino;
1102  	uint32_t			parent_gen;
1103  	struct xrep_dir			*rd = priv;
1104  	int				error;
1105  
1106  	if (!(attr_flags & XFS_ATTR_PARENT))
1107  		return 0;
1108  
1109  	/*
1110  	 * Ignore parent pointers that point back to a different dir, list the
1111  	 * wrong generation number, or are invalid.
1112  	 */
1113  	error = xfs_parent_from_attr(sc->mp, attr_flags, name, namelen, value,
1114  			valuelen, &parent_ino, &parent_gen);
1115  	if (error)
1116  		return error;
1117  
1118  	if (parent_ino != sc->ip->i_ino ||
1119  	    parent_gen != VFS_I(sc->ip)->i_generation)
1120  		return 0;
1121  
1122  	mutex_lock(&rd->pscan.lock);
1123  	error = xrep_dir_stash_createname(rd, &xname, ip->i_ino);
1124  	mutex_unlock(&rd->pscan.lock);
1125  	return error;
1126  }
1127  
1128  /*
1129   * If this child dirent points to the directory being repaired, remember that
1130   * fact so that we can reset the dotdot entry if necessary.
1131   */
1132  STATIC int
xrep_dir_scan_dirent(struct xfs_scrub * sc,struct xfs_inode * dp,xfs_dir2_dataptr_t dapos,const struct xfs_name * name,xfs_ino_t ino,void * priv)1133  xrep_dir_scan_dirent(
1134  	struct xfs_scrub	*sc,
1135  	struct xfs_inode	*dp,
1136  	xfs_dir2_dataptr_t	dapos,
1137  	const struct xfs_name	*name,
1138  	xfs_ino_t		ino,
1139  	void			*priv)
1140  {
1141  	struct xrep_dir		*rd = priv;
1142  
1143  	/* Dirent doesn't point to this directory. */
1144  	if (ino != rd->sc->ip->i_ino)
1145  		return 0;
1146  
1147  	/* Ignore garbage inum. */
1148  	if (!xfs_verify_dir_ino(rd->sc->mp, ino))
1149  		return 0;
1150  
1151  	/* No weird looking names. */
1152  	if (name->len >= MAXNAMELEN || name->len <= 0)
1153  		return 0;
1154  
1155  	/* Don't pick up dot or dotdot entries; we only want child dirents. */
1156  	if (xfs_dir2_samename(name, &xfs_name_dotdot) ||
1157  	    xfs_dir2_samename(name, &xfs_name_dot))
1158  		return 0;
1159  
1160  	trace_xrep_dir_stash_createname(sc->tempip, &xfs_name_dotdot,
1161  			dp->i_ino);
1162  
1163  	xrep_findparent_scan_found(&rd->pscan, dp->i_ino);
1164  	return 0;
1165  }
1166  
1167  /*
1168   * Decide if we want to look for child dirents or parent pointers in this file.
1169   * Skip the dir being repaired and any files being used to stage repairs.
1170   */
1171  static inline bool
xrep_dir_want_scan(struct xrep_dir * rd,const struct xfs_inode * ip)1172  xrep_dir_want_scan(
1173  	struct xrep_dir		*rd,
1174  	const struct xfs_inode	*ip)
1175  {
1176  	return ip != rd->sc->ip && !xrep_is_tempfile(ip);
1177  }
1178  
1179  /*
1180   * Take ILOCK on a file that we want to scan.
1181   *
1182   * Select ILOCK_EXCL if the file is a directory with an unloaded data bmbt or
1183   * has an unloaded attr bmbt.  Otherwise, take ILOCK_SHARED.
1184   */
1185  static inline unsigned int
xrep_dir_scan_ilock(struct xrep_dir * rd,struct xfs_inode * ip)1186  xrep_dir_scan_ilock(
1187  	struct xrep_dir		*rd,
1188  	struct xfs_inode	*ip)
1189  {
1190  	uint			lock_mode = XFS_ILOCK_SHARED;
1191  
1192  	/* Need to take the shared ILOCK to advance the iscan cursor. */
1193  	if (!xrep_dir_want_scan(rd, ip))
1194  		goto lock;
1195  
1196  	if (S_ISDIR(VFS_I(ip)->i_mode) && xfs_need_iread_extents(&ip->i_df)) {
1197  		lock_mode = XFS_ILOCK_EXCL;
1198  		goto lock;
1199  	}
1200  
1201  	if (xfs_inode_has_attr_fork(ip) && xfs_need_iread_extents(&ip->i_af))
1202  		lock_mode = XFS_ILOCK_EXCL;
1203  
1204  lock:
1205  	xfs_ilock(ip, lock_mode);
1206  	return lock_mode;
1207  }
1208  
1209  /*
1210   * Scan this file for relevant child dirents or parent pointers that point to
1211   * the directory we're rebuilding.
1212   */
1213  STATIC int
xrep_dir_scan_file(struct xrep_dir * rd,struct xfs_inode * ip)1214  xrep_dir_scan_file(
1215  	struct xrep_dir		*rd,
1216  	struct xfs_inode	*ip)
1217  {
1218  	unsigned int		lock_mode;
1219  	int			error = 0;
1220  
1221  	lock_mode = xrep_dir_scan_ilock(rd, ip);
1222  
1223  	if (!xrep_dir_want_scan(rd, ip))
1224  		goto scan_done;
1225  
1226  	/*
1227  	 * If the extended attributes look as though they has been zapped by
1228  	 * the inode record repair code, we cannot scan for parent pointers.
1229  	 */
1230  	if (xchk_pptr_looks_zapped(ip)) {
1231  		error = -EBUSY;
1232  		goto scan_done;
1233  	}
1234  
1235  	error = xchk_xattr_walk(rd->sc, ip, xrep_dir_scan_pptr, NULL, rd);
1236  	if (error)
1237  		goto scan_done;
1238  
1239  	if (S_ISDIR(VFS_I(ip)->i_mode)) {
1240  		/*
1241  		 * If the directory looks as though it has been zapped by the
1242  		 * inode record repair code, we cannot scan for child dirents.
1243  		 */
1244  		if (xchk_dir_looks_zapped(ip)) {
1245  			error = -EBUSY;
1246  			goto scan_done;
1247  		}
1248  
1249  		error = xchk_dir_walk(rd->sc, ip, xrep_dir_scan_dirent, rd);
1250  		if (error)
1251  			goto scan_done;
1252  	}
1253  
1254  scan_done:
1255  	xchk_iscan_mark_visited(&rd->pscan.iscan, ip);
1256  	xfs_iunlock(ip, lock_mode);
1257  	return error;
1258  }
1259  
/*
 * Scan all files in the filesystem for parent pointers that we can turn into
 * replacement dirents, and a dirent that we can use to set the dotdot pointer.
 *
 * The scrub transaction is cancelled and any ILOCK held on the directory being
 * repaired is dropped before the scan starts; the scan itself runs with an
 * empty transaction.  On success this returns 0 with that empty transaction
 * cancelled.  A scan that fails with -EBUSY is reported as -ECANCELED; any
 * other error is returned unchanged.
 */
STATIC int
xrep_dir_scan_dirtree(
	struct xrep_dir		*rd)
{
	struct xfs_scrub	*sc = rd->sc;
	struct xfs_inode	*ip;
	int			error;

	/* Roots of directory trees are their own parents. */
	if (sc->ip == sc->mp->m_rootip)
		xrep_findparent_scan_found(&rd->pscan, sc->ip->i_ino);

	/*
	 * Filesystem scans are time consuming.  Drop the directory ILOCK and
	 * all other resources for the duration of the scan and hope for the
	 * best.  The live update hooks will keep our scan information up to
	 * date even though we've dropped the locks.
	 */
	xchk_trans_cancel(sc);
	if (sc->ilock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL))
		xchk_iunlock(sc, sc->ilock_flags & (XFS_ILOCK_SHARED |
						    XFS_ILOCK_EXCL));
	error = xchk_trans_alloc_empty(sc);
	if (error)
		return error;

	/*
	 * The iterator returns 1 each time it hands us another inode.  Any
	 * other return value ends the loop and is treated as the scan result.
	 */
	while ((error = xchk_iscan_iter(&rd->pscan.iscan, &ip)) == 1) {
		bool		flush;

		/* We are finished with @ip whether or not the scan worked. */
		error = xrep_dir_scan_file(rd, ip);
		xchk_irele(sc, ip);
		if (error)
			break;

		/*
		 * Flush stashed dirent updates to constrain memory usage.  The
		 * stash size is sampled under pscan.lock, which is the same
		 * lock the live update hook holds while adding to the stash.
		 */
		mutex_lock(&rd->pscan.lock);
		flush = xrep_dir_want_flush_stashed(rd);
		mutex_unlock(&rd->pscan.lock);
		if (flush) {
			/*
			 * Give up the empty transaction, replay the stash with
			 * the tempfile's IOLOCK held, then allocate a fresh
			 * empty transaction to continue the scan.
			 */
			xchk_trans_cancel(sc);

			error = xrep_tempfile_iolock_polled(sc);
			if (error)
				break;

			/* Drop the tempfile IOLOCK even if the replay failed. */
			error = xrep_dir_replay_updates(rd);
			xrep_tempfile_iounlock(sc);
			if (error)
				break;

			error = xchk_trans_alloc_empty(sc);
			if (error)
				break;
		}

		if (xchk_should_terminate(sc, &error))
			break;
	}
	/* Always finish the iteration, no matter how the loop ended. */
	xchk_iscan_iter_finish(&rd->pscan.iscan);
	if (error) {
		/*
		 * If we couldn't grab an inode that was busy with a state
		 * change, change the error code so that we exit to userspace
		 * as quickly as possible.
		 */
		if (error == -EBUSY)
			return -ECANCELED;
		return error;
	}

	/*
	 * Cancel the empty transaction so that we can (later) use the atomic
	 * file mapping exchange functions to lock files and commit the new
	 * directory.
	 */
	xchk_trans_cancel(rd->sc);
	return 0;
}
1342  
1343  /*
1344   * Capture dirent updates being made by other threads which are relevant to the
1345   * directory being repaired.
1346   */
1347  STATIC int
xrep_dir_live_update(struct notifier_block * nb,unsigned long action,void * data)1348  xrep_dir_live_update(
1349  	struct notifier_block		*nb,
1350  	unsigned long			action,
1351  	void				*data)
1352  {
1353  	struct xfs_dir_update_params	*p = data;
1354  	struct xrep_dir			*rd;
1355  	struct xfs_scrub		*sc;
1356  	int				error = 0;
1357  
1358  	rd = container_of(nb, struct xrep_dir, pscan.dhook.dirent_hook.nb);
1359  	sc = rd->sc;
1360  
1361  	/*
1362  	 * This thread updated a child dirent in the directory that we're
1363  	 * rebuilding.  Stash the update for replay against the temporary
1364  	 * directory.
1365  	 */
1366  	if (p->dp->i_ino == sc->ip->i_ino &&
1367  	    xchk_iscan_want_live_update(&rd->pscan.iscan, p->ip->i_ino)) {
1368  		mutex_lock(&rd->pscan.lock);
1369  		if (p->delta > 0)
1370  			error = xrep_dir_stash_createname(rd, p->name,
1371  					p->ip->i_ino);
1372  		else
1373  			error = xrep_dir_stash_removename(rd, p->name,
1374  					p->ip->i_ino);
1375  		mutex_unlock(&rd->pscan.lock);
1376  		if (error)
1377  			goto out_abort;
1378  	}
1379  
1380  	/*
1381  	 * This thread updated another directory's child dirent that points to
1382  	 * the directory that we're rebuilding, so remember the new dotdot
1383  	 * target.
1384  	 */
1385  	if (p->ip->i_ino == sc->ip->i_ino &&
1386  	    xchk_iscan_want_live_update(&rd->pscan.iscan, p->dp->i_ino)) {
1387  		if (p->delta > 0) {
1388  			trace_xrep_dir_stash_createname(sc->tempip,
1389  					&xfs_name_dotdot,
1390  					p->dp->i_ino);
1391  
1392  			xrep_findparent_scan_found(&rd->pscan, p->dp->i_ino);
1393  		} else {
1394  			trace_xrep_dir_stash_removename(sc->tempip,
1395  					&xfs_name_dotdot,
1396  					rd->pscan.parent_ino);
1397  
1398  			xrep_findparent_scan_found(&rd->pscan, NULLFSINO);
1399  		}
1400  	}
1401  
1402  	return NOTIFY_DONE;
1403  out_abort:
1404  	xchk_iscan_abort(&rd->pscan.iscan);
1405  	return NOTIFY_DONE;
1406  }
1407  
1408  /*
1409   * Free all the directory blocks and reset the data fork.  The caller must
1410   * join the inode to the transaction.  This function returns with the inode
1411   * joined to a clean scrub transaction.
1412   */
1413  STATIC int
xrep_dir_reset_fork(struct xrep_dir * rd,xfs_ino_t parent_ino)1414  xrep_dir_reset_fork(
1415  	struct xrep_dir		*rd,
1416  	xfs_ino_t		parent_ino)
1417  {
1418  	struct xfs_scrub	*sc = rd->sc;
1419  	struct xfs_ifork	*ifp = xfs_ifork_ptr(sc->tempip, XFS_DATA_FORK);
1420  	int			error;
1421  
1422  	/* Unmap all the directory buffers. */
1423  	if (xfs_ifork_has_extents(ifp)) {
1424  		error = xrep_reap_ifork(sc, sc->tempip, XFS_DATA_FORK);
1425  		if (error)
1426  			return error;
1427  	}
1428  
1429  	trace_xrep_dir_reset_fork(sc->tempip, parent_ino);
1430  
1431  	/* Reset the data fork to an empty data fork. */
1432  	xfs_idestroy_fork(ifp);
1433  	ifp->if_bytes = 0;
1434  	sc->tempip->i_disk_size = 0;
1435  
1436  	/* Reinitialize the short form directory. */
1437  	xrep_dir_init_args(rd, sc->tempip, NULL);
1438  	return xfs_dir2_sf_create(&rd->args, parent_ino);
1439  }
1440  
1441  /*
1442   * Prepare both inodes' directory forks for exchanging mappings.  Promote the
1443   * tempfile from short format to leaf format, and if the file being repaired
1444   * has a short format data fork, turn it into an empty extent list.
1445   */
1446  STATIC int
xrep_dir_swap_prep(struct xfs_scrub * sc,bool temp_local,bool ip_local)1447  xrep_dir_swap_prep(
1448  	struct xfs_scrub	*sc,
1449  	bool			temp_local,
1450  	bool			ip_local)
1451  {
1452  	int			error;
1453  
1454  	/*
1455  	 * If the tempfile's directory is in shortform format, convert that to
1456  	 * a single leaf extent so that we can use the atomic mapping exchange.
1457  	 */
1458  	if (temp_local) {
1459  		struct xfs_da_args	args = {
1460  			.dp		= sc->tempip,
1461  			.geo		= sc->mp->m_dir_geo,
1462  			.whichfork	= XFS_DATA_FORK,
1463  			.trans		= sc->tp,
1464  			.total		= 1,
1465  			.owner		= sc->ip->i_ino,
1466  		};
1467  
1468  		error = xfs_dir2_sf_to_block(&args);
1469  		if (error)
1470  			return error;
1471  
1472  		/*
1473  		 * Roll the deferred log items to get us back to a clean
1474  		 * transaction.
1475  		 */
1476  		error = xfs_defer_finish(&sc->tp);
1477  		if (error)
1478  			return error;
1479  	}
1480  
1481  	/*
1482  	 * If the file being repaired had a shortform data fork, convert that
1483  	 * to an empty extent list in preparation for the atomic mapping
1484  	 * exchange.
1485  	 */
1486  	if (ip_local) {
1487  		struct xfs_ifork	*ifp;
1488  
1489  		ifp = xfs_ifork_ptr(sc->ip, XFS_DATA_FORK);
1490  		xfs_idestroy_fork(ifp);
1491  		ifp->if_format = XFS_DINODE_FMT_EXTENTS;
1492  		ifp->if_nextents = 0;
1493  		ifp->if_bytes = 0;
1494  		ifp->if_data = NULL;
1495  		ifp->if_height = 0;
1496  
1497  		xfs_trans_log_inode(sc->tp, sc->ip,
1498  				XFS_ILOG_CORE | XFS_ILOG_DDATA);
1499  	}
1500  
1501  	return 0;
1502  }
1503  
1504  /*
1505   * Replace the inode number of a directory entry.
1506   */
1507  static int
xrep_dir_replace(struct xrep_dir * rd,struct xfs_inode * dp,const struct xfs_name * name,xfs_ino_t inum,xfs_extlen_t total)1508  xrep_dir_replace(
1509  	struct xrep_dir		*rd,
1510  	struct xfs_inode	*dp,
1511  	const struct xfs_name	*name,
1512  	xfs_ino_t		inum,
1513  	xfs_extlen_t		total)
1514  {
1515  	struct xfs_scrub	*sc = rd->sc;
1516  	int			error;
1517  
1518  	ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
1519  
1520  	error = xfs_dir_ino_validate(sc->mp, inum);
1521  	if (error)
1522  		return error;
1523  
1524  	xrep_dir_init_args(rd, dp, name);
1525  	rd->args.inumber = inum;
1526  	rd->args.total = total;
1527  	return xfs_dir_replace_args(&rd->args);
1528  }
1529  
1530  /*
1531   * Reset the link count of this directory and adjust the unlinked list pointers
1532   * as needed.
1533   */
1534  STATIC int
xrep_dir_set_nlink(struct xrep_dir * rd)1535  xrep_dir_set_nlink(
1536  	struct xrep_dir		*rd)
1537  {
1538  	struct xfs_scrub	*sc = rd->sc;
1539  	struct xfs_inode	*dp = sc->ip;
1540  	struct xfs_perag	*pag;
1541  	unsigned int		new_nlink = min_t(unsigned long long,
1542  						  rd->subdirs + 2,
1543  						  XFS_NLINK_PINNED);
1544  	int			error;
1545  
1546  	/*
1547  	 * The directory is not on the incore unlinked list, which means that
1548  	 * it needs to be reachable via the directory tree.  Update the nlink
1549  	 * with our observed link count.  If the directory has no parent, it
1550  	 * will be moved to the orphanage.
1551  	 */
1552  	if (!xfs_inode_on_unlinked_list(dp))
1553  		goto reset_nlink;
1554  
1555  	/*
1556  	 * The directory is on the unlinked list and we did not find any
1557  	 * dirents.  Set the link count to zero and let the directory
1558  	 * inactivate when the last reference drops.
1559  	 */
1560  	if (rd->dirents == 0) {
1561  		rd->needs_adoption = false;
1562  		new_nlink = 0;
1563  		goto reset_nlink;
1564  	}
1565  
1566  	/*
1567  	 * The directory is on the unlinked list and we found dirents.  This
1568  	 * directory needs to be reachable via the directory tree.  Remove the
1569  	 * dir from the unlinked list and update nlink with the observed link
1570  	 * count.  If the directory has no parent, it will be moved to the
1571  	 * orphanage.
1572  	 */
1573  	pag = xfs_perag_get(sc->mp, XFS_INO_TO_AGNO(sc->mp, dp->i_ino));
1574  	if (!pag) {
1575  		ASSERT(0);
1576  		return -EFSCORRUPTED;
1577  	}
1578  
1579  	error = xfs_iunlink_remove(sc->tp, pag, dp);
1580  	xfs_perag_put(pag);
1581  	if (error)
1582  		return error;
1583  
1584  reset_nlink:
1585  	if (VFS_I(dp)->i_nlink != new_nlink)
1586  		set_nlink(VFS_I(dp), new_nlink);
1587  	return 0;
1588  }
1589  
1590  /*
1591   * Finish replaying stashed dirent updates, allocate a transaction for
1592   * exchanging data fork mappings, and take the ILOCKs of both directories
1593   * before we commit the new directory structure.
1594   */
1595  STATIC int
xrep_dir_finalize_tempdir(struct xrep_dir * rd)1596  xrep_dir_finalize_tempdir(
1597  	struct xrep_dir		*rd)
1598  {
1599  	struct xfs_scrub	*sc = rd->sc;
1600  	int			error;
1601  
1602  	if (!xfs_has_parent(sc->mp))
1603  		return xrep_tempexch_trans_alloc(sc, XFS_DATA_FORK, &rd->tx);
1604  
1605  	/*
1606  	 * Repair relies on the ILOCK to quiesce all possible dirent updates.
1607  	 * Replay all queued dirent updates into the tempdir before exchanging
1608  	 * the contents, even if that means dropping the ILOCKs and the
1609  	 * transaction.
1610  	 */
1611  	do {
1612  		error = xrep_dir_replay_updates(rd);
1613  		if (error)
1614  			return error;
1615  
1616  		error = xrep_tempexch_trans_alloc(sc, XFS_DATA_FORK, &rd->tx);
1617  		if (error)
1618  			return error;
1619  
1620  		if (xfarray_length(rd->dir_entries) == 0)
1621  			break;
1622  
1623  		xchk_trans_cancel(sc);
1624  		xrep_tempfile_iunlock_both(sc);
1625  	} while (!xchk_should_terminate(sc, &error));
1626  	return error;
1627  }
1628  
1629  /* Exchange the temporary directory's data fork with the one being repaired. */
1630  STATIC int
xrep_dir_swap(struct xrep_dir * rd)1631  xrep_dir_swap(
1632  	struct xrep_dir		*rd)
1633  {
1634  	struct xfs_scrub	*sc = rd->sc;
1635  	bool			ip_local, temp_local;
1636  	int			error = 0;
1637  
1638  	/*
1639  	 * If we never found the parent for this directory, temporarily assign
1640  	 * the root dir as the parent; we'll move this to the orphanage after
1641  	 * exchanging the dir contents.  We hold the ILOCK of the dir being
1642  	 * repaired, so we're not worried about racy updates of dotdot.
1643  	 */
1644  	ASSERT(sc->ilock_flags & XFS_ILOCK_EXCL);
1645  	if (rd->pscan.parent_ino == NULLFSINO) {
1646  		rd->needs_adoption = true;
1647  		rd->pscan.parent_ino = rd->sc->mp->m_sb.sb_rootino;
1648  	}
1649  
1650  	/*
1651  	 * Reset the temporary directory's '..' entry to point to the parent
1652  	 * that we found.  The temporary directory was created with the root
1653  	 * directory as the parent, so we can skip this if repairing a
1654  	 * subdirectory of the root.
1655  	 *
1656  	 * It's also possible that this replacement could also expand a sf
1657  	 * tempdir into block format.
1658  	 */
1659  	if (rd->pscan.parent_ino != sc->mp->m_rootip->i_ino) {
1660  		error = xrep_dir_replace(rd, rd->sc->tempip, &xfs_name_dotdot,
1661  				rd->pscan.parent_ino, rd->tx.req.resblks);
1662  		if (error)
1663  			return error;
1664  	}
1665  
1666  	/*
1667  	 * Changing the dot and dotdot entries could have changed the shape of
1668  	 * the directory, so we recompute these.
1669  	 */
1670  	ip_local = sc->ip->i_df.if_format == XFS_DINODE_FMT_LOCAL;
1671  	temp_local = sc->tempip->i_df.if_format == XFS_DINODE_FMT_LOCAL;
1672  
1673  	/*
1674  	 * If the both files have a local format data fork and the rebuilt
1675  	 * directory data would fit in the repaired file's data fork, copy
1676  	 * the contents from the tempfile and update the directory link count.
1677  	 * We're done now.
1678  	 */
1679  	if (ip_local && temp_local &&
1680  	    sc->tempip->i_disk_size <= xfs_inode_data_fork_size(sc->ip)) {
1681  		xrep_tempfile_copyout_local(sc, XFS_DATA_FORK);
1682  		return xrep_dir_set_nlink(rd);
1683  	}
1684  
1685  	/*
1686  	 * Clean the transaction before we start working on exchanging
1687  	 * directory contents.
1688  	 */
1689  	error = xrep_tempfile_roll_trans(rd->sc);
1690  	if (error)
1691  		return error;
1692  
1693  	/* Otherwise, make sure both data forks are in block-mapping mode. */
1694  	error = xrep_dir_swap_prep(sc, temp_local, ip_local);
1695  	if (error)
1696  		return error;
1697  
1698  	/*
1699  	 * Set nlink of the directory in the same transaction sequence that
1700  	 * (atomically) commits the new directory data.
1701  	 */
1702  	error = xrep_dir_set_nlink(rd);
1703  	if (error)
1704  		return error;
1705  
1706  	return xrep_tempexch_contents(sc, &rd->tx);
1707  }
1708  
1709  /*
1710   * Exchange the new directory contents (which we created in the tempfile) with
1711   * the directory being repaired.
1712   */
1713  STATIC int
xrep_dir_rebuild_tree(struct xrep_dir * rd)1714  xrep_dir_rebuild_tree(
1715  	struct xrep_dir		*rd)
1716  {
1717  	struct xfs_scrub	*sc = rd->sc;
1718  	int			error;
1719  
1720  	trace_xrep_dir_rebuild_tree(sc->ip, rd->pscan.parent_ino);
1721  
1722  	/*
1723  	 * Take the IOLOCK on the temporary file so that we can run dir
1724  	 * operations with the same locks held as we would for a normal file.
1725  	 * We still hold sc->ip's IOLOCK.
1726  	 */
1727  	error = xrep_tempfile_iolock_polled(rd->sc);
1728  	if (error)
1729  		return error;
1730  
1731  	/*
1732  	 * Allocate transaction, lock inodes, and make sure that we've replayed
1733  	 * all the stashed dirent updates to the tempdir.  After this point,
1734  	 * we're ready to exchange data fork mappings.
1735  	 */
1736  	error = xrep_dir_finalize_tempdir(rd);
1737  	if (error)
1738  		return error;
1739  
1740  	if (xchk_iscan_aborted(&rd->pscan.iscan))
1741  		return -ECANCELED;
1742  
1743  	/*
1744  	 * Exchange the tempdir's data fork with the file being repaired.  This
1745  	 * recreates the transaction and re-takes the ILOCK in the scrub
1746  	 * context.
1747  	 */
1748  	error = xrep_dir_swap(rd);
1749  	if (error)
1750  		return error;
1751  
1752  	/*
1753  	 * Release the old directory blocks and reset the data fork of the temp
1754  	 * directory to an empty shortform directory because inactivation does
1755  	 * nothing for directories.
1756  	 */
1757  	error = xrep_dir_reset_fork(rd, sc->mp->m_rootip->i_ino);
1758  	if (error)
1759  		return error;
1760  
1761  	/*
1762  	 * Roll to get a transaction without any inodes joined to it.  Then we
1763  	 * can drop the tempfile's ILOCK and IOLOCK before doing more work on
1764  	 * the scrub target directory.
1765  	 */
1766  	error = xfs_trans_roll(&sc->tp);
1767  	if (error)
1768  		return error;
1769  
1770  	xrep_tempfile_iunlock(sc);
1771  	xrep_tempfile_iounlock(sc);
1772  	return 0;
1773  }
1774  
1775  /* Set up the filesystem scan so we can regenerate directory entries. */
1776  STATIC int
xrep_dir_setup_scan(struct xrep_dir * rd)1777  xrep_dir_setup_scan(
1778  	struct xrep_dir		*rd)
1779  {
1780  	struct xfs_scrub	*sc = rd->sc;
1781  	char			*descr;
1782  	int			error;
1783  
1784  	/* Set up some staging memory for salvaging dirents. */
1785  	descr = xchk_xfile_ino_descr(sc, "directory entries");
1786  	error = xfarray_create(descr, 0, sizeof(struct xrep_dirent),
1787  			&rd->dir_entries);
1788  	kfree(descr);
1789  	if (error)
1790  		return error;
1791  
1792  	descr = xchk_xfile_ino_descr(sc, "directory entry names");
1793  	error = xfblob_create(descr, &rd->dir_names);
1794  	kfree(descr);
1795  	if (error)
1796  		goto out_xfarray;
1797  
1798  	if (xfs_has_parent(sc->mp))
1799  		error = __xrep_findparent_scan_start(sc, &rd->pscan,
1800  				xrep_dir_live_update);
1801  	else
1802  		error = xrep_findparent_scan_start(sc, &rd->pscan);
1803  	if (error)
1804  		goto out_xfblob;
1805  
1806  	return 0;
1807  
1808  out_xfblob:
1809  	xfblob_destroy(rd->dir_names);
1810  	rd->dir_names = NULL;
1811  out_xfarray:
1812  	xfarray_destroy(rd->dir_entries);
1813  	rd->dir_entries = NULL;
1814  	return error;
1815  }
1816  
/*
 * Move the current file to the orphanage.
 *
 * Caller must hold IOLOCK_EXCL on @sc->ip, and no other inode locks.  Upon
 * successful return, the scrub transaction will have enough extra reservation
 * to make the move; it will hold IOLOCK_EXCL and ILOCK_EXCL of @sc->ip and the
 * orphanage; and both inodes will be ijoined.
 *
 * NOTE(review): the body starts by committing the scrub transaction and
 * dropping ILOCK_EXCL on @sc->ip, and ends by unlocking the orphanage's ILOCK
 * and IOLOCK, so the locking contract stated above does not obviously match
 * what this function does -- confirm against the callers.
 */
STATIC int
xrep_dir_move_to_orphanage(
	struct xrep_dir		*rd)
{
	struct xfs_scrub	*sc = rd->sc;
	xfs_ino_t		orig_parent, new_parent;
	int			error;

	/*
	 * We are about to drop the ILOCK on sc->ip to lock the orphanage and
	 * prepare for the adoption.  Therefore, look up the old dotdot entry
	 * for sc->ip so that we can compare it after we re-lock sc->ip.
	 */
	error = xchk_dir_lookup(sc, sc->ip, &xfs_name_dotdot, &orig_parent);
	if (error)
		return error;

	/*
	 * Drop the ILOCK on the scrub target and commit the transaction.
	 * Adoption computes its own resource requirements and gathers the
	 * necessary components.
	 */
	error = xrep_trans_commit(sc);
	if (error)
		return error;
	xchk_iunlock(sc, XFS_ILOCK_EXCL);

	/*
	 * If we can take the orphanage's iolock then we're ready to move.
	 * Otherwise, release every lock still held on the scrub target and
	 * take both IOLOCKs together.
	 */
	if (!xrep_orphanage_ilock_nowait(sc, XFS_IOLOCK_EXCL)) {
		xchk_iunlock(sc, sc->ilock_flags);
		error = xrep_orphanage_iolock_two(sc);
		if (error)
			return error;
	}

	/* Grab transaction and ILOCK the two files. */
	error = xrep_adoption_trans_alloc(sc, &rd->adoption);
	if (error)
		return error;

	/* Compute the name to use for the adoption; the result is in rd->xname. */
	error = xrep_adoption_compute_name(&rd->adoption, &rd->xname);
	if (error)
		return error;

	/*
	 * Now that we've reacquired the ILOCK on sc->ip, look up the dotdot
	 * entry again.  If the parent changed or the child was unlinked while
	 * the child directory was unlocked, we don't need to move the child to
	 * the orphanage after all.
	 */
	error = xchk_dir_lookup(sc, sc->ip, &xfs_name_dotdot, &new_parent);
	if (error)
		return error;

	/*
	 * Attach to the orphanage if we still have a linked directory and it
	 * hasn't been moved.
	 */
	if (orig_parent == new_parent && VFS_I(sc->ip)->i_nlink > 0) {
		error = xrep_adoption_move(&rd->adoption);
		if (error)
			return error;
	}

	/*
	 * Launder the scrub transaction so we can drop the orphanage ILOCK
	 * and IOLOCK.  Return holding the scrub target's ILOCK and IOLOCK.
	 * The roll happens even when the move above was skipped.
	 */
	error = xrep_adoption_trans_roll(&rd->adoption);
	if (error)
		return error;

	xrep_orphanage_iunlock(sc, XFS_ILOCK_EXCL);
	xrep_orphanage_iunlock(sc, XFS_IOLOCK_EXCL);
	return 0;
}
1901  
1902  /*
1903   * Repair the directory metadata.
1904   *
1905   * XXX: Directory entry buffers can be multiple fsblocks in size.  The buffer
1906   * cache in XFS can't handle aliased multiblock buffers, so this might
1907   * misbehave if the directory blocks are crosslinked with other filesystem
1908   * metadata.
1909   *
1910   * XXX: Is it necessary to check the dcache for this directory to make sure
1911   * that we always recreate every cached entry?
1912   */
1913  int
xrep_directory(struct xfs_scrub * sc)1914  xrep_directory(
1915  	struct xfs_scrub	*sc)
1916  {
1917  	struct xrep_dir		*rd = sc->buf;
1918  	int			error;
1919  
1920  	/* The rmapbt is required to reap the old data fork. */
1921  	if (!xfs_has_rmapbt(sc->mp))
1922  		return -EOPNOTSUPP;
1923  	/* We require atomic file exchange range to rebuild anything. */
1924  	if (!xfs_has_exchange_range(sc->mp))
1925  		return -EOPNOTSUPP;
1926  
1927  	error = xrep_dir_setup_scan(rd);
1928  	if (error)
1929  		return error;
1930  
1931  	if (xfs_has_parent(sc->mp))
1932  		error = xrep_dir_scan_dirtree(rd);
1933  	else
1934  		error = xrep_dir_salvage_entries(rd);
1935  	if (error)
1936  		goto out_teardown;
1937  
1938  	/* Last chance to abort before we start committing fixes. */
1939  	if (xchk_should_terminate(sc, &error))
1940  		goto out_teardown;
1941  
1942  	error = xrep_dir_rebuild_tree(rd);
1943  	if (error)
1944  		goto out_teardown;
1945  
1946  	if (rd->needs_adoption) {
1947  		if (!xrep_orphanage_can_adopt(rd->sc))
1948  			error = -EFSCORRUPTED;
1949  		else
1950  			error = xrep_dir_move_to_orphanage(rd);
1951  		if (error)
1952  			goto out_teardown;
1953  	}
1954  
1955  out_teardown:
1956  	xrep_dir_teardown(sc);
1957  	return error;
1958  }
1959