1  // SPDX-License-Identifier: GPL-2.0-or-later
2  /*
3   * Copyright (c) 2018-2024 Oracle.  All Rights Reserved.
4   * Author: Darrick J. Wong <djwong@kernel.org>
5   */
6  #include "xfs.h"
7  #include "xfs_fs.h"
8  #include "xfs_shared.h"
9  #include "xfs_format.h"
10  #include "xfs_trans_resv.h"
11  #include "xfs_mount.h"
12  #include "xfs_defer.h"
13  #include "xfs_btree.h"
14  #include "xfs_bit.h"
15  #include "xfs_log_format.h"
16  #include "xfs_trans.h"
17  #include "xfs_sb.h"
18  #include "xfs_inode.h"
19  #include "xfs_da_format.h"
20  #include "xfs_da_btree.h"
21  #include "xfs_dir2.h"
22  #include "xfs_attr.h"
23  #include "xfs_attr_leaf.h"
24  #include "xfs_attr_sf.h"
25  #include "xfs_attr_remote.h"
26  #include "xfs_bmap.h"
27  #include "xfs_bmap_util.h"
28  #include "xfs_exchmaps.h"
29  #include "xfs_exchrange.h"
30  #include "xfs_acl.h"
31  #include "xfs_parent.h"
32  #include "scrub/xfs_scrub.h"
33  #include "scrub/scrub.h"
34  #include "scrub/common.h"
35  #include "scrub/trace.h"
36  #include "scrub/repair.h"
37  #include "scrub/tempfile.h"
38  #include "scrub/tempexch.h"
39  #include "scrub/xfile.h"
40  #include "scrub/xfarray.h"
41  #include "scrub/xfblob.h"
42  #include "scrub/attr.h"
43  #include "scrub/reap.h"
44  #include "scrub/attr_repair.h"
45  
46  /*
47   * Extended Attribute Repair
48   * =========================
49   *
 * We repair extended attributes by reading the attr leaf blocks looking for
 * attribute entries that look salvageable (name passes verifiers, value can
 * be retrieved, etc).  Each extended attribute worth salvaging is stashed in
 * memory, and the stashed entries are periodically replayed into a temporary
 * file to constrain memory use.  Batching the construction of the temporary
 * extended attribute structure in this fashion reduces lock cycling of the
 * file being repaired and the temporary file.
57   *
58   * When salvaging completes, the remaining stashed attributes are replayed to
59   * the temporary file.  An atomic file contents exchange is used to commit the
60   * new xattr blocks to the file being repaired.  This will disrupt attrmulti
61   * cursors.
62   */
63  
64  struct xrep_xattr_key {
65  	/* Cookie for retrieval of the xattr name. */
66  	xfblob_cookie		name_cookie;
67  
68  	/* Cookie for retrieval of the xattr value. */
69  	xfblob_cookie		value_cookie;
70  
71  	/* XFS_ATTR_* flags */
72  	int			flags;
73  
74  	/* Length of the value and name. */
75  	uint32_t		valuelen;
76  	uint16_t		namelen;
77  };
78  
/*
 * Once the salvaged attrs held in xattr_records/xattr_blobs exceed eight
 * pages of memory, they are written out to the temp file.
 */
#define XREP_XATTR_MAX_STASH_BYTES	(8 * PAGE_SIZE)
84  
85  struct xrep_xattr {
86  	struct xfs_scrub	*sc;
87  
88  	/* Information for exchanging attr fork mappings at the end. */
89  	struct xrep_tempexch	tx;
90  
91  	/* xattr keys */
92  	struct xfarray		*xattr_records;
93  
94  	/* xattr values */
95  	struct xfblob		*xattr_blobs;
96  
97  	/* Number of attributes that we are salvaging. */
98  	unsigned long long	attrs_found;
99  
100  	/* Can we flush stashed attrs to the tempfile? */
101  	bool			can_flush;
102  
103  	/* Did the live update fail, and hence the repair is now out of date? */
104  	bool			live_update_aborted;
105  
106  	/* Lock protecting parent pointer updates */
107  	struct mutex		lock;
108  
109  	/* Fixed-size array of xrep_xattr_pptr structures. */
110  	struct xfarray		*pptr_recs;
111  
112  	/* Blobs containing parent pointer names. */
113  	struct xfblob		*pptr_names;
114  
115  	/* Hook to capture parent pointer updates. */
116  	struct xfs_dir_hook	dhook;
117  
118  	/* Scratch buffer for capturing parent pointers. */
119  	struct xfs_da_args	pptr_args;
120  
121  	/* Name buffer */
122  	struct xfs_name		xname;
123  	char			namebuf[MAXNAMELEN];
124  };
125  
/* Stashed pptr update action: create a parent pointer in the tempfile. */
#define XREP_XATTR_PPTR_ADD	(1)

/* Stashed pptr update action: remove a parent pointer from the tempfile. */
#define XREP_XATTR_PPTR_REMOVE	(2)
131  
132  /* A stashed parent pointer update. */
133  struct xrep_xattr_pptr {
134  	/* Cookie for retrieval of the pptr name. */
135  	xfblob_cookie		name_cookie;
136  
137  	/* Parent pointer record. */
138  	struct xfs_parent_rec	pptr_rec;
139  
140  	/* Length of the pptr name. */
141  	uint8_t			namelen;
142  
143  	/* XREP_XATTR_PPTR_{ADD,REMOVE} */
144  	uint8_t			action;
145  };
146  
147  /* Set up to recreate the extended attributes. */
148  int
xrep_setup_xattr(struct xfs_scrub * sc)149  xrep_setup_xattr(
150  	struct xfs_scrub	*sc)
151  {
152  	if (xfs_has_parent(sc->mp))
153  		xchk_fsgates_enable(sc, XCHK_FSGATES_DIRENTS);
154  
155  	return xrep_tempfile_create(sc, S_IFREG);
156  }
157  
158  /*
159   * Decide if we want to salvage this attribute.  We don't bother with
160   * incomplete or oversized keys or values.  The @value parameter can be null
161   * for remote attrs.
162   */
163  STATIC int
xrep_xattr_want_salvage(struct xrep_xattr * rx,unsigned int attr_flags,const void * name,int namelen,const void * value,int valuelen)164  xrep_xattr_want_salvage(
165  	struct xrep_xattr	*rx,
166  	unsigned int		attr_flags,
167  	const void		*name,
168  	int			namelen,
169  	const void		*value,
170  	int			valuelen)
171  {
172  	if (attr_flags & XFS_ATTR_INCOMPLETE)
173  		return false;
174  	if (namelen > XATTR_NAME_MAX || namelen <= 0)
175  		return false;
176  	if (!xfs_attr_namecheck(attr_flags, name, namelen))
177  		return false;
178  	if (valuelen > XATTR_SIZE_MAX || valuelen < 0)
179  		return false;
180  	if (attr_flags & XFS_ATTR_PARENT)
181  		return xfs_parent_valuecheck(rx->sc->mp, value, valuelen);
182  
183  	return true;
184  }
185  
186  /* Allocate an in-core record to hold xattrs while we rebuild the xattr data. */
187  STATIC int
xrep_xattr_salvage_key(struct xrep_xattr * rx,int flags,unsigned char * name,int namelen,unsigned char * value,int valuelen)188  xrep_xattr_salvage_key(
189  	struct xrep_xattr	*rx,
190  	int			flags,
191  	unsigned char		*name,
192  	int			namelen,
193  	unsigned char		*value,
194  	int			valuelen)
195  {
196  	struct xrep_xattr_key	key = {
197  		.valuelen	= valuelen,
198  		.flags		= flags & XFS_ATTR_NSP_ONDISK_MASK,
199  	};
200  	unsigned int		i = 0;
201  	int			error = 0;
202  
203  	if (xchk_should_terminate(rx->sc, &error))
204  		return error;
205  
206  	/*
207  	 * Truncate the name to the first character that would trip namecheck.
208  	 * If we no longer have a name after that, ignore this attribute.
209  	 */
210  	if (flags & XFS_ATTR_PARENT) {
211  		key.namelen = namelen;
212  
213  		trace_xrep_xattr_salvage_pptr(rx->sc->ip, flags, name,
214  				key.namelen, value, valuelen);
215  	} else {
216  		while (i < namelen && name[i] != 0)
217  			i++;
218  		if (i == 0)
219  			return 0;
220  		key.namelen = i;
221  
222  		trace_xrep_xattr_salvage_rec(rx->sc->ip, flags, name,
223  				key.namelen, valuelen);
224  	}
225  
226  	error = xfblob_store(rx->xattr_blobs, &key.name_cookie, name,
227  			key.namelen);
228  	if (error)
229  		return error;
230  
231  	error = xfblob_store(rx->xattr_blobs, &key.value_cookie, value,
232  			key.valuelen);
233  	if (error)
234  		return error;
235  
236  	error = xfarray_append(rx->xattr_records, &key);
237  	if (error)
238  		return error;
239  
240  	rx->attrs_found++;
241  	return 0;
242  }
243  
244  /*
245   * Record a shortform extended attribute key & value for later reinsertion
246   * into the inode.
247   */
248  STATIC int
xrep_xattr_salvage_sf_attr(struct xrep_xattr * rx,struct xfs_attr_sf_hdr * hdr,struct xfs_attr_sf_entry * sfe)249  xrep_xattr_salvage_sf_attr(
250  	struct xrep_xattr		*rx,
251  	struct xfs_attr_sf_hdr		*hdr,
252  	struct xfs_attr_sf_entry	*sfe)
253  {
254  	struct xfs_scrub		*sc = rx->sc;
255  	struct xchk_xattr_buf		*ab = sc->buf;
256  	unsigned char			*name = sfe->nameval;
257  	unsigned char			*value = &sfe->nameval[sfe->namelen];
258  
259  	if (!xchk_xattr_set_map(sc, ab->usedmap, (char *)name - (char *)hdr,
260  			sfe->namelen))
261  		return 0;
262  
263  	if (!xchk_xattr_set_map(sc, ab->usedmap, (char *)value - (char *)hdr,
264  			sfe->valuelen))
265  		return 0;
266  
267  	if (!xrep_xattr_want_salvage(rx, sfe->flags, sfe->nameval,
268  			sfe->namelen, value, sfe->valuelen))
269  		return 0;
270  
271  	return xrep_xattr_salvage_key(rx, sfe->flags, sfe->nameval,
272  			sfe->namelen, value, sfe->valuelen);
273  }
274  
275  /*
276   * Record a local format extended attribute key & value for later reinsertion
277   * into the inode.
278   */
279  STATIC int
xrep_xattr_salvage_local_attr(struct xrep_xattr * rx,struct xfs_attr_leaf_entry * ent,unsigned int nameidx,const char * buf_end,struct xfs_attr_leaf_name_local * lentry)280  xrep_xattr_salvage_local_attr(
281  	struct xrep_xattr		*rx,
282  	struct xfs_attr_leaf_entry	*ent,
283  	unsigned int			nameidx,
284  	const char			*buf_end,
285  	struct xfs_attr_leaf_name_local	*lentry)
286  {
287  	struct xchk_xattr_buf		*ab = rx->sc->buf;
288  	unsigned char			*value;
289  	unsigned int			valuelen;
290  	unsigned int			namesize;
291  
292  	/*
293  	 * Decode the leaf local entry format.  If something seems wrong, we
294  	 * junk the attribute.
295  	 */
296  	value = &lentry->nameval[lentry->namelen];
297  	valuelen = be16_to_cpu(lentry->valuelen);
298  	namesize = xfs_attr_leaf_entsize_local(lentry->namelen, valuelen);
299  	if ((char *)lentry + namesize > buf_end)
300  		return 0;
301  	if (!xrep_xattr_want_salvage(rx, ent->flags, lentry->nameval,
302  			lentry->namelen, value, valuelen))
303  		return 0;
304  	if (!xchk_xattr_set_map(rx->sc, ab->usedmap, nameidx, namesize))
305  		return 0;
306  
307  	/* Try to save this attribute. */
308  	return xrep_xattr_salvage_key(rx, ent->flags, lentry->nameval,
309  			lentry->namelen, value, valuelen);
310  }
311  
312  /*
313   * Record a remote format extended attribute key & value for later reinsertion
314   * into the inode.
315   */
316  STATIC int
xrep_xattr_salvage_remote_attr(struct xrep_xattr * rx,struct xfs_attr_leaf_entry * ent,unsigned int nameidx,const char * buf_end,struct xfs_attr_leaf_name_remote * rentry,unsigned int ent_idx,struct xfs_buf * leaf_bp)317  xrep_xattr_salvage_remote_attr(
318  	struct xrep_xattr		*rx,
319  	struct xfs_attr_leaf_entry	*ent,
320  	unsigned int			nameidx,
321  	const char			*buf_end,
322  	struct xfs_attr_leaf_name_remote *rentry,
323  	unsigned int			ent_idx,
324  	struct xfs_buf			*leaf_bp)
325  {
326  	struct xchk_xattr_buf		*ab = rx->sc->buf;
327  	struct xfs_da_args		args = {
328  		.trans			= rx->sc->tp,
329  		.dp			= rx->sc->ip,
330  		.index			= ent_idx,
331  		.geo			= rx->sc->mp->m_attr_geo,
332  		.owner			= rx->sc->ip->i_ino,
333  		.attr_filter		= ent->flags & XFS_ATTR_NSP_ONDISK_MASK,
334  		.namelen		= rentry->namelen,
335  		.name			= rentry->name,
336  		.value			= ab->value,
337  		.valuelen		= be32_to_cpu(rentry->valuelen),
338  	};
339  	unsigned int			namesize;
340  	int				error;
341  
342  	/*
343  	 * Decode the leaf remote entry format.  If something seems wrong, we
344  	 * junk the attribute.  Note that we should never find a zero-length
345  	 * remote attribute value.
346  	 */
347  	namesize = xfs_attr_leaf_entsize_remote(rentry->namelen);
348  	if ((char *)rentry + namesize > buf_end)
349  		return 0;
350  	if (args.valuelen == 0 ||
351  	    !xrep_xattr_want_salvage(rx, ent->flags, rentry->name,
352  			rentry->namelen, NULL, args.valuelen))
353  		return 0;
354  	if (!xchk_xattr_set_map(rx->sc, ab->usedmap, nameidx, namesize))
355  		return 0;
356  
357  	/*
358  	 * Enlarge the buffer (if needed) to hold the value that we're trying
359  	 * to salvage from the old extended attribute data.
360  	 */
361  	error = xchk_setup_xattr_buf(rx->sc, args.valuelen);
362  	if (error == -ENOMEM)
363  		error = -EDEADLOCK;
364  	if (error)
365  		return error;
366  
367  	/* Look up the remote value and stash it for reconstruction. */
368  	error = xfs_attr3_leaf_getvalue(leaf_bp, &args);
369  	if (error || args.rmtblkno == 0)
370  		goto err_free;
371  
372  	error = xfs_attr_rmtval_get(&args);
373  	if (error)
374  		goto err_free;
375  
376  	/* Try to save this attribute. */
377  	error = xrep_xattr_salvage_key(rx, ent->flags, rentry->name,
378  			rentry->namelen, ab->value, args.valuelen);
379  err_free:
380  	/* remote value was garbage, junk it */
381  	if (error == -EFSBADCRC || error == -EFSCORRUPTED)
382  		error = 0;
383  	return error;
384  }
385  
386  /* Extract every xattr key that we can from this attr fork block. */
387  STATIC int
xrep_xattr_recover_leaf(struct xrep_xattr * rx,struct xfs_buf * bp)388  xrep_xattr_recover_leaf(
389  	struct xrep_xattr		*rx,
390  	struct xfs_buf			*bp)
391  {
392  	struct xfs_attr3_icleaf_hdr	leafhdr;
393  	struct xfs_scrub		*sc = rx->sc;
394  	struct xfs_mount		*mp = sc->mp;
395  	struct xfs_attr_leafblock	*leaf;
396  	struct xfs_attr_leaf_name_local	*lentry;
397  	struct xfs_attr_leaf_name_remote *rentry;
398  	struct xfs_attr_leaf_entry	*ent;
399  	struct xfs_attr_leaf_entry	*entries;
400  	struct xchk_xattr_buf		*ab = rx->sc->buf;
401  	char				*buf_end;
402  	size_t				off;
403  	unsigned int			nameidx;
404  	unsigned int			hdrsize;
405  	int				i;
406  	int				error = 0;
407  
408  	bitmap_zero(ab->usedmap, mp->m_attr_geo->blksize);
409  
410  	/* Check the leaf header */
411  	leaf = bp->b_addr;
412  	xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &leafhdr, leaf);
413  	hdrsize = xfs_attr3_leaf_hdr_size(leaf);
414  	xchk_xattr_set_map(sc, ab->usedmap, 0, hdrsize);
415  	entries = xfs_attr3_leaf_entryp(leaf);
416  
417  	buf_end = (char *)bp->b_addr + mp->m_attr_geo->blksize;
418  	for (i = 0, ent = entries; i < leafhdr.count; ent++, i++) {
419  		if (xchk_should_terminate(sc, &error))
420  			return error;
421  
422  		/* Skip key if it conflicts with something else? */
423  		off = (char *)ent - (char *)leaf;
424  		if (!xchk_xattr_set_map(sc, ab->usedmap, off,
425  				sizeof(xfs_attr_leaf_entry_t)))
426  			continue;
427  
428  		/* Check the name information. */
429  		nameidx = be16_to_cpu(ent->nameidx);
430  		if (nameidx < leafhdr.firstused ||
431  		    nameidx >= mp->m_attr_geo->blksize)
432  			continue;
433  
434  		if (ent->flags & XFS_ATTR_LOCAL) {
435  			lentry = xfs_attr3_leaf_name_local(leaf, i);
436  			error = xrep_xattr_salvage_local_attr(rx, ent, nameidx,
437  					buf_end, lentry);
438  		} else {
439  			rentry = xfs_attr3_leaf_name_remote(leaf, i);
440  			error = xrep_xattr_salvage_remote_attr(rx, ent, nameidx,
441  					buf_end, rentry, i, bp);
442  		}
443  		if (error)
444  			return error;
445  	}
446  
447  	return 0;
448  }
449  
450  /* Try to recover shortform attrs. */
451  STATIC int
xrep_xattr_recover_sf(struct xrep_xattr * rx)452  xrep_xattr_recover_sf(
453  	struct xrep_xattr		*rx)
454  {
455  	struct xfs_scrub		*sc = rx->sc;
456  	struct xchk_xattr_buf		*ab = sc->buf;
457  	struct xfs_attr_sf_hdr		*hdr;
458  	struct xfs_attr_sf_entry	*sfe;
459  	struct xfs_attr_sf_entry	*next;
460  	struct xfs_ifork		*ifp;
461  	unsigned char			*end;
462  	int				i;
463  	int				error = 0;
464  
465  	ifp = xfs_ifork_ptr(rx->sc->ip, XFS_ATTR_FORK);
466  	hdr = ifp->if_data;
467  
468  	bitmap_zero(ab->usedmap, ifp->if_bytes);
469  	end = (unsigned char *)ifp->if_data + ifp->if_bytes;
470  	xchk_xattr_set_map(sc, ab->usedmap, 0, sizeof(*hdr));
471  
472  	sfe = xfs_attr_sf_firstentry(hdr);
473  	if ((unsigned char *)sfe > end)
474  		return 0;
475  
476  	for (i = 0; i < hdr->count; i++) {
477  		if (xchk_should_terminate(sc, &error))
478  			return error;
479  
480  		next = xfs_attr_sf_nextentry(sfe);
481  		if ((unsigned char *)next > end)
482  			break;
483  
484  		if (xchk_xattr_set_map(sc, ab->usedmap,
485  				(char *)sfe - (char *)hdr,
486  				sizeof(struct xfs_attr_sf_entry))) {
487  			/*
488  			 * No conflicts with the sf entry; let's save this
489  			 * attribute.
490  			 */
491  			error = xrep_xattr_salvage_sf_attr(rx, hdr, sfe);
492  			if (error)
493  				return error;
494  		}
495  
496  		sfe = next;
497  	}
498  
499  	return 0;
500  }
501  
502  /*
503   * Try to return a buffer of xattr data for a given physical extent.
504   *
505   * Because the buffer cache get function complains if it finds a buffer
506   * matching the block number but not matching the length, we must be careful to
507   * look for incore buffers (up to the maximum length of a remote value) that
508   * could be hiding anywhere in the physical range.  If we find an incore
509   * buffer, we can pass that to the caller.  Optionally, read a single block and
510   * pass that back.
511   *
512   * Note the subtlety that remote attr value blocks for which there is no incore
513   * buffer will be passed to the callback one block at a time.  These buffers
514   * will not have any ops attached and must be staled to prevent aliasing with
515   * multiblock buffers once we drop the ILOCK.
516   */
517  STATIC int
xrep_xattr_find_buf(struct xfs_mount * mp,xfs_fsblock_t fsbno,xfs_extlen_t max_len,bool can_read,struct xfs_buf ** bpp)518  xrep_xattr_find_buf(
519  	struct xfs_mount	*mp,
520  	xfs_fsblock_t		fsbno,
521  	xfs_extlen_t		max_len,
522  	bool			can_read,
523  	struct xfs_buf		**bpp)
524  {
525  	struct xrep_bufscan	scan = {
526  		.daddr		= XFS_FSB_TO_DADDR(mp, fsbno),
527  		.max_sectors	= xrep_bufscan_max_sectors(mp, max_len),
528  		.daddr_step	= XFS_FSB_TO_BB(mp, 1),
529  	};
530  	struct xfs_buf		*bp;
531  
532  	while ((bp = xrep_bufscan_advance(mp, &scan)) != NULL) {
533  		*bpp = bp;
534  		return 0;
535  	}
536  
537  	if (!can_read) {
538  		*bpp = NULL;
539  		return 0;
540  	}
541  
542  	return xfs_buf_read(mp->m_ddev_targp, scan.daddr, XFS_FSB_TO_BB(mp, 1),
543  			XBF_TRYLOCK, bpp, NULL);
544  }
545  
546  /*
547   * Deal with a buffer that we found during our walk of the attr fork.
548   *
549   * Attribute leaf and node blocks are simple -- they're a single block, so we
550   * can walk them one at a time and we never have to worry about discontiguous
551   * multiblock buffers like we do for directories.
552   *
553   * Unfortunately, remote attr blocks add a lot of complexity here.  Each disk
554   * block is totally self contained, in the sense that the v5 header provides no
555   * indication that there could be more data in the next block.  The incore
556   * buffers can span multiple blocks, though they never cross extent records.
557   * However, they don't necessarily start or end on an extent record boundary.
558   * Therefore, we need a special buffer find function to walk the buffer cache
559   * for us.
560   *
561   * The caller must hold the ILOCK on the file being repaired.  We use
562   * XBF_TRYLOCK here to skip any locked buffer on the assumption that we don't
563   * own the block and don't want to hang the system on a potentially garbage
564   * buffer.
565   */
566  STATIC int
xrep_xattr_recover_block(struct xrep_xattr * rx,xfs_dablk_t dabno,xfs_fsblock_t fsbno,xfs_extlen_t max_len,xfs_extlen_t * actual_len)567  xrep_xattr_recover_block(
568  	struct xrep_xattr	*rx,
569  	xfs_dablk_t		dabno,
570  	xfs_fsblock_t		fsbno,
571  	xfs_extlen_t		max_len,
572  	xfs_extlen_t		*actual_len)
573  {
574  	struct xfs_da_blkinfo	*info;
575  	struct xfs_buf		*bp;
576  	int			error;
577  
578  	error = xrep_xattr_find_buf(rx->sc->mp, fsbno, max_len, true, &bp);
579  	if (error)
580  		return error;
581  	info = bp->b_addr;
582  	*actual_len = XFS_BB_TO_FSB(rx->sc->mp, bp->b_length);
583  
584  	trace_xrep_xattr_recover_leafblock(rx->sc->ip, dabno,
585  			be16_to_cpu(info->magic));
586  
587  	/*
588  	 * If the buffer has the right magic number for an attr leaf block and
589  	 * passes a structure check (we don't care about checksums), salvage
590  	 * as much as we can from the block. */
591  	if (info->magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC) &&
592  	    xrep_buf_verify_struct(bp, &xfs_attr3_leaf_buf_ops) &&
593  	    xfs_attr3_leaf_header_check(bp, rx->sc->ip->i_ino) == NULL)
594  		error = xrep_xattr_recover_leaf(rx, bp);
595  
596  	/*
597  	 * If the buffer didn't already have buffer ops set, it was read in by
598  	 * the _find_buf function and could very well be /part/ of a multiblock
599  	 * remote block.  Mark it stale so that it doesn't hang around in
600  	 * memory to cause problems.
601  	 */
602  	if (bp->b_ops == NULL)
603  		xfs_buf_stale(bp);
604  
605  	xfs_buf_relse(bp);
606  	return error;
607  }
608  
609  /* Insert one xattr key/value. */
610  STATIC int
xrep_xattr_insert_rec(struct xrep_xattr * rx,const struct xrep_xattr_key * key)611  xrep_xattr_insert_rec(
612  	struct xrep_xattr		*rx,
613  	const struct xrep_xattr_key	*key)
614  {
615  	struct xfs_da_args		args = {
616  		.dp			= rx->sc->tempip,
617  		.attr_filter		= key->flags,
618  		.namelen		= key->namelen,
619  		.valuelen		= key->valuelen,
620  		.owner			= rx->sc->ip->i_ino,
621  		.geo			= rx->sc->mp->m_attr_geo,
622  		.whichfork		= XFS_ATTR_FORK,
623  		.op_flags		= XFS_DA_OP_OKNOENT,
624  	};
625  	struct xchk_xattr_buf		*ab = rx->sc->buf;
626  	int				error;
627  
628  	/*
629  	 * Grab pointers to the scrub buffer so that we can use them to insert
630  	 * attrs into the temp file.
631  	 */
632  	args.name = ab->name;
633  	args.value = ab->value;
634  
635  	/*
636  	 * The attribute name is stored near the end of the in-core buffer,
637  	 * though we reserve one more byte to ensure null termination.
638  	 */
639  	ab->name[XATTR_NAME_MAX] = 0;
640  
641  	error = xfblob_load(rx->xattr_blobs, key->name_cookie, ab->name,
642  			key->namelen);
643  	if (error)
644  		return error;
645  
646  	error = xfblob_free(rx->xattr_blobs, key->name_cookie);
647  	if (error)
648  		return error;
649  
650  	error = xfblob_load(rx->xattr_blobs, key->value_cookie, args.value,
651  			key->valuelen);
652  	if (error)
653  		return error;
654  
655  	error = xfblob_free(rx->xattr_blobs, key->value_cookie);
656  	if (error)
657  		return error;
658  
659  	ab->name[key->namelen] = 0;
660  
661  	if (key->flags & XFS_ATTR_PARENT) {
662  		trace_xrep_xattr_insert_pptr(rx->sc->tempip, key->flags,
663  				ab->name, key->namelen, ab->value,
664  				key->valuelen);
665  		args.op_flags |= XFS_DA_OP_LOGGED;
666  	} else {
667  		trace_xrep_xattr_insert_rec(rx->sc->tempip, key->flags,
668  				ab->name, key->namelen, key->valuelen);
669  	}
670  
671  	/*
672  	 * xfs_attr_set creates and commits its own transaction.  If the attr
673  	 * already exists, we'll just drop it during the rebuild.
674  	 */
675  	xfs_attr_sethash(&args);
676  	error = xfs_attr_set(&args, XFS_ATTRUPDATE_CREATE, false);
677  	if (error == -EEXIST)
678  		error = 0;
679  
680  	return error;
681  }
682  
683  /*
684   * Periodically flush salvaged attributes to the temporary file.  This is done
685   * to reduce the memory requirements of the xattr rebuild because files can
686   * contain millions of attributes.
687   */
688  STATIC int
xrep_xattr_flush_stashed(struct xrep_xattr * rx)689  xrep_xattr_flush_stashed(
690  	struct xrep_xattr	*rx)
691  {
692  	xfarray_idx_t		array_cur;
693  	int			error;
694  
695  	/*
696  	 * Entering this function, the scrub context has a reference to the
697  	 * inode being repaired, the temporary file, and a scrub transaction
698  	 * that we use during xattr salvaging to avoid livelocking if there
699  	 * are cycles in the xattr structures.  We hold ILOCK_EXCL on both
700  	 * the inode being repaired, though it is not ijoined to the scrub
701  	 * transaction.
702  	 *
703  	 * To constrain kernel memory use, we occasionally flush salvaged
704  	 * xattrs from the xfarray and xfblob structures into the temporary
705  	 * file in preparation for exchanging the xattr structures at the end.
706  	 * Updating the temporary file requires a transaction, so we commit the
707  	 * scrub transaction and drop the two ILOCKs so that xfs_attr_set can
708  	 * allocate whatever transaction it wants.
709  	 *
710  	 * We still hold IOLOCK_EXCL on the inode being repaired, which
711  	 * prevents anyone from modifying the damaged xattr data while we
712  	 * repair it.
713  	 */
714  	error = xrep_trans_commit(rx->sc);
715  	if (error)
716  		return error;
717  	xchk_iunlock(rx->sc, XFS_ILOCK_EXCL);
718  
719  	/*
720  	 * Take the IOLOCK of the temporary file while we modify xattrs.  This
721  	 * isn't strictly required because the temporary file is never revealed
722  	 * to userspace, but we follow the same locking rules.  We still hold
723  	 * sc->ip's IOLOCK.
724  	 */
725  	error = xrep_tempfile_iolock_polled(rx->sc);
726  	if (error)
727  		return error;
728  
729  	/* Add all the salvaged attrs to the temporary file. */
730  	foreach_xfarray_idx(rx->xattr_records, array_cur) {
731  		struct xrep_xattr_key	key;
732  
733  		error = xfarray_load(rx->xattr_records, array_cur, &key);
734  		if (error)
735  			return error;
736  
737  		error = xrep_xattr_insert_rec(rx, &key);
738  		if (error)
739  			return error;
740  	}
741  
742  	/* Empty out both arrays now that we've added the entries. */
743  	xfarray_truncate(rx->xattr_records);
744  	xfblob_truncate(rx->xattr_blobs);
745  
746  	xrep_tempfile_iounlock(rx->sc);
747  
748  	/* Recreate the salvage transaction and relock the inode. */
749  	error = xchk_trans_alloc(rx->sc, 0);
750  	if (error)
751  		return error;
752  	xchk_ilock(rx->sc, XFS_ILOCK_EXCL);
753  	return 0;
754  }
755  
756  /* Decide if we've stashed too much xattr data in memory. */
757  static inline bool
xrep_xattr_want_flush_stashed(struct xrep_xattr * rx)758  xrep_xattr_want_flush_stashed(
759  	struct xrep_xattr	*rx)
760  {
761  	unsigned long long	bytes;
762  
763  	if (!rx->can_flush)
764  		return false;
765  
766  	bytes = xfarray_bytes(rx->xattr_records) +
767  		xfblob_bytes(rx->xattr_blobs);
768  	return bytes > XREP_XATTR_MAX_STASH_BYTES;
769  }
770  
771  /*
772   * Did we observe rename changing parent pointer xattrs while we were flushing
773   * salvaged attrs?
774   */
775  static inline bool
xrep_xattr_saw_pptr_conflict(struct xrep_xattr * rx)776  xrep_xattr_saw_pptr_conflict(
777  	struct xrep_xattr	*rx)
778  {
779  	bool			ret;
780  
781  	ASSERT(rx->can_flush);
782  
783  	if (!xfs_has_parent(rx->sc->mp))
784  		return false;
785  
786  	xfs_assert_ilocked(rx->sc->ip, XFS_ILOCK_EXCL);
787  
788  	mutex_lock(&rx->lock);
789  	ret = xfarray_bytes(rx->pptr_recs) > 0;
790  	mutex_unlock(&rx->lock);
791  
792  	return ret;
793  }
794  
795  /*
796   * Reset the entire repair state back to initial conditions, now that we've
797   * detected a parent pointer update to the attr structure while we were
798   * flushing salvaged attrs.  See the locking notes in dir_repair.c for more
799   * information on why this is all necessary.
800   */
801  STATIC int
xrep_xattr_full_reset(struct xrep_xattr * rx)802  xrep_xattr_full_reset(
803  	struct xrep_xattr	*rx)
804  {
805  	struct xfs_scrub	*sc = rx->sc;
806  	struct xfs_attr_sf_hdr	*hdr;
807  	struct xfs_ifork	*ifp = &sc->tempip->i_af;
808  	int			error;
809  
810  	trace_xrep_xattr_full_reset(sc->ip, sc->tempip);
811  
812  	/* The temporary file's data fork had better not be in btree format. */
813  	if (sc->tempip->i_df.if_format == XFS_DINODE_FMT_BTREE) {
814  		ASSERT(0);
815  		return -EIO;
816  	}
817  
818  	/*
819  	 * We begin in transaction context with sc->ip ILOCKed but not joined
820  	 * to the transaction.  To reset to the initial state, we must hold
821  	 * sc->ip's ILOCK to prevent rename from updating parent pointer
822  	 * information and the tempfile's ILOCK to clear its contents.
823  	 */
824  	xchk_iunlock(rx->sc, XFS_ILOCK_EXCL);
825  	xrep_tempfile_ilock_both(sc);
826  	xfs_trans_ijoin(sc->tp, sc->ip, 0);
827  	xfs_trans_ijoin(sc->tp, sc->tempip, 0);
828  
829  	/*
830  	 * Free all the blocks of the attr fork of the temp file, and reset
831  	 * it back to local format.
832  	 */
833  	if (xfs_ifork_has_extents(&sc->tempip->i_af)) {
834  		error = xrep_reap_ifork(sc, sc->tempip, XFS_ATTR_FORK);
835  		if (error)
836  			return error;
837  
838  		ASSERT(ifp->if_bytes == 0);
839  		ifp->if_format = XFS_DINODE_FMT_LOCAL;
840  		xfs_idata_realloc(sc->tempip, sizeof(*hdr), XFS_ATTR_FORK);
841  	}
842  
843  	/* Reinitialize the attr fork to an empty shortform structure. */
844  	hdr = ifp->if_data;
845  	memset(hdr, 0, sizeof(*hdr));
846  	hdr->totsize = cpu_to_be16(sizeof(*hdr));
847  	xfs_trans_log_inode(sc->tp, sc->tempip, XFS_ILOG_CORE | XFS_ILOG_ADATA);
848  
849  	/*
850  	 * Roll this transaction to commit our reset ondisk.  The tempfile
851  	 * should no longer be joined to the transaction, so we drop its ILOCK.
852  	 * This should leave us in transaction context with sc->ip ILOCKed but
853  	 * not joined to the transaction.
854  	 */
855  	error = xrep_roll_trans(sc);
856  	if (error)
857  		return error;
858  	xrep_tempfile_iunlock(sc);
859  
860  	/*
861  	 * Erase any accumulated parent pointer updates now that we've erased
862  	 * the tempfile's attr fork.  We're resetting the entire repair state
863  	 * back to where we were initially, except now we won't flush salvaged
864  	 * xattrs until the very end.
865  	 */
866  	mutex_lock(&rx->lock);
867  	xfarray_truncate(rx->pptr_recs);
868  	xfblob_truncate(rx->pptr_names);
869  	mutex_unlock(&rx->lock);
870  
871  	rx->can_flush = false;
872  	rx->attrs_found = 0;
873  
874  	ASSERT(xfarray_bytes(rx->xattr_records) == 0);
875  	ASSERT(xfblob_bytes(rx->xattr_blobs) == 0);
876  	return 0;
877  }
878  
879  /* Extract as many attribute keys and values as we can. */
880  STATIC int
xrep_xattr_recover(struct xrep_xattr * rx)881  xrep_xattr_recover(
882  	struct xrep_xattr	*rx)
883  {
884  	struct xfs_bmbt_irec	got;
885  	struct xfs_scrub	*sc = rx->sc;
886  	struct xfs_da_geometry	*geo = sc->mp->m_attr_geo;
887  	xfs_fileoff_t		offset;
888  	xfs_extlen_t		len;
889  	xfs_dablk_t		dabno;
890  	int			nmap;
891  	int			error;
892  
893  restart:
894  	/*
895  	 * Iterate each xattr leaf block in the attr fork to scan them for any
896  	 * attributes that we might salvage.
897  	 */
898  	for (offset = 0;
899  	     offset < XFS_MAX_FILEOFF;
900  	     offset = got.br_startoff + got.br_blockcount) {
901  		nmap = 1;
902  		error = xfs_bmapi_read(sc->ip, offset, XFS_MAX_FILEOFF - offset,
903  				&got, &nmap, XFS_BMAPI_ATTRFORK);
904  		if (error)
905  			return error;
906  		if (nmap != 1)
907  			return -EFSCORRUPTED;
908  		if (!xfs_bmap_is_written_extent(&got))
909  			continue;
910  
911  		for (dabno = round_up(got.br_startoff, geo->fsbcount);
912  		     dabno < got.br_startoff + got.br_blockcount;
913  		     dabno += len) {
914  			xfs_fileoff_t	curr_offset = dabno - got.br_startoff;
915  			xfs_extlen_t	maxlen;
916  
917  			if (xchk_should_terminate(rx->sc, &error))
918  				return error;
919  
920  			maxlen = min_t(xfs_filblks_t, INT_MAX,
921  					got.br_blockcount - curr_offset);
922  			error = xrep_xattr_recover_block(rx, dabno,
923  					curr_offset + got.br_startblock,
924  					maxlen, &len);
925  			if (error)
926  				return error;
927  
928  			if (xrep_xattr_want_flush_stashed(rx)) {
929  				error = xrep_xattr_flush_stashed(rx);
930  				if (error)
931  					return error;
932  
933  				if (xrep_xattr_saw_pptr_conflict(rx)) {
934  					error = xrep_xattr_full_reset(rx);
935  					if (error)
936  						return error;
937  
938  					goto restart;
939  				}
940  			}
941  		}
942  	}
943  
944  	return 0;
945  }
946  
947  /*
948   * Reset the extended attribute fork to a state where we can start re-adding
949   * the salvaged attributes.
950   */
951  STATIC int
xrep_xattr_fork_remove(struct xfs_scrub * sc,struct xfs_inode * ip)952  xrep_xattr_fork_remove(
953  	struct xfs_scrub	*sc,
954  	struct xfs_inode	*ip)
955  {
956  	struct xfs_attr_sf_hdr	*hdr;
957  	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, XFS_ATTR_FORK);
958  
959  	/*
960  	 * If the data fork is in btree format, we can't change di_forkoff
961  	 * because we could run afoul of the rule that the data fork isn't
962  	 * supposed to be in btree format if there's enough space in the fork
963  	 * that it could have used extents format.  Instead, reinitialize the
964  	 * attr fork to have a shortform structure with zero attributes.
965  	 */
966  	if (ip->i_df.if_format == XFS_DINODE_FMT_BTREE) {
967  		ifp->if_format = XFS_DINODE_FMT_LOCAL;
968  		hdr = xfs_idata_realloc(ip, (int)sizeof(*hdr) - ifp->if_bytes,
969  				XFS_ATTR_FORK);
970  		hdr->count = 0;
971  		hdr->totsize = cpu_to_be16(sizeof(*hdr));
972  		xfs_trans_log_inode(sc->tp, ip,
973  				XFS_ILOG_CORE | XFS_ILOG_ADATA);
974  		return 0;
975  	}
976  
977  	/* If we still have attr fork extents, something's wrong. */
978  	if (ifp->if_nextents != 0) {
979  		struct xfs_iext_cursor	icur;
980  		struct xfs_bmbt_irec	irec;
981  		unsigned int		i = 0;
982  
983  		xfs_emerg(sc->mp,
984  	"inode 0x%llx attr fork still has %llu attr extents, format %d?!",
985  				ip->i_ino, ifp->if_nextents, ifp->if_format);
986  		for_each_xfs_iext(ifp, &icur, &irec) {
987  			xfs_err(sc->mp,
988  	"[%u]: startoff %llu startblock %llu blockcount %llu state %u",
989  					i++, irec.br_startoff,
990  					irec.br_startblock, irec.br_blockcount,
991  					irec.br_state);
992  		}
993  		ASSERT(0);
994  		return -EFSCORRUPTED;
995  	}
996  
997  	xfs_attr_fork_remove(ip, sc->tp);
998  	return 0;
999  }
1000  
1001  /*
1002   * Free all the attribute fork blocks of the file being repaired and delete the
1003   * fork.  The caller must ILOCK the scrub file and join it to the transaction.
1004   * This function returns with the inode joined to a clean transaction.
1005   */
1006  int
xrep_xattr_reset_fork(struct xfs_scrub * sc)1007  xrep_xattr_reset_fork(
1008  	struct xfs_scrub	*sc)
1009  {
1010  	int			error;
1011  
1012  	trace_xrep_xattr_reset_fork(sc->ip, sc->ip);
1013  
1014  	/* Unmap all the attr blocks. */
1015  	if (xfs_ifork_has_extents(&sc->ip->i_af)) {
1016  		error = xrep_reap_ifork(sc, sc->ip, XFS_ATTR_FORK);
1017  		if (error)
1018  			return error;
1019  	}
1020  
1021  	error = xrep_xattr_fork_remove(sc, sc->ip);
1022  	if (error)
1023  		return error;
1024  
1025  	return xfs_trans_roll_inode(&sc->tp, sc->ip);
1026  }
1027  
1028  /*
1029   * Free all the attribute fork blocks of the temporary file and delete the attr
1030   * fork.  The caller must ILOCK the tempfile and join it to the transaction.
1031   * This function returns with the inode joined to a clean scrub transaction.
1032   */
1033  int
xrep_xattr_reset_tempfile_fork(struct xfs_scrub * sc)1034  xrep_xattr_reset_tempfile_fork(
1035  	struct xfs_scrub	*sc)
1036  {
1037  	int			error;
1038  
1039  	trace_xrep_xattr_reset_fork(sc->ip, sc->tempip);
1040  
1041  	/*
1042  	 * Wipe out the attr fork of the temp file so that regular inode
1043  	 * inactivation won't trip over the corrupt attr fork.
1044  	 */
1045  	if (xfs_ifork_has_extents(&sc->tempip->i_af)) {
1046  		error = xrep_reap_ifork(sc, sc->tempip, XFS_ATTR_FORK);
1047  		if (error)
1048  			return error;
1049  	}
1050  
1051  	return xrep_xattr_fork_remove(sc, sc->tempip);
1052  }
1053  
1054  /*
1055   * Find all the extended attributes for this inode by scraping them out of the
1056   * attribute key blocks by hand, and flushing them into the temp file.
1057   * When we're done, free the staging memory before exchanging the xattr
1058   * structures to reduce memory usage.
1059   */
1060  STATIC int
xrep_xattr_salvage_attributes(struct xrep_xattr * rx)1061  xrep_xattr_salvage_attributes(
1062  	struct xrep_xattr	*rx)
1063  {
1064  	struct xfs_inode	*ip = rx->sc->ip;
1065  	int			error;
1066  
1067  	/* Short format xattrs are easy! */
1068  	if (rx->sc->ip->i_af.if_format == XFS_DINODE_FMT_LOCAL) {
1069  		error = xrep_xattr_recover_sf(rx);
1070  		if (error)
1071  			return error;
1072  
1073  		return xrep_xattr_flush_stashed(rx);
1074  	}
1075  
1076  	/*
1077  	 * For non-inline xattr structures, the salvage function scans the
1078  	 * buffer cache looking for potential attr leaf blocks.  The scan
1079  	 * requires the ability to lock any buffer found and runs independently
1080  	 * of any transaction <-> buffer item <-> buffer linkage.  Therefore,
1081  	 * roll the transaction to ensure there are no buffers joined.  We hold
1082  	 * the ILOCK independently of the transaction.
1083  	 */
1084  	error = xfs_trans_roll(&rx->sc->tp);
1085  	if (error)
1086  		return error;
1087  
1088  	error = xfs_iread_extents(rx->sc->tp, ip, XFS_ATTR_FORK);
1089  	if (error)
1090  		return error;
1091  
1092  	error = xrep_xattr_recover(rx);
1093  	if (error)
1094  		return error;
1095  
1096  	return xrep_xattr_flush_stashed(rx);
1097  }
1098  
1099  /*
1100   * Add this stashed incore parent pointer to the temporary file.  The caller
1101   * must hold the tempdir's IOLOCK, must not hold any ILOCKs, and must not be in
1102   * transaction context.
1103   */
1104  STATIC int
xrep_xattr_replay_pptr_update(struct xrep_xattr * rx,const struct xfs_name * xname,struct xrep_xattr_pptr * pptr)1105  xrep_xattr_replay_pptr_update(
1106  	struct xrep_xattr		*rx,
1107  	const struct xfs_name		*xname,
1108  	struct xrep_xattr_pptr		*pptr)
1109  {
1110  	struct xfs_scrub		*sc = rx->sc;
1111  	int				error;
1112  
1113  	switch (pptr->action) {
1114  	case XREP_XATTR_PPTR_ADD:
1115  		/* Create parent pointer. */
1116  		trace_xrep_xattr_replay_parentadd(sc->tempip, xname,
1117  				&pptr->pptr_rec);
1118  
1119  		error = xfs_parent_set(sc->tempip, sc->ip->i_ino, xname,
1120  				&pptr->pptr_rec, &rx->pptr_args);
1121  		ASSERT(error != -EEXIST);
1122  		return error;
1123  	case XREP_XATTR_PPTR_REMOVE:
1124  		/* Remove parent pointer. */
1125  		trace_xrep_xattr_replay_parentremove(sc->tempip, xname,
1126  				&pptr->pptr_rec);
1127  
1128  		error = xfs_parent_unset(sc->tempip, sc->ip->i_ino, xname,
1129  				&pptr->pptr_rec, &rx->pptr_args);
1130  		ASSERT(error != -ENOATTR);
1131  		return error;
1132  	}
1133  
1134  	ASSERT(0);
1135  	return -EIO;
1136  }
1137  
1138  /*
1139   * Flush stashed parent pointer updates that have been recorded by the scanner.
1140   * This is done to reduce the memory requirements of the xattr rebuild, since
1141   * files can have a lot of hardlinks and the fs can be busy.
1142   *
1143   * Caller must not hold transactions or ILOCKs.  Caller must hold the tempfile
1144   * IOLOCK.
1145   */
1146  STATIC int
xrep_xattr_replay_pptr_updates(struct xrep_xattr * rx)1147  xrep_xattr_replay_pptr_updates(
1148  	struct xrep_xattr	*rx)
1149  {
1150  	xfarray_idx_t		array_cur;
1151  	int			error;
1152  
1153  	mutex_lock(&rx->lock);
1154  	foreach_xfarray_idx(rx->pptr_recs, array_cur) {
1155  		struct xrep_xattr_pptr	pptr;
1156  
1157  		error = xfarray_load(rx->pptr_recs, array_cur, &pptr);
1158  		if (error)
1159  			goto out_unlock;
1160  
1161  		error = xfblob_loadname(rx->pptr_names, pptr.name_cookie,
1162  				&rx->xname, pptr.namelen);
1163  		if (error)
1164  			goto out_unlock;
1165  		mutex_unlock(&rx->lock);
1166  
1167  		error = xrep_xattr_replay_pptr_update(rx, &rx->xname, &pptr);
1168  		if (error)
1169  			return error;
1170  
1171  		mutex_lock(&rx->lock);
1172  	}
1173  
1174  	/* Empty out both arrays now that we've added the entries. */
1175  	xfarray_truncate(rx->pptr_recs);
1176  	xfblob_truncate(rx->pptr_names);
1177  	mutex_unlock(&rx->lock);
1178  	return 0;
1179  out_unlock:
1180  	mutex_unlock(&rx->lock);
1181  	return error;
1182  }
1183  
1184  /*
1185   * Remember that we want to create a parent pointer in the tempfile.  These
1186   * stashed actions will be replayed later.
1187   */
1188  STATIC int
xrep_xattr_stash_parentadd(struct xrep_xattr * rx,const struct xfs_name * name,const struct xfs_inode * dp)1189  xrep_xattr_stash_parentadd(
1190  	struct xrep_xattr	*rx,
1191  	const struct xfs_name	*name,
1192  	const struct xfs_inode	*dp)
1193  {
1194  	struct xrep_xattr_pptr	pptr = {
1195  		.action		= XREP_XATTR_PPTR_ADD,
1196  		.namelen	= name->len,
1197  	};
1198  	int			error;
1199  
1200  	trace_xrep_xattr_stash_parentadd(rx->sc->tempip, dp, name);
1201  
1202  	xfs_inode_to_parent_rec(&pptr.pptr_rec, dp);
1203  	error = xfblob_storename(rx->pptr_names, &pptr.name_cookie, name);
1204  	if (error)
1205  		return error;
1206  
1207  	return xfarray_append(rx->pptr_recs, &pptr);
1208  }
1209  
1210  /*
1211   * Remember that we want to remove a parent pointer from the tempfile.  These
1212   * stashed actions will be replayed later.
1213   */
1214  STATIC int
xrep_xattr_stash_parentremove(struct xrep_xattr * rx,const struct xfs_name * name,const struct xfs_inode * dp)1215  xrep_xattr_stash_parentremove(
1216  	struct xrep_xattr	*rx,
1217  	const struct xfs_name	*name,
1218  	const struct xfs_inode	*dp)
1219  {
1220  	struct xrep_xattr_pptr	pptr = {
1221  		.action		= XREP_XATTR_PPTR_REMOVE,
1222  		.namelen	= name->len,
1223  	};
1224  	int			error;
1225  
1226  	trace_xrep_xattr_stash_parentremove(rx->sc->tempip, dp, name);
1227  
1228  	xfs_inode_to_parent_rec(&pptr.pptr_rec, dp);
1229  	error = xfblob_storename(rx->pptr_names, &pptr.name_cookie, name);
1230  	if (error)
1231  		return error;
1232  
1233  	return xfarray_append(rx->pptr_recs, &pptr);
1234  }
1235  
1236  /*
1237   * Capture dirent updates being made by other threads.  We will have to replay
1238   * the parent pointer updates before exchanging attr forks.
1239   */
1240  STATIC int
xrep_xattr_live_dirent_update(struct notifier_block * nb,unsigned long action,void * data)1241  xrep_xattr_live_dirent_update(
1242  	struct notifier_block		*nb,
1243  	unsigned long			action,
1244  	void				*data)
1245  {
1246  	struct xfs_dir_update_params	*p = data;
1247  	struct xrep_xattr		*rx;
1248  	struct xfs_scrub		*sc;
1249  	int				error;
1250  
1251  	rx = container_of(nb, struct xrep_xattr, dhook.dirent_hook.nb);
1252  	sc = rx->sc;
1253  
1254  	/*
1255  	 * This thread updated a dirent that points to the file that we're
1256  	 * repairing, so stash the update for replay against the temporary
1257  	 * file.
1258  	 */
1259  	if (p->ip->i_ino != sc->ip->i_ino)
1260  		return NOTIFY_DONE;
1261  
1262  	mutex_lock(&rx->lock);
1263  	if (p->delta > 0)
1264  		error = xrep_xattr_stash_parentadd(rx, p->name, p->dp);
1265  	else
1266  		error = xrep_xattr_stash_parentremove(rx, p->name, p->dp);
1267  	if (error)
1268  		rx->live_update_aborted = true;
1269  	mutex_unlock(&rx->lock);
1270  	return NOTIFY_DONE;
1271  }
1272  
1273  /*
1274   * Prepare both inodes' attribute forks for an exchange.  Promote the tempfile
1275   * from short format to leaf format, and if the file being repaired has a short
1276   * format attr fork, turn it into an empty extent list.
1277   */
1278  STATIC int
xrep_xattr_swap_prep(struct xfs_scrub * sc,bool temp_local,bool ip_local)1279  xrep_xattr_swap_prep(
1280  	struct xfs_scrub	*sc,
1281  	bool			temp_local,
1282  	bool			ip_local)
1283  {
1284  	int			error;
1285  
1286  	/*
1287  	 * If the tempfile's attributes are in shortform format, convert that
1288  	 * to a single leaf extent so that we can use the atomic mapping
1289  	 * exchange.
1290  	 */
1291  	if (temp_local) {
1292  		struct xfs_da_args	args = {
1293  			.dp		= sc->tempip,
1294  			.geo		= sc->mp->m_attr_geo,
1295  			.whichfork	= XFS_ATTR_FORK,
1296  			.trans		= sc->tp,
1297  			.total		= 1,
1298  			.owner		= sc->ip->i_ino,
1299  		};
1300  
1301  		error = xfs_attr_shortform_to_leaf(&args);
1302  		if (error)
1303  			return error;
1304  
1305  		/*
1306  		 * Roll the deferred log items to get us back to a clean
1307  		 * transaction.
1308  		 */
1309  		error = xfs_defer_finish(&sc->tp);
1310  		if (error)
1311  			return error;
1312  	}
1313  
1314  	/*
1315  	 * If the file being repaired had a shortform attribute fork, convert
1316  	 * that to an empty extent list in preparation for the atomic mapping
1317  	 * exchange.
1318  	 */
1319  	if (ip_local) {
1320  		struct xfs_ifork	*ifp;
1321  
1322  		ifp = xfs_ifork_ptr(sc->ip, XFS_ATTR_FORK);
1323  
1324  		xfs_idestroy_fork(ifp);
1325  		ifp->if_format = XFS_DINODE_FMT_EXTENTS;
1326  		ifp->if_nextents = 0;
1327  		ifp->if_bytes = 0;
1328  		ifp->if_data = NULL;
1329  		ifp->if_height = 0;
1330  
1331  		xfs_trans_log_inode(sc->tp, sc->ip,
1332  				XFS_ILOG_CORE | XFS_ILOG_ADATA);
1333  	}
1334  
1335  	return 0;
1336  }
1337  
1338  /* Exchange the temporary file's attribute fork with the one being repaired. */
1339  int
xrep_xattr_swap(struct xfs_scrub * sc,struct xrep_tempexch * tx)1340  xrep_xattr_swap(
1341  	struct xfs_scrub	*sc,
1342  	struct xrep_tempexch	*tx)
1343  {
1344  	bool			ip_local, temp_local;
1345  	int			error = 0;
1346  
1347  	ip_local = sc->ip->i_af.if_format == XFS_DINODE_FMT_LOCAL;
1348  	temp_local = sc->tempip->i_af.if_format == XFS_DINODE_FMT_LOCAL;
1349  
1350  	/*
1351  	 * If the both files have a local format attr fork and the rebuilt
1352  	 * xattr data would fit in the repaired file's attr fork, just copy
1353  	 * the contents from the tempfile and declare ourselves done.
1354  	 */
1355  	if (ip_local && temp_local) {
1356  		int	forkoff;
1357  		int	newsize;
1358  
1359  		newsize = xfs_attr_sf_totsize(sc->tempip);
1360  		forkoff = xfs_attr_shortform_bytesfit(sc->ip, newsize);
1361  		if (forkoff > 0) {
1362  			sc->ip->i_forkoff = forkoff;
1363  			xrep_tempfile_copyout_local(sc, XFS_ATTR_FORK);
1364  			return 0;
1365  		}
1366  	}
1367  
1368  	/* Otherwise, make sure both attr forks are in block-mapping mode. */
1369  	error = xrep_xattr_swap_prep(sc, temp_local, ip_local);
1370  	if (error)
1371  		return error;
1372  
1373  	return xrep_tempexch_contents(sc, tx);
1374  }
1375  
1376  /*
1377   * Finish replaying stashed parent pointer updates, allocate a transaction for
1378   * exchanging extent mappings, and take the ILOCKs of both files before we
1379   * commit the new extended attribute structure.
1380   */
1381  STATIC int
xrep_xattr_finalize_tempfile(struct xrep_xattr * rx)1382  xrep_xattr_finalize_tempfile(
1383  	struct xrep_xattr	*rx)
1384  {
1385  	struct xfs_scrub	*sc = rx->sc;
1386  	int			error;
1387  
1388  	if (!xfs_has_parent(sc->mp))
1389  		return xrep_tempexch_trans_alloc(sc, XFS_ATTR_FORK, &rx->tx);
1390  
1391  	/*
1392  	 * Repair relies on the ILOCK to quiesce all possible xattr updates.
1393  	 * Replay all queued parent pointer updates into the tempfile before
1394  	 * exchanging the contents, even if that means dropping the ILOCKs and
1395  	 * the transaction.
1396  	 */
1397  	do {
1398  		error = xrep_xattr_replay_pptr_updates(rx);
1399  		if (error)
1400  			return error;
1401  
1402  		error = xrep_tempexch_trans_alloc(sc, XFS_ATTR_FORK, &rx->tx);
1403  		if (error)
1404  			return error;
1405  
1406  		if (xfarray_length(rx->pptr_recs) == 0)
1407  			break;
1408  
1409  		xchk_trans_cancel(sc);
1410  		xrep_tempfile_iunlock_both(sc);
1411  	} while (!xchk_should_terminate(sc, &error));
1412  	return error;
1413  }
1414  
1415  /*
1416   * Exchange the new extended attribute data (which we created in the tempfile)
1417   * with the file being repaired.
1418   */
1419  STATIC int
xrep_xattr_rebuild_tree(struct xrep_xattr * rx)1420  xrep_xattr_rebuild_tree(
1421  	struct xrep_xattr	*rx)
1422  {
1423  	struct xfs_scrub	*sc = rx->sc;
1424  	int			error;
1425  
1426  	/*
1427  	 * If we didn't find any attributes to salvage, repair the file by
1428  	 * zapping its attr fork.
1429  	 */
1430  	if (rx->attrs_found == 0) {
1431  		xfs_trans_ijoin(sc->tp, sc->ip, 0);
1432  		error = xrep_xattr_reset_fork(sc);
1433  		if (error)
1434  			return error;
1435  
1436  		goto forget_acls;
1437  	}
1438  
1439  	trace_xrep_xattr_rebuild_tree(sc->ip, sc->tempip);
1440  
1441  	/*
1442  	 * Commit the repair transaction and drop the ILOCKs so that we can use
1443  	 * the atomic file content exchange helper functions to compute the
1444  	 * correct resource reservations.
1445  	 *
1446  	 * We still hold IOLOCK_EXCL (aka i_rwsem) which will prevent xattr
1447  	 * modifications, but there's nothing to prevent userspace from reading
1448  	 * the attributes until we're ready for the exchange operation.  Reads
1449  	 * will return -EIO without shutting down the fs, so we're ok with
1450  	 * that.
1451  	 */
1452  	error = xrep_trans_commit(sc);
1453  	if (error)
1454  		return error;
1455  
1456  	xchk_iunlock(sc, XFS_ILOCK_EXCL);
1457  
1458  	/*
1459  	 * Take the IOLOCK on the temporary file so that we can run xattr
1460  	 * operations with the same locks held as we would for a normal file.
1461  	 * We still hold sc->ip's IOLOCK.
1462  	 */
1463  	error = xrep_tempfile_iolock_polled(rx->sc);
1464  	if (error)
1465  		return error;
1466  
1467  	/*
1468  	 * Allocate transaction, lock inodes, and make sure that we've replayed
1469  	 * all the stashed parent pointer updates to the temp file.  After this
1470  	 * point, we're ready to exchange attr fork mappings.
1471  	 */
1472  	error = xrep_xattr_finalize_tempfile(rx);
1473  	if (error)
1474  		return error;
1475  
1476  	/*
1477  	 * Exchange the blocks mapped by the tempfile's attr fork with the file
1478  	 * being repaired.  The old attr blocks will then be attached to the
1479  	 * tempfile, so reap its attr fork.
1480  	 */
1481  	error = xrep_xattr_swap(sc, &rx->tx);
1482  	if (error)
1483  		return error;
1484  
1485  	error = xrep_xattr_reset_tempfile_fork(sc);
1486  	if (error)
1487  		return error;
1488  
1489  	/*
1490  	 * Roll to get a transaction without any inodes joined to it.  Then we
1491  	 * can drop the tempfile's ILOCK and IOLOCK before doing more work on
1492  	 * the scrub target file.
1493  	 */
1494  	error = xfs_trans_roll(&sc->tp);
1495  	if (error)
1496  		return error;
1497  
1498  	xrep_tempfile_iunlock(sc);
1499  	xrep_tempfile_iounlock(sc);
1500  
1501  forget_acls:
1502  	/* Invalidate cached ACLs now that we've reloaded all the xattrs. */
1503  	xfs_forget_acl(VFS_I(sc->ip), SGI_ACL_FILE);
1504  	xfs_forget_acl(VFS_I(sc->ip), SGI_ACL_DEFAULT);
1505  	return 0;
1506  }
1507  
1508  /* Tear down all the incore scan stuff we created. */
1509  STATIC void
xrep_xattr_teardown(struct xrep_xattr * rx)1510  xrep_xattr_teardown(
1511  	struct xrep_xattr	*rx)
1512  {
1513  	if (xfs_has_parent(rx->sc->mp))
1514  		xfs_dir_hook_del(rx->sc->mp, &rx->dhook);
1515  	if (rx->pptr_names)
1516  		xfblob_destroy(rx->pptr_names);
1517  	if (rx->pptr_recs)
1518  		xfarray_destroy(rx->pptr_recs);
1519  	xfblob_destroy(rx->xattr_blobs);
1520  	xfarray_destroy(rx->xattr_records);
1521  	mutex_destroy(&rx->lock);
1522  	kfree(rx);
1523  }
1524  
1525  /* Set up the filesystem scan so we can regenerate extended attributes. */
1526  STATIC int
xrep_xattr_setup_scan(struct xfs_scrub * sc,struct xrep_xattr ** rxp)1527  xrep_xattr_setup_scan(
1528  	struct xfs_scrub	*sc,
1529  	struct xrep_xattr	**rxp)
1530  {
1531  	struct xrep_xattr	*rx;
1532  	char			*descr;
1533  	int			max_len;
1534  	int			error;
1535  
1536  	rx = kzalloc(sizeof(struct xrep_xattr), XCHK_GFP_FLAGS);
1537  	if (!rx)
1538  		return -ENOMEM;
1539  	rx->sc = sc;
1540  	rx->can_flush = true;
1541  	rx->xname.name = rx->namebuf;
1542  
1543  	mutex_init(&rx->lock);
1544  
1545  	/*
1546  	 * Allocate enough memory to handle loading local attr values from the
1547  	 * xfblob data while flushing stashed attrs to the temporary file.
1548  	 * We only realloc the buffer when salvaging remote attr values.
1549  	 */
1550  	max_len = xfs_attr_leaf_entsize_local_max(sc->mp->m_attr_geo->blksize);
1551  	error = xchk_setup_xattr_buf(rx->sc, max_len);
1552  	if (error == -ENOMEM)
1553  		error = -EDEADLOCK;
1554  	if (error)
1555  		goto out_rx;
1556  
1557  	/* Set up some staging for salvaged attribute keys and values */
1558  	descr = xchk_xfile_ino_descr(sc, "xattr keys");
1559  	error = xfarray_create(descr, 0, sizeof(struct xrep_xattr_key),
1560  			&rx->xattr_records);
1561  	kfree(descr);
1562  	if (error)
1563  		goto out_rx;
1564  
1565  	descr = xchk_xfile_ino_descr(sc, "xattr names");
1566  	error = xfblob_create(descr, &rx->xattr_blobs);
1567  	kfree(descr);
1568  	if (error)
1569  		goto out_keys;
1570  
1571  	if (xfs_has_parent(sc->mp)) {
1572  		ASSERT(sc->flags & XCHK_FSGATES_DIRENTS);
1573  
1574  		descr = xchk_xfile_ino_descr(sc,
1575  				"xattr retained parent pointer entries");
1576  		error = xfarray_create(descr, 0,
1577  				sizeof(struct xrep_xattr_pptr),
1578  				&rx->pptr_recs);
1579  		kfree(descr);
1580  		if (error)
1581  			goto out_values;
1582  
1583  		descr = xchk_xfile_ino_descr(sc,
1584  				"xattr retained parent pointer names");
1585  		error = xfblob_create(descr, &rx->pptr_names);
1586  		kfree(descr);
1587  		if (error)
1588  			goto out_pprecs;
1589  
1590  		xfs_dir_hook_setup(&rx->dhook, xrep_xattr_live_dirent_update);
1591  		error = xfs_dir_hook_add(sc->mp, &rx->dhook);
1592  		if (error)
1593  			goto out_ppnames;
1594  	}
1595  
1596  	*rxp = rx;
1597  	return 0;
1598  out_ppnames:
1599  	xfblob_destroy(rx->pptr_names);
1600  out_pprecs:
1601  	xfarray_destroy(rx->pptr_recs);
1602  out_values:
1603  	xfblob_destroy(rx->xattr_blobs);
1604  out_keys:
1605  	xfarray_destroy(rx->xattr_records);
1606  out_rx:
1607  	mutex_destroy(&rx->lock);
1608  	kfree(rx);
1609  	return error;
1610  }
1611  
1612  /*
1613   * Repair the extended attribute metadata.
1614   *
1615   * XXX: Remote attribute value buffers encompass the entire (up to 64k) buffer.
1616   * The buffer cache in XFS can't handle aliased multiblock buffers, so this
1617   * might misbehave if the attr fork is crosslinked with other filesystem
1618   * metadata.
1619   */
1620  int
xrep_xattr(struct xfs_scrub * sc)1621  xrep_xattr(
1622  	struct xfs_scrub	*sc)
1623  {
1624  	struct xrep_xattr	*rx = NULL;
1625  	int			error;
1626  
1627  	if (!xfs_inode_hasattr(sc->ip))
1628  		return -ENOENT;
1629  
1630  	/* The rmapbt is required to reap the old attr fork. */
1631  	if (!xfs_has_rmapbt(sc->mp))
1632  		return -EOPNOTSUPP;
1633  	/* We require atomic file exchange range to rebuild anything. */
1634  	if (!xfs_has_exchange_range(sc->mp))
1635  		return -EOPNOTSUPP;
1636  
1637  	error = xrep_xattr_setup_scan(sc, &rx);
1638  	if (error)
1639  		return error;
1640  
1641  	ASSERT(sc->ilock_flags & XFS_ILOCK_EXCL);
1642  
1643  	error = xrep_xattr_salvage_attributes(rx);
1644  	if (error)
1645  		goto out_scan;
1646  
1647  	if (rx->live_update_aborted) {
1648  		error = -EIO;
1649  		goto out_scan;
1650  	}
1651  
1652  	/* Last chance to abort before we start committing fixes. */
1653  	if (xchk_should_terminate(sc, &error))
1654  		goto out_scan;
1655  
1656  	error = xrep_xattr_rebuild_tree(rx);
1657  	if (error)
1658  		goto out_scan;
1659  
1660  out_scan:
1661  	xrep_xattr_teardown(rx);
1662  	return error;
1663  }
1664