// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2017-2023 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_btree.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_inode.h"
#include "xfs_ialloc.h"
#include "xfs_ialloc_btree.h"
#include "xfs_icache.h"
#include "xfs_rmap.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/btree.h"
#include "scrub/trace.h"
#include "xfs_ag.h"

/*
 * Set us up to scrub inode btrees.
 * If we detect a discrepancy between the inobt and the inode,
 * try again after forcing logged inode cores out to disk.
 */
int
xchk_setup_ag_iallocbt(
	struct xfs_scrub	*sc)
{
	if (xchk_need_intent_drain(sc))
		xchk_fsgates_enable(sc, XCHK_FSGATES_DRAIN);
	return xchk_setup_ag_btree(sc, sc->flags & XCHK_TRY_HARDER);
}

/* Inode btree scrubber. */

struct xchk_iallocbt {
	/* Number of inodes we see while scanning inobt. */
	unsigned long long	inodes;

	/* Expected next startino, for big block filesystems. */
	xfs_agino_t		next_startino;

	/* Expected end of the current inode cluster. */
	xfs_agino_t		next_cluster_ino;
};

/*
 * Does the finobt have a record for this inode with the same hole/free state?
 * This is a bit complicated because of the following:
 *
 * - The finobt need not have a record if all inodes in the inobt record are
 *   allocated.
 * - The finobt need not have a record if all inodes in the inobt record are
 *   free.
 * - The finobt need not have a record if the inobt record says this is a hole.
 *   This likely doesn't happen in practice.
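 *
 * (Illustrative case, not taken from this code: a fully allocated inobt
 * record, i.e. ir_freecount == 0, simply has no finobt counterpart, so a
 * finobt lookup in that range that finds nothing is healthy.)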
 */
STATIC int
xchk_inobt_xref_finobt(
	struct xfs_scrub	*sc,
	struct xfs_inobt_rec_incore *irec,
	xfs_agino_t		agino,
	bool			free,
	bool			hole)
{
	struct xfs_inobt_rec_incore frec;
	struct xfs_btree_cur	*cur = sc->sa.fino_cur;
	bool			ffree, fhole;
	unsigned int		frec_idx, fhole_idx;
	int			has_record;
	int			error;

	ASSERT(xfs_btree_is_fino(cur->bc_ops));

	error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &has_record);
	if (error)
		return error;
	if (!has_record)
		goto no_record;

	error = xfs_inobt_get_rec(cur, &frec, &has_record);
	if (error)
		return error;
	if (!has_record)
		return -EFSCORRUPTED;

	if (frec.ir_startino + XFS_INODES_PER_CHUNK <= agino)
		goto no_record;

	/* There's a finobt record; free and hole status must match. */
	frec_idx = agino - frec.ir_startino;
	ffree = frec.ir_free & (1ULL << frec_idx);
	fhole_idx = frec_idx / XFS_INODES_PER_HOLEMASK_BIT;
	fhole = frec.ir_holemask & (1U << fhole_idx);
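	/*
	 * Worked example with made-up values: if frec.ir_startino == 128 and
	 * agino == 172, then frec_idx == 44, so ffree tests bit 44 of
	 * ir_free.  Since XFS_INODES_PER_HOLEMASK_BIT is 4 (64 inodes per
	 * chunk spread over a 16-bit holemask), fhole_idx == 11.
	 */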

	if (ffree != free)
		xchk_btree_xref_set_corrupt(sc, cur, 0);
	if (fhole != hole)
		xchk_btree_xref_set_corrupt(sc, cur, 0);
	return 0;

no_record:
	/* inobt record is fully allocated */
	if (irec->ir_free == 0)
		return 0;

	/* inobt record is totally unallocated */
	if (irec->ir_free == XFS_INOBT_ALL_FREE)
		return 0;

	/* inobt record says this is a hole */
	if (hole)
		return 0;

	/* finobt doesn't care about allocated inodes */
	if (!free)
		return 0;

	xchk_btree_xref_set_corrupt(sc, cur, 0);
	return 0;
}

/*
 * Make sure that each inode of this part of an inobt record has the same
 * sparse and free status as the finobt.
 */
STATIC void
xchk_inobt_chunk_xref_finobt(
	struct xfs_scrub		*sc,
	struct xfs_inobt_rec_incore	*irec,
	xfs_agino_t			agino,
	unsigned int			nr_inodes)
{
	xfs_agino_t			i;
	unsigned int			rec_idx;
	int				error;

	ASSERT(sc->sm->sm_type == XFS_SCRUB_TYPE_INOBT);

	if (!sc->sa.fino_cur || xchk_skip_xref(sc->sm))
		return;

	for (i = agino, rec_idx = agino - irec->ir_startino;
	     i < agino + nr_inodes;
	     i++, rec_idx++) {
		bool			free, hole;
		unsigned int		hole_idx;

		free = irec->ir_free & (1ULL << rec_idx);
		hole_idx = rec_idx / XFS_INODES_PER_HOLEMASK_BIT;
		hole = irec->ir_holemask & (1U << hole_idx);

		error = xchk_inobt_xref_finobt(sc, irec, i, free, hole);
		if (!xchk_should_check_xref(sc, &error, &sc->sa.fino_cur))
			return;
	}
}

/*
 * Does the inobt have a record for this inode with the same hole/free state?
 * The inobt must always have a record if there's a finobt record.
 */
STATIC int
xchk_finobt_xref_inobt(
	struct xfs_scrub	*sc,
	struct xfs_inobt_rec_incore *frec,
	xfs_agino_t		agino,
	bool			ffree,
	bool			fhole)
{
	struct xfs_inobt_rec_incore irec;
	struct xfs_btree_cur	*cur = sc->sa.ino_cur;
	bool			free, hole;
	unsigned int		rec_idx, hole_idx;
	int			has_record;
	int			error;

	ASSERT(xfs_btree_is_ino(cur->bc_ops));

	error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &has_record);
	if (error)
		return error;
	if (!has_record)
		goto no_record;

	error = xfs_inobt_get_rec(cur, &irec, &has_record);
	if (error)
		return error;
	if (!has_record)
		return -EFSCORRUPTED;

	if (irec.ir_startino + XFS_INODES_PER_CHUNK <= agino)
		goto no_record;

	/* There's an inobt record; free and hole status must match. */
	rec_idx = agino - irec.ir_startino;
	free = irec.ir_free & (1ULL << rec_idx);
	hole_idx = rec_idx / XFS_INODES_PER_HOLEMASK_BIT;
	hole = irec.ir_holemask & (1U << hole_idx);

	if (ffree != free)
		xchk_btree_xref_set_corrupt(sc, cur, 0);
	if (fhole != hole)
		xchk_btree_xref_set_corrupt(sc, cur, 0);
	return 0;

no_record:
	/* finobt should never have a record for which the inobt does not */
	xchk_btree_xref_set_corrupt(sc, cur, 0);
	return 0;
}

/*
 * Make sure that each inode of this part of a finobt record has the same
 * sparse and free status as the inobt.
 */
STATIC void
xchk_finobt_chunk_xref_inobt(
	struct xfs_scrub		*sc,
	struct xfs_inobt_rec_incore	*frec,
	xfs_agino_t			agino,
	unsigned int			nr_inodes)
{
	xfs_agino_t			i;
	unsigned int			rec_idx;
	int				error;

	ASSERT(sc->sm->sm_type == XFS_SCRUB_TYPE_FINOBT);

	if (!sc->sa.ino_cur || xchk_skip_xref(sc->sm))
		return;

	for (i = agino, rec_idx = agino - frec->ir_startino;
	     i < agino + nr_inodes;
	     i++, rec_idx++) {
		bool			ffree, fhole;
		unsigned int		hole_idx;

		ffree = frec->ir_free & (1ULL << rec_idx);
		hole_idx = rec_idx / XFS_INODES_PER_HOLEMASK_BIT;
		fhole = frec->ir_holemask & (1U << hole_idx);

		error = xchk_finobt_xref_inobt(sc, frec, i, ffree, fhole);
		if (!xchk_should_check_xref(sc, &error, &sc->sa.ino_cur))
			return;
	}
}

/* Is this chunk worth checking and cross-referencing? */
STATIC bool
xchk_iallocbt_chunk(
	struct xchk_btree		*bs,
	struct xfs_inobt_rec_incore	*irec,
	xfs_agino_t			agino,
	unsigned int			nr_inodes)
{
	struct xfs_scrub		*sc = bs->sc;
	struct xfs_mount		*mp = bs->cur->bc_mp;
	struct xfs_perag		*pag = bs->cur->bc_ag.pag;
	xfs_agblock_t			agbno;
	xfs_extlen_t			len;

	agbno = XFS_AGINO_TO_AGBNO(mp, agino);
	len = XFS_B_TO_FSB(mp, nr_inodes * mp->m_sb.sb_inodesize);
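	/*
	 * Illustrative arithmetic (values not from this file): a full
	 * 64-inode chunk of 512 byte inodes spans 32768 bytes, which
	 * XFS_B_TO_FSB() turns into 8 fs blocks on a 4k-block filesystem.
	 */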

	if (!xfs_verify_agbext(pag, agbno, len))
		xchk_btree_set_corrupt(bs->sc, bs->cur, 0);

	if (bs->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
		return false;

	xchk_xref_is_used_space(sc, agbno, len);
	if (sc->sm->sm_type == XFS_SCRUB_TYPE_INOBT)
		xchk_inobt_chunk_xref_finobt(sc, irec, agino, nr_inodes);
	else
		xchk_finobt_chunk_xref_inobt(sc, irec, agino, nr_inodes);
	xchk_xref_is_only_owned_by(sc, agbno, len, &XFS_RMAP_OINFO_INODES);
	xchk_xref_is_not_shared(sc, agbno, len);
	xchk_xref_is_not_cow_staging(sc, agbno, len);
	return true;
}

/*
 * Check that an inode's allocation status matches ir_free in the inobt
 * record.  First we try querying the in-core inode state, and if the inode
 * isn't loaded we examine the on-disk inode directly.
 *
 * Since there can be 1:M and M:1 mappings between inobt records and inode
 * clusters, we pass in the inode location information as an inobt record;
 * the index of an inode cluster within the inobt record (as well as the
 * cluster buffer itself); and the index of the inode within the cluster.
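 *
 * (Illustrative geometry, not taken from this file: with 4k blocks, 512
 * byte inodes, and 8k cluster buffers, one 64-inode inobt record spans
 * four cluster buffers (the 1:M case); with 64k blocks a single cluster
 * holds 128 inodes, so two inobt records map to one cluster (the M:1
 * case).)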
 *
 * @irec is the inobt record.
 * @irec_ino is the inode offset from the start of the record.
 * @dip is the on-disk inode.
 */
STATIC int
xchk_iallocbt_check_cluster_ifree(
	struct xchk_btree		*bs,
	struct xfs_inobt_rec_incore	*irec,
	unsigned int			irec_ino,
	struct xfs_dinode		*dip)
{
	struct xfs_mount		*mp = bs->cur->bc_mp;
	xfs_ino_t			fsino;
	xfs_agino_t			agino;
	bool				irec_free;
	bool				ino_inuse;
	bool				freemask_ok;
	int				error = 0;

	if (xchk_should_terminate(bs->sc, &error))
		return error;

	/*
	 * Given an inobt record and the offset of an inode from the start of
	 * the record, compute which fs inode we're talking about.
	 */
	agino = irec->ir_startino + irec_ino;
	fsino = XFS_AGINO_TO_INO(mp, bs->cur->bc_ag.pag->pag_agno, agino);
	irec_free = (irec->ir_free & XFS_INOBT_MASK(irec_ino));

	if (be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC ||
	    (dip->di_version >= 3 && be64_to_cpu(dip->di_ino) != fsino)) {
		xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
		goto out;
	}

	error = xchk_inode_is_allocated(bs->sc, agino, &ino_inuse);
	if (error == -ENODATA) {
		/* Not cached, just read the disk buffer */
		freemask_ok = irec_free ^ !!(dip->di_mode);
		if (!(bs->sc->flags & XCHK_TRY_HARDER) && !freemask_ok)
			return -EDEADLOCK;
	} else if (error < 0) {
		/*
		 * Inode is only half assembled, or there was an IO error,
		 * or the verifier failed, so don't bother trying to check.
		 * The inode scrubber can deal with this.
		 */
		goto out;
	} else {
		/* Inode is all there. */
		freemask_ok = irec_free ^ ino_inuse;
	}
	if (!freemask_ok)
		xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
out:
	return 0;
}

/*
 * Check that the holemask and freemask of a hypothetical inode cluster match
 * what's actually on disk.  If sparse inodes are enabled, the cluster does
 * not actually have to map to inodes if the corresponding holemask bit is set.
 *
 * @cluster_base is the first inode in the cluster within the @irec.
 */
STATIC int
xchk_iallocbt_check_cluster(
	struct xchk_btree		*bs,
	struct xfs_inobt_rec_incore	*irec,
	unsigned int			cluster_base)
{
	struct xfs_imap			imap;
	struct xfs_mount		*mp = bs->cur->bc_mp;
	struct xfs_buf			*cluster_bp;
	unsigned int			nr_inodes;
	xfs_agnumber_t			agno = bs->cur->bc_ag.pag->pag_agno;
	xfs_agblock_t			agbno;
	unsigned int			cluster_index;
	uint16_t			cluster_mask = 0;
	uint16_t			ir_holemask;
	int				error = 0;

	nr_inodes = min_t(unsigned int, XFS_INODES_PER_CHUNK,
			M_IGEO(mp)->inodes_per_cluster);

	/* Map this inode cluster */
	agbno = XFS_AGINO_TO_AGBNO(mp, irec->ir_startino + cluster_base);

	/* Compute a bitmask for this cluster that can be used for holemask. */
	for (cluster_index = 0;
	     cluster_index < nr_inodes;
	     cluster_index += XFS_INODES_PER_HOLEMASK_BIT)
		cluster_mask |= XFS_INOBT_MASK((cluster_base + cluster_index) /
				XFS_INODES_PER_HOLEMASK_BIT);
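	/*
	 * Worked example with made-up geometry: for a 16-inode cluster at
	 * cluster_base 16, cluster_index takes the values 0, 4, 8, and 12,
	 * setting holemask bits 4 through 7, i.e. cluster_mask == 0x00f0.
	 */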

	/*
	 * Map the first inode of this cluster to a buffer and offset.
	 * Be careful about inobt records that don't align with the start of
	 * the inode buffer when block sizes are large enough to hold multiple
	 * inode chunks.  When this happens, cluster_base will be zero but
	 * ir_startino can be large enough to make im_boffset nonzero.
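	 *
	 * (Hypothetical numbers: with 64k blocks and 512 byte inodes, the
	 * second 64-inode chunk in a block has XFS_INO_TO_OFFSET() == 64,
	 * so im_boffset == 64 << 9 == 32768 bytes into the cluster buffer.)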
	 */
	ir_holemask = (irec->ir_holemask & cluster_mask);
	imap.im_blkno = XFS_AGB_TO_DADDR(mp, agno, agbno);
	imap.im_len = XFS_FSB_TO_BB(mp, M_IGEO(mp)->blocks_per_cluster);
	imap.im_boffset = XFS_INO_TO_OFFSET(mp, irec->ir_startino) <<
			mp->m_sb.sb_inodelog;

	if (imap.im_boffset != 0 && cluster_base != 0) {
		ASSERT(imap.im_boffset == 0 || cluster_base == 0);
		xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
		return 0;
	}

	trace_xchk_iallocbt_check_cluster(mp, agno, irec->ir_startino,
			imap.im_blkno, imap.im_len, cluster_base, nr_inodes,
			cluster_mask, ir_holemask,
			XFS_INO_TO_OFFSET(mp, irec->ir_startino +
					  cluster_base));

	/* The whole cluster must be a hole or not a hole. */
	if (ir_holemask != cluster_mask && ir_holemask != 0) {
		xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
		return 0;
	}

	/* If any part of this is a hole, skip it. */
	if (ir_holemask) {
		xchk_xref_is_not_owned_by(bs->sc, agbno,
				M_IGEO(mp)->blocks_per_cluster,
				&XFS_RMAP_OINFO_INODES);
		return 0;
	}

	xchk_xref_is_only_owned_by(bs->sc, agbno, M_IGEO(mp)->blocks_per_cluster,
			&XFS_RMAP_OINFO_INODES);

	/* Grab the inode cluster buffer. */
	error = xfs_imap_to_bp(mp, bs->cur->bc_tp, &imap, &cluster_bp);
	if (!xchk_btree_xref_process_error(bs->sc, bs->cur, 0, &error))
		return error;

	/* Check free status of each inode within this cluster. */
	for (cluster_index = 0; cluster_index < nr_inodes; cluster_index++) {
		struct xfs_dinode	*dip;

		if (imap.im_boffset >= BBTOB(cluster_bp->b_length)) {
			xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
			break;
		}

		dip = xfs_buf_offset(cluster_bp, imap.im_boffset);
		error = xchk_iallocbt_check_cluster_ifree(bs, irec,
				cluster_base + cluster_index, dip);
		if (error)
			break;
		imap.im_boffset += mp->m_sb.sb_inodesize;
	}

	xfs_trans_brelse(bs->cur->bc_tp, cluster_bp);
	return error;
}

/*
 * For all the inode clusters that could map to this inobt record, make sure
 * that the holemask makes sense and that the allocation status of each inode
 * matches the freemask.
 */
STATIC int
xchk_iallocbt_check_clusters(
	struct xchk_btree		*bs,
	struct xfs_inobt_rec_incore	*irec)
{
	unsigned int			cluster_base;
	int				error = 0;

	/*
	 * For the common case where this inobt record maps to multiple inode
	 * clusters this will call _check_cluster for each cluster.
	 *
	 * For the case that multiple inobt records map to a single cluster,
	 * this will call _check_cluster once.
	 */
	for (cluster_base = 0;
	     cluster_base < XFS_INODES_PER_CHUNK;
	     cluster_base += M_IGEO(bs->sc->mp)->inodes_per_cluster) {
		error = xchk_iallocbt_check_cluster(bs, irec, cluster_base);
		if (error)
			break;
	}

	return error;
}

/*
 * Make sure this inode btree record is aligned properly.  Because a fs block
 * contains multiple inodes, we check that the inobt record is aligned to the
 * correct inode, not just the correct block on disk.  This results in a finer
 * grained corruption check.
 */
STATIC void
xchk_iallocbt_rec_alignment(
	struct xchk_btree		*bs,
	struct xfs_inobt_rec_incore	*irec)
{
	struct xfs_mount		*mp = bs->sc->mp;
	struct xchk_iallocbt		*iabt = bs->private;
	struct xfs_ino_geometry		*igeo = M_IGEO(mp);

	/*
	 * finobt records have different positioning requirements than inobt
	 * records: each finobt record must have a corresponding inobt record.
	 * That is checked in the xref function, so for now we only catch the
	 * obvious case where the record isn't at all aligned properly.
	 *
	 * Note that if a fs block contains more than a single chunk of inodes,
	 * we will have finobt records only for those chunks containing free
	 * inodes, and therefore expect chunk alignment of finobt records.
	 * Otherwise, we expect that the finobt record is aligned to the
	 * cluster alignment as told by the superblock.
	 */
	if (xfs_btree_is_fino(bs->cur->bc_ops)) {
		unsigned int	imask;

		imask = min_t(unsigned int, XFS_INODES_PER_CHUNK,
				igeo->cluster_align_inodes) - 1;
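		/*
		 * For instance (made-up value), if cluster_align_inodes is
		 * 16, imask is 15, and a finobt record starting at agino 72
		 * trips the check below because 72 & 15 == 8.
		 */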
		if (irec->ir_startino & imask)
			xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
		return;
	}

	if (iabt->next_startino != NULLAGINO) {
		/*
		 * We're midway through a cluster of inodes that is mapped by
		 * multiple inobt records.  Did we get the record for the next
		 * irec in the sequence?
		 */
		if (irec->ir_startino != iabt->next_startino) {
			xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
			return;
		}

		iabt->next_startino += XFS_INODES_PER_CHUNK;

		/* Are we done with the cluster? */
		if (iabt->next_startino >= iabt->next_cluster_ino) {
			iabt->next_startino = NULLAGINO;
			iabt->next_cluster_ino = NULLAGINO;
		}
		return;
	}

	/* inobt records must be aligned to cluster and inoalignmt size. */
	if (irec->ir_startino & (igeo->cluster_align_inodes - 1)) {
		xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
		return;
	}

	if (irec->ir_startino & (igeo->inodes_per_cluster - 1)) {
		xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
		return;
	}

	if (igeo->inodes_per_cluster <= XFS_INODES_PER_CHUNK)
		return;

	/*
	 * If this is the start of an inode cluster that can be mapped by
	 * multiple inobt records, the next inobt record must follow exactly
	 * after this one.
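	 *
	 * (Illustrative geometry: with 128 inodes per cluster, a record at
	 * ir_startino 256 arms next_startino == 320 and next_cluster_ino ==
	 * 384, so the very next record must start at agino 320.)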
	 */
	iabt->next_startino = irec->ir_startino + XFS_INODES_PER_CHUNK;
	iabt->next_cluster_ino = irec->ir_startino + igeo->inodes_per_cluster;
}

/* Scrub an inobt/finobt record. */
STATIC int
xchk_iallocbt_rec(
	struct xchk_btree		*bs,
	const union xfs_btree_rec	*rec)
{
	struct xfs_mount		*mp = bs->cur->bc_mp;
	struct xchk_iallocbt		*iabt = bs->private;
	struct xfs_inobt_rec_incore	irec;
	uint64_t			holes;
	xfs_agino_t			agino;
	int				holecount;
	int				i;
	int				error = 0;
	uint16_t			holemask;

	xfs_inobt_btrec_to_irec(mp, rec, &irec);
	if (xfs_inobt_check_irec(bs->cur->bc_ag.pag, &irec) != NULL) {
		xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
		return 0;
	}

	agino = irec.ir_startino;

	xchk_iallocbt_rec_alignment(bs, &irec);
	if (bs->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
		goto out;

	iabt->inodes += irec.ir_count;

	/* Handle non-sparse inodes */
	if (!xfs_inobt_issparse(irec.ir_holemask)) {
		if (irec.ir_count != XFS_INODES_PER_CHUNK)
			xchk_btree_set_corrupt(bs->sc, bs->cur, 0);

		if (!xchk_iallocbt_chunk(bs, &irec, agino,
					XFS_INODES_PER_CHUNK))
			goto out;
		goto check_clusters;
	}

	/* Check each chunk of a sparse inode cluster. */
	holemask = irec.ir_holemask;
	holecount = 0;
	holes = ~xfs_inobt_irec_to_allocmask(&irec);
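	/*
	 * At this point holes has a 1 bit for every inode in the chunk that
	 * is not backed by disk space.  A hole inode can never be allocated,
	 * so every hole bit must also be set in ir_free; that is what the
	 * (holes & ir_free) != holes test below catches.
	 */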
	if ((holes & irec.ir_free) != holes ||
	    irec.ir_freecount > irec.ir_count)
		xchk_btree_set_corrupt(bs->sc, bs->cur, 0);

	for (i = 0; i < XFS_INOBT_HOLEMASK_BITS; i++) {
		if (holemask & 1)
			holecount += XFS_INODES_PER_HOLEMASK_BIT;
		else if (!xchk_iallocbt_chunk(bs, &irec, agino,
					XFS_INODES_PER_HOLEMASK_BIT))
			goto out;
		holemask >>= 1;
		agino += XFS_INODES_PER_HOLEMASK_BIT;
	}

	if (holecount > XFS_INODES_PER_CHUNK ||
	    holecount + irec.ir_count != XFS_INODES_PER_CHUNK)
		xchk_btree_set_corrupt(bs->sc, bs->cur, 0);

check_clusters:
	if (bs->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
		goto out;

	error = xchk_iallocbt_check_clusters(bs, &irec);
out:
	return error;
}

/*
 * Make sure the inode btrees are as large as the rmap thinks they are.
 * Don't bother if we're missing btree cursors, as we're already corrupt.
 */
STATIC void
xchk_iallocbt_xref_rmap_btreeblks(
	struct xfs_scrub	*sc)
{
	xfs_filblks_t		blocks;
	xfs_extlen_t		inobt_blocks = 0;
	xfs_extlen_t		finobt_blocks = 0;
	int			error;

	if (!sc->sa.ino_cur || !sc->sa.rmap_cur ||
	    (xfs_has_finobt(sc->mp) && !sc->sa.fino_cur) ||
	    xchk_skip_xref(sc->sm))
		return;

	/* Check that we saw as many inobt blocks as the rmap says. */
	error = xfs_btree_count_blocks(sc->sa.ino_cur, &inobt_blocks);
	if (!xchk_process_error(sc, 0, 0, &error))
		return;

	if (sc->sa.fino_cur) {
		error = xfs_btree_count_blocks(sc->sa.fino_cur, &finobt_blocks);
		if (!xchk_process_error(sc, 0, 0, &error))
			return;
	}

	error = xchk_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur,
			&XFS_RMAP_OINFO_INOBT, &blocks);
	if (!xchk_should_check_xref(sc, &error, &sc->sa.rmap_cur))
		return;
	if (blocks != inobt_blocks + finobt_blocks)
		xchk_btree_set_corrupt(sc, sc->sa.ino_cur, 0);
}

/*
 * Make sure that the inobt records point to the same number of blocks as
 * the rmap says are owned by inodes.
 */
STATIC void
xchk_iallocbt_xref_rmap_inodes(
	struct xfs_scrub	*sc,
	unsigned long long	inodes)
{
	xfs_filblks_t		blocks;
	xfs_filblks_t		inode_blocks;
	int			error;

	if (!sc->sa.rmap_cur || xchk_skip_xref(sc->sm))
		return;

	/* Check that we saw as many inode blocks as the rmap knows about. */
	error = xchk_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur,
			&XFS_RMAP_OINFO_INODES, &blocks);
	if (!xchk_should_check_xref(sc, &error, &sc->sa.rmap_cur))
		return;
	inode_blocks = XFS_B_TO_FSB(sc->mp, inodes * sc->mp->m_sb.sb_inodesize);
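	/*
	 * Example with made-up numbers: 1024 scanned inodes of 512 bytes
	 * each occupy 524288 bytes, i.e. 128 fs blocks at 4k, and the rmap
	 * must account exactly that much space to inode clusters.
	 */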
	if (blocks != inode_blocks)
		xchk_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);
}

/* Scrub one of the inode btrees for some AG. */
int
xchk_iallocbt(
	struct xfs_scrub	*sc)
{
	struct xfs_btree_cur	*cur;
	struct xchk_iallocbt	iabt = {
		.inodes		= 0,
		.next_startino	= NULLAGINO,
		.next_cluster_ino = NULLAGINO,
	};
	int			error;

	switch (sc->sm->sm_type) {
	case XFS_SCRUB_TYPE_INOBT:
		cur = sc->sa.ino_cur;
		break;
	case XFS_SCRUB_TYPE_FINOBT:
		cur = sc->sa.fino_cur;
		break;
	default:
		ASSERT(0);
		return -EIO;
	}

	error = xchk_btree(sc, cur, xchk_iallocbt_rec, &XFS_RMAP_OINFO_INOBT,
			&iabt);
	if (error)
		return error;

	xchk_iallocbt_xref_rmap_btreeblks(sc);

	/*
	 * If we're scrubbing the inode btree, inode_blocks is the number of
	 * blocks pointed to by all the inode chunk records.  Therefore, we
	 * should compare to the number of inode chunk blocks that the rmap
	 * knows about.  We can't do this for the finobt since it only points
	 * to inode chunks with free inodes.
	 */
	if (sc->sm->sm_type == XFS_SCRUB_TYPE_INOBT)
		xchk_iallocbt_xref_rmap_inodes(sc, iabt.inodes);
	return error;
}

/* See if an inode btree has (or doesn't have) an inode chunk record. */
static inline void
xchk_xref_inode_check(
	struct xfs_scrub	*sc,
	xfs_agblock_t		agbno,
	xfs_extlen_t		len,
	struct xfs_btree_cur	**icur,
	enum xbtree_recpacking	expected)
{
	enum xbtree_recpacking	outcome;
	int			error;

	if (!(*icur) || xchk_skip_xref(sc->sm))
		return;

	error = xfs_ialloc_has_inodes_at_extent(*icur, agbno, len, &outcome);
	if (!xchk_should_check_xref(sc, &error, icur))
		return;
	if (outcome != expected)
		xchk_btree_xref_set_corrupt(sc, *icur, 0);
}

/* xref check that the extent is not covered by inodes */
void
xchk_xref_is_not_inode_chunk(
	struct xfs_scrub	*sc,
	xfs_agblock_t		agbno,
	xfs_extlen_t		len)
{
	xchk_xref_inode_check(sc, agbno, len, &sc->sa.ino_cur,
			XBTREE_RECPACKING_EMPTY);
	xchk_xref_inode_check(sc, agbno, len, &sc->sa.fino_cur,
			XBTREE_RECPACKING_EMPTY);
}

/* xref check that the extent is covered by inodes */
void
xchk_xref_is_inode_chunk(
	struct xfs_scrub	*sc,
	xfs_agblock_t		agbno,
	xfs_extlen_t		len)
{
	xchk_xref_inode_check(sc, agbno, len, &sc->sa.ino_cur,
			XBTREE_RECPACKING_FULL);
}