1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Copyright (c) 2021-2024 Oracle.  All Rights Reserved.
4  * Author: Darrick J. Wong <djwong@kernel.org>
5  */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_log_format.h"
13 #include "xfs_trans.h"
14 #include "xfs_inode.h"
15 #include "xfs_btree.h"
16 #include "xfs_ialloc.h"
17 #include "xfs_ialloc_btree.h"
18 #include "xfs_ag.h"
19 #include "xfs_error.h"
20 #include "xfs_bit.h"
21 #include "xfs_icache.h"
22 #include "scrub/scrub.h"
23 #include "scrub/iscan.h"
24 #include "scrub/common.h"
25 #include "scrub/trace.h"
26 
27 /*
28  * Live File Scan
29  * ==============
30  *
31  * Live file scans walk every inode in a live filesystem.  This is more or
32  * less like a regular iwalk, except that when we're advancing the scan cursor,
33  * we must ensure that inodes cannot be added or deleted anywhere between the
34  * old cursor value and the new cursor value.  If we're advancing the cursor
35  * by one inode, the caller must hold that inode; if we're finding the next
36  * inode to scan, we must grab the AGI and hold it until we've updated the
37  * scan cursor.
38  *
 * Callers are expected to use this code to scan all files in the filesystem to
 * construct a new metadata index of some kind.  The scan races against other
 * live updates, which means there must be a provision to update the new index
 * when updates are made to inodes that have already been scanned.  The iscan
 * lock can be used in live update hook code to stop the scan and protect this
 * data structure.
45  *
 * To keep the new index up to date with other metadata updates being made to
 * the live filesystem, it is assumed that the caller will add hooks as needed
 * to be notified when a metadata update occurs.  The inode scanner must tell
 * the hook code when an inode has been visited with xchk_iscan_mark_visited.
 * Hook functions can use xchk_iscan_want_live_update to decide if the
 * scanner's observations must be updated.
52  */
53 
54 /*
55  * If the inobt record @rec covers @iscan->skip_ino, mark the inode free so
56  * that the scan ignores that inode.
57  */
58 STATIC void
xchk_iscan_mask_skipino(struct xchk_iscan * iscan,struct xfs_perag * pag,struct xfs_inobt_rec_incore * rec,xfs_agino_t lastrecino)59 xchk_iscan_mask_skipino(
60 	struct xchk_iscan	*iscan,
61 	struct xfs_perag	*pag,
62 	struct xfs_inobt_rec_incore	*rec,
63 	xfs_agino_t		lastrecino)
64 {
65 	struct xfs_scrub	*sc = iscan->sc;
66 	struct xfs_mount	*mp = sc->mp;
67 	xfs_agnumber_t		skip_agno = XFS_INO_TO_AGNO(mp, iscan->skip_ino);
68 	xfs_agnumber_t		skip_agino = XFS_INO_TO_AGINO(mp, iscan->skip_ino);
69 
70 	if (pag->pag_agno != skip_agno)
71 		return;
72 	if (skip_agino < rec->ir_startino)
73 		return;
74 	if (skip_agino > lastrecino)
75 		return;
76 
77 	rec->ir_free |= xfs_inobt_maskn(skip_agino - rec->ir_startino, 1);
78 }
79 
/*
 * Set *cursor to the next allocated inode after whatever it's set to now.
 * If there are no more inodes in this AG, cursor is set to NULLAGINO.
 *
 * @iscan:	scan context; supplies the scrub context and the optional
 *		skip_ino that must be hidden from the scan.
 * @agi_bp:	AGI buffer for @pag, used to set up the inobt cursor.
 * @pag:	the AG being searched.
 * @allocmaskp:	if an inode is found, set to the chunk's allocation bitmap
 *		shifted so that bit 0 corresponds to the new *cursor.
 * @cursor:	in: AG inode number to search after; out: the next allocated
 *		AG inode number, or NULLAGINO.
 * @nr_inodesp:	if an inode is found, set to the number of inode slots from
 *		the new *cursor to the end of its chunk.
 *
 * *allocmaskp and *nr_inodesp are left untouched when no inode is found.
 *
 * Returns 0 on success (check *cursor to learn whether an inode was found),
 * -EFSCORRUPTED if the inobt records are inconsistent, or whatever negative
 * errno the inobt lookup/increment/get_rec calls returned.
 */
STATIC int
xchk_iscan_find_next(
	struct xchk_iscan	*iscan,
	struct xfs_buf		*agi_bp,
	struct xfs_perag	*pag,
	xfs_inofree_t		*allocmaskp,
	xfs_agino_t		*cursor,
	uint8_t			*nr_inodesp)
{
	struct xfs_scrub	*sc = iscan->sc;
	struct xfs_inobt_rec_incore	rec;
	struct xfs_btree_cur	*cur;
	struct xfs_mount	*mp = sc->mp;
	struct xfs_trans	*tp = sc->tp;
	xfs_agnumber_t		agno = pag->pag_agno;
	xfs_agino_t		lastino = NULLAGINO;
	xfs_agino_t		first, last;
	xfs_agino_t		agino = *cursor;
	int			has_rec;
	int			error;

	/* If the cursor is beyond the end of this AG, move to the next one. */
	xfs_agino_range(mp, agno, &first, &last);
	if (agino > last) {
		*cursor = NULLAGINO;
		return 0;
	}

	/*
	 * Look up the inode chunk for the current cursor position.  If there
	 * is no chunk here, we want the next one.
	 */
	cur = xfs_inobt_init_cursor(pag, tp, agi_bp);
	error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &has_rec);
	if (!error && !has_rec)
		error = xfs_btree_increment(cur, 0, &has_rec);

	/*
	 * Walk the inobt records rightward.  Note that "continue" runs the
	 * loop's increment expression, which steps the btree cursor to the
	 * next record and ends the loop if that step fails.
	 */
	for (; !error; error = xfs_btree_increment(cur, 0, &has_rec)) {
		xfs_inofree_t	allocmask;

		/*
		 * If we've run out of inobt records in this AG, move the
		 * cursor on to the next AG and exit.  The caller can try
		 * again with the next AG.
		 */
		if (!has_rec) {
			*cursor = NULLAGINO;
			break;
		}

		error = xfs_inobt_get_rec(cur, &rec, &has_rec);
		if (error)
			break;
		/* The cursor claimed to point at a record but none was read. */
		if (!has_rec) {
			error = -EFSCORRUPTED;
			break;
		}

		/* Make sure that we always move forward. */
		if (lastino != NULLAGINO &&
		    XFS_IS_CORRUPT(mp, lastino >= rec.ir_startino)) {
			error = -EFSCORRUPTED;
			break;
		}
		/* Remember the last inode number covered by this record. */
		lastino = rec.ir_startino + XFS_INODES_PER_CHUNK - 1;

		/*
		 * If this record only covers inodes that come before the
		 * cursor, advance to the next record.
		 */
		if (rec.ir_startino + XFS_INODES_PER_CHUNK <= agino)
			continue;

		/* Only a nonzero skip_ino asks us to hide an inode. */
		if (iscan->skip_ino)
			xchk_iscan_mask_skipino(iscan, pag, &rec, lastino);

		/*
		 * If the incoming lookup put us in the middle of an inobt
		 * record, mark it and the previous inodes "free" so that the
		 * search for allocated inodes will start at the cursor.
		 * We don't care about ir_freecount here.
		 *
		 * The mask length is (agino + 1 - ir_startino), i.e. it covers
		 * the cursor inode itself, so the inode found below is always
		 * strictly after the incoming cursor.
		 */
		if (agino >= rec.ir_startino)
			rec.ir_free |= xfs_inobt_maskn(0,
						agino + 1 - rec.ir_startino);

		/*
		 * If there are allocated inodes in this chunk, find them
		 * and update the scan cursor.
		 */
		allocmask = ~rec.ir_free;
		if (hweight64(allocmask) > 0) {
			int	next = xfs_lowbit64(allocmask);

			ASSERT(next >= 0);
			*cursor = rec.ir_startino + next;
			/* Shift so that bit 0 describes the new cursor. */
			*allocmaskp = allocmask >> next;
			*nr_inodesp = XFS_INODES_PER_CHUNK - next;
			break;
		}
	}

	/* Tear down the btree cursor, passing along any error we hit. */
	xfs_btree_del_cursor(cur, error);
	return error;
}
188 
189 /*
190  * Advance both the scan and the visited cursors.
191  *
192  * The inumber address space for a given filesystem is sparse, which means that
193  * the scan cursor can jump a long ways in a single iter() call.  There are no
194  * inodes in these sparse areas, so we must move the visited cursor forward at
195  * the same time so that the scan user can receive live updates for inodes that
196  * may get created once we release the AGI buffer.
197  */
198 static inline void
xchk_iscan_move_cursor(struct xchk_iscan * iscan,xfs_agnumber_t agno,xfs_agino_t agino)199 xchk_iscan_move_cursor(
200 	struct xchk_iscan	*iscan,
201 	xfs_agnumber_t		agno,
202 	xfs_agino_t		agino)
203 {
204 	struct xfs_scrub	*sc = iscan->sc;
205 	struct xfs_mount	*mp = sc->mp;
206 	xfs_ino_t		cursor, visited;
207 
208 	BUILD_BUG_ON(XFS_MAXINUMBER == NULLFSINO);
209 
210 	/*
211 	 * Special-case ino == 0 here so that we never set visited_ino to
212 	 * NULLFSINO when wrapping around EOFS, for that will let through all
213 	 * live updates.
214 	 */
215 	cursor = XFS_AGINO_TO_INO(mp, agno, agino);
216 	if (cursor == 0)
217 		visited = XFS_MAXINUMBER;
218 	else
219 		visited = cursor - 1;
220 
221 	mutex_lock(&iscan->lock);
222 	iscan->cursor_ino = cursor;
223 	iscan->__visited_ino = visited;
224 	trace_xchk_iscan_move_cursor(iscan);
225 	mutex_unlock(&iscan->lock);
226 }
227 
228 /*
229  * Prepare to return agno/agino to the iscan caller by moving the lastino
230  * cursor to the previous inode.  Do this while we still hold the AGI so that
231  * no other threads can create or delete inodes in this AG.
232  */
233 static inline void
xchk_iscan_finish(struct xchk_iscan * iscan)234 xchk_iscan_finish(
235 	struct xchk_iscan	*iscan)
236 {
237 	mutex_lock(&iscan->lock);
238 	iscan->cursor_ino = NULLFSINO;
239 
240 	/* All live updates will be applied from now on */
241 	iscan->__visited_ino = NULLFSINO;
242 
243 	mutex_unlock(&iscan->lock);
244 }
245 
/*
 * Mark an inode scan finished before we actually scan anything.
 *
 * Both cursors are expected to still be sitting at the scan start position,
 * which is where xchk_iscan_start leaves them; this is only asserted, not
 * enforced.  After this call the scan is in the same finished state that a
 * completed scan reaches.
 */
void
xchk_iscan_finish_early(
	struct xchk_iscan	*iscan)
{
	/* Neither cursor may have moved since the scan was started. */
	ASSERT(iscan->cursor_ino == iscan->scan_start_ino);
	ASSERT(iscan->__visited_ino == iscan->scan_start_ino);

	xchk_iscan_finish(iscan);
}
256 
257 /*
258  * Grab the AGI to advance the inode scan.  Returns 0 if *agi_bpp is now set,
259  * -ECANCELED if the live scan aborted, -EBUSY if the AGI could not be grabbed,
260  * or the usual negative errno.
261  */
262 STATIC int
xchk_iscan_read_agi(struct xchk_iscan * iscan,struct xfs_perag * pag,struct xfs_buf ** agi_bpp)263 xchk_iscan_read_agi(
264 	struct xchk_iscan	*iscan,
265 	struct xfs_perag	*pag,
266 	struct xfs_buf		**agi_bpp)
267 {
268 	struct xfs_scrub	*sc = iscan->sc;
269 	unsigned long		relax;
270 	int			ret;
271 
272 	if (!xchk_iscan_agi_needs_trylock(iscan))
273 		return xfs_ialloc_read_agi(pag, sc->tp, 0, agi_bpp);
274 
275 	relax = msecs_to_jiffies(iscan->iget_retry_delay);
276 	do {
277 		ret = xfs_ialloc_read_agi(pag, sc->tp, XFS_IALLOC_FLAG_TRYLOCK,
278 				agi_bpp);
279 		if (ret != -EAGAIN)
280 			return ret;
281 		if (!iscan->iget_timeout ||
282 		    time_is_before_jiffies(iscan->__iget_deadline))
283 			return -EBUSY;
284 
285 		trace_xchk_iscan_agi_retry_wait(iscan);
286 	} while (!schedule_timeout_killable(relax) &&
287 		 !xchk_iscan_aborted(iscan));
288 	return -ECANCELED;
289 }
290 
/*
 * Advance ino to the next inode that the inobt thinks is allocated, being
 * careful to jump to the next AG if we've reached the right end of this AG's
 * inode btree.  Advancing ino effectively means that we've pushed the inode
 * scan forward, so set the scan cursor to ino and the visited cursor to
 * (ino - 1) so that our live update predicates will track inode allocations in
 * that part of the inode number key space once we release the AGI buffer.
 *
 * @iscan:	scan context; cursor_ino is the position to advance from.
 * @pagp:	on a return of 1, set to the perag of the AG holding the inode.
 * @agi_bpp:	on a return of 1, set to that AG's locked AGI buffer.
 * @allocmaskp:	on a return of 1, the allocation bitmap starting at the new
 *		cursor position (bit 0 is the cursor inode).
 * @nr_inodesp:	on a return of 1, the number of inode slots from the new
 *		cursor to the end of its inode chunk.
 *
 * On a return of 1 the caller owns the references in *pagp and *agi_bpp and
 * must release them.  On any other return, nothing is handed to the caller;
 * the perag and AGI buffer obtained here have already been released.
 *
 * Returns 1 if there's a new inode to examine, 0 if we've run out of inodes,
 * -ECANCELED if the live scan aborted or the perag could not be obtained, or
 * the usual negative errno.
 */
STATIC int
xchk_iscan_advance(
	struct xchk_iscan	*iscan,
	struct xfs_perag	**pagp,
	struct xfs_buf		**agi_bpp,
	xfs_inofree_t		*allocmaskp,
	uint8_t			*nr_inodesp)
{
	struct xfs_scrub	*sc = iscan->sc;
	struct xfs_mount	*mp = sc->mp;
	struct xfs_buf		*agi_bp;
	struct xfs_perag	*pag;
	xfs_agnumber_t		agno;
	xfs_agino_t		agino;
	int			ret;

	ASSERT(iscan->cursor_ino >= iscan->__visited_ino);

	/* Visit one AG per iteration until we find an inode or wrap around. */
	do {
		if (xchk_iscan_aborted(iscan))
			return -ECANCELED;

		agno = XFS_INO_TO_AGNO(mp, iscan->cursor_ino);
		pag = xfs_perag_get(mp, agno);
		if (!pag)
			return -ECANCELED;

		ret = xchk_iscan_read_agi(iscan, pag, &agi_bp);
		if (ret)
			goto out_pag;

		agino = XFS_INO_TO_AGINO(mp, iscan->cursor_ino);
		ret = xchk_iscan_find_next(iscan, agi_bp, pag, allocmaskp,
				&agino, nr_inodesp);
		if (ret)
			goto out_buf;

		if (agino != NULLAGINO) {
			/*
			 * Found the next inode in this AG, so return it along
			 * with the AGI buffer and the perag structure to
			 * ensure it cannot go away.
			 */
			xchk_iscan_move_cursor(iscan, agno, agino);
			*agi_bpp = agi_bp;
			*pagp = pag;
			return 1;
		}

		/*
		 * Did not find any more inodes in this AG, move on to the next
		 * AG.  The modulus wraps the scan from the last AG back to
		 * AG 0.  The cursor is moved before the AGI buffer is
		 * released.
		 */
		agno = (agno + 1) % mp->m_sb.sb_agcount;
		xchk_iscan_move_cursor(iscan, agno, 0);
		xfs_trans_brelse(sc->tp, agi_bp);
		xfs_perag_put(pag);

		trace_xchk_iscan_advance_ag(iscan);
	} while (iscan->cursor_ino != iscan->scan_start_ino);

	/* We are back where the scan started, so every AG has been walked. */
	xchk_iscan_finish(iscan);
	return 0;

out_buf:
	xfs_trans_brelse(sc->tp, agi_bp);
out_pag:
	xfs_perag_put(pag);
	return ret;
}
372 
373 /*
374  * Grabbing the inode failed, so we need to back up the scan and ask the caller
375  * to try to _advance the scan again.  Returns -EBUSY if we've run out of retry
376  * opportunities, -ECANCELED if the process has a fatal signal pending, or
377  * -EAGAIN if we should try again.
378  */
379 STATIC int
xchk_iscan_iget_retry(struct xchk_iscan * iscan,bool wait)380 xchk_iscan_iget_retry(
381 	struct xchk_iscan	*iscan,
382 	bool			wait)
383 {
384 	ASSERT(iscan->cursor_ino == iscan->__visited_ino + 1);
385 
386 	if (!iscan->iget_timeout ||
387 	    time_is_before_jiffies(iscan->__iget_deadline))
388 		return -EBUSY;
389 
390 	if (wait) {
391 		unsigned long	relax;
392 
393 		/*
394 		 * Sleep for a period of time to let the rest of the system
395 		 * catch up.  If we return early, someone sent a kill signal to
396 		 * the calling process.
397 		 */
398 		relax = msecs_to_jiffies(iscan->iget_retry_delay);
399 		trace_xchk_iscan_iget_retry_wait(iscan);
400 
401 		if (schedule_timeout_killable(relax) ||
402 		    xchk_iscan_aborted(iscan))
403 			return -ECANCELED;
404 	}
405 
406 	iscan->cursor_ino--;
407 	return -EAGAIN;
408 }
409 
/*
 * For an inode scan, we hold the AGI and want to try to grab a batch of
 * inodes.  Holding the AGI prevents inodegc from clearing freed inodes,
 * so we must use noretry here.  For every inode after the first one in the
 * batch, we don't want to wait, so we use noretry there too.  Finally, use
 * dontcache to avoid polluting the cache.
 */
#define ISCAN_IGET_FLAGS	(XFS_IGET_NORETRY | XFS_IGET_DONTCACHE)
418 
/*
 * Grab an inode as part of an inode scan.  While scanning this inode, the
 * caller must ensure that no other threads can modify the inode until a call
 * to xchk_iscan_mark_visited succeeds.
 *
 * @iscan:	scan context; cursor_ino names the first inode to grab and
 *		__inodes[] receives the grabbed inodes.
 * @pag:	perag reference for the AG being scanned.
 * @agi_bp:	locked AGI buffer for that AG.
 * @allocmask:	allocation bitmap whose bit 0 corresponds to cursor_ino.
 * @nr_inodes:	number of inode slots described by @allocmask.
 *
 * This function always releases @agi_bp and drops the @pag reference before
 * returning, whatever the outcome.
 *
 * Returns the number of incore inodes grabbed; -EAGAIN if the caller should
 * call xchk_iscan_advance again; -EBUSY if we couldn't grab an inode;
 * -ECANCELED if there's a fatal signal pending; or some other negative errno.
 */
STATIC int
xchk_iscan_iget(
	struct xchk_iscan	*iscan,
	struct xfs_perag	*pag,
	struct xfs_buf		*agi_bp,
	xfs_inofree_t		allocmask,
	uint8_t			nr_inodes)
{
	struct xfs_scrub	*sc = iscan->sc;
	struct xfs_mount	*mp = sc->mp;
	xfs_ino_t		ino = iscan->cursor_ino;
	unsigned int		idx = 0;
	unsigned int		i;
	int			error;

	/* The previous batch must have been fully consumed. */
	ASSERT(iscan->__inodes[0] == NULL);

	/* Fill the first slot in the inode array. */
	error = xfs_iget(sc->mp, sc->tp, ino, ISCAN_IGET_FLAGS, 0,
			&iscan->__inodes[idx]);

	trace_xchk_iscan_iget(iscan, error);

	if (error == -ENOENT || error == -EAGAIN) {
		xfs_trans_brelse(sc->tp, agi_bp);
		xfs_perag_put(pag);

		/*
		 * It's possible that this inode has lost all of its links but
		 * hasn't yet been inactivated.  If we don't have a transaction
		 * or it's not writable, flush the inodegc workers and wait.
		 * If we have a non-empty transaction, we must not block on
		 * inodegc, which allocates its own transactions.
		 */
		if (sc->tp && !(sc->tp->t_flags & XFS_TRANS_NO_WRITECOUNT))
			xfs_inodegc_push(mp);
		else
			xfs_inodegc_flush(mp);
		return xchk_iscan_iget_retry(iscan, true);
	}

	if (error == -EINVAL) {
		xfs_trans_brelse(sc->tp, agi_bp);
		xfs_perag_put(pag);

		/*
		 * We thought the inode was allocated, but the inode btree
		 * lookup failed, which means that it was freed since the last
		 * time we advanced the cursor.  Back up and try again.  This
		 * should never happen since we still hold the AGI buffer from
		 * the inobt check, but we need to be careful about infinite
		 * loops.
		 */
		return xchk_iscan_iget_retry(iscan, false);
	}

	/* Any other failure is passed straight back to the caller. */
	if (error) {
		xfs_trans_brelse(sc->tp, agi_bp);
		xfs_perag_put(pag);
		return error;
	}

	/* The first inode is in hand; step past it. */
	idx++;
	ino++;
	allocmask >>= 1;

	/*
	 * Now that we've filled the first slot in __inodes, try to fill the
	 * rest of the batch with consecutively ordered inodes to reduce the
	 * number of _iter calls.  Make a bitmap of unallocated inodes from the
	 * zeroes in the inuse bitmap; these inodes will not be scanned, but
	 * the _want_live_update predicate will pass through all live updates.
	 *
	 * If we can't iget an allocated inode, stop and return what we have.
	 */
	mutex_lock(&iscan->lock);
	/* ino was already incremented, so this is the batch's first inode. */
	iscan->__batch_ino = ino - 1;
	iscan->__skipped_inomask = 0;
	mutex_unlock(&iscan->lock);

	/* Bit i of the skipped mask is relative to __batch_ino. */
	for (i = 1; i < nr_inodes; i++, ino++, allocmask >>= 1) {
		if (!(allocmask & 1)) {
			ASSERT(!(iscan->__skipped_inomask & (1ULL << i)));

			/* Unallocated: record the skip and move the cursor. */
			mutex_lock(&iscan->lock);
			iscan->cursor_ino = ino;
			iscan->__skipped_inomask |= (1ULL << i);
			mutex_unlock(&iscan->lock);
			continue;
		}

		ASSERT(iscan->__inodes[idx] == NULL);

		error = xfs_iget(sc->mp, sc->tp, ino, ISCAN_IGET_FLAGS, 0,
				&iscan->__inodes[idx]);
		/* Not fatal: end the batch here with the inodes we have. */
		if (error)
			break;

		mutex_lock(&iscan->lock);
		iscan->cursor_ino = ino;
		mutex_unlock(&iscan->lock);
		idx++;
	}

	trace_xchk_iscan_iget_batch(sc->mp, iscan, nr_inodes, idx);
	xfs_trans_brelse(sc->tp, agi_bp);
	xfs_perag_put(pag);
	return idx;
}
535 
536 /*
537  * Advance the visit cursor to reflect skipped inodes beyond whatever we
538  * scanned.
539  */
540 STATIC void
xchk_iscan_finish_batch(struct xchk_iscan * iscan)541 xchk_iscan_finish_batch(
542 	struct xchk_iscan	*iscan)
543 {
544 	xfs_ino_t		highest_skipped;
545 
546 	mutex_lock(&iscan->lock);
547 
548 	if (iscan->__batch_ino != NULLFSINO) {
549 		highest_skipped = iscan->__batch_ino +
550 					xfs_highbit64(iscan->__skipped_inomask);
551 		iscan->__visited_ino = max(iscan->__visited_ino,
552 					   highest_skipped);
553 
554 		trace_xchk_iscan_skip(iscan);
555 	}
556 
557 	iscan->__batch_ino = NULLFSINO;
558 	iscan->__skipped_inomask = 0;
559 
560 	mutex_unlock(&iscan->lock);
561 }
562 
563 /*
564  * Advance the inode scan cursor to the next allocated inode and return up to
565  * 64 consecutive allocated inodes starting with the cursor position.
566  */
567 STATIC int
xchk_iscan_iter_batch(struct xchk_iscan * iscan)568 xchk_iscan_iter_batch(
569 	struct xchk_iscan	*iscan)
570 {
571 	struct xfs_scrub	*sc = iscan->sc;
572 	int			ret;
573 
574 	xchk_iscan_finish_batch(iscan);
575 
576 	if (iscan->iget_timeout)
577 		iscan->__iget_deadline = jiffies +
578 					 msecs_to_jiffies(iscan->iget_timeout);
579 
580 	do {
581 		struct xfs_buf	*agi_bp = NULL;
582 		struct xfs_perag *pag = NULL;
583 		xfs_inofree_t	allocmask = 0;
584 		uint8_t		nr_inodes = 0;
585 
586 		ret = xchk_iscan_advance(iscan, &pag, &agi_bp, &allocmask,
587 				&nr_inodes);
588 		if (ret != 1)
589 			return ret;
590 
591 		if (xchk_iscan_aborted(iscan)) {
592 			xfs_trans_brelse(sc->tp, agi_bp);
593 			xfs_perag_put(pag);
594 			ret = -ECANCELED;
595 			break;
596 		}
597 
598 		ret = xchk_iscan_iget(iscan, pag, agi_bp, allocmask, nr_inodes);
599 	} while (ret == -EAGAIN);
600 
601 	return ret;
602 }
603 
604 /*
605  * Advance the inode scan cursor to the next allocated inode and return the
606  * incore inode structure associated with it.
607  *
608  * Returns 1 if there's a new inode to examine, 0 if we've run out of inodes,
609  * -ECANCELED if the live scan aborted, -EBUSY if the incore inode could not be
610  * grabbed, or the usual negative errno.
611  *
612  * If the function returns -EBUSY and the caller can handle skipping an inode,
613  * it may call this function again to continue the scan with the next allocated
614  * inode.
615  */
616 int
xchk_iscan_iter(struct xchk_iscan * iscan,struct xfs_inode ** ipp)617 xchk_iscan_iter(
618 	struct xchk_iscan	*iscan,
619 	struct xfs_inode	**ipp)
620 {
621 	unsigned int		i;
622 	int			error;
623 
624 	/* Find a cached inode, or go get another batch. */
625 	for (i = 0; i < XFS_INODES_PER_CHUNK; i++) {
626 		if (iscan->__inodes[i])
627 			goto foundit;
628 	}
629 
630 	error = xchk_iscan_iter_batch(iscan);
631 	if (error <= 0)
632 		return error;
633 
634 	ASSERT(iscan->__inodes[0] != NULL);
635 	i = 0;
636 
637 foundit:
638 	/* Give the caller our reference. */
639 	*ipp = iscan->__inodes[i];
640 	iscan->__inodes[i] = NULL;
641 	return 1;
642 }
643 
644 /* Clean up an xfs_iscan_iter call by dropping any inodes that we still hold. */
645 void
xchk_iscan_iter_finish(struct xchk_iscan * iscan)646 xchk_iscan_iter_finish(
647 	struct xchk_iscan	*iscan)
648 {
649 	struct xfs_scrub	*sc = iscan->sc;
650 	unsigned int		i;
651 
652 	for (i = 0; i < XFS_INODES_PER_CHUNK; i++) {
653 		if (iscan->__inodes[i]) {
654 			xchk_irele(sc, iscan->__inodes[i]);
655 			iscan->__inodes[i] = NULL;
656 		}
657 	}
658 }
659 
/*
 * Mark this inode scan finished and release resources.
 *
 * Any inodes still held in the batch are released, both cursors are moved to
 * the finished state, and the iscan lock is destroyed.
 */
void
xchk_iscan_teardown(
	struct xchk_iscan	*iscan)
{
	/* Drop inodes that were grabbed but never handed to the caller. */
	xchk_iscan_iter_finish(iscan);
	/* This takes the iscan lock, so it must precede the destroy below. */
	xchk_iscan_finish(iscan);
	mutex_destroy(&iscan->lock);
}
669 
670 /* Pick an AG from which to start a scan. */
671 static inline xfs_ino_t
xchk_iscan_rotor(struct xfs_mount * mp)672 xchk_iscan_rotor(
673 	struct xfs_mount	*mp)
674 {
675 	static atomic_t		agi_rotor;
676 	unsigned int		r = atomic_inc_return(&agi_rotor) - 1;
677 
678 	/*
679 	 * Rotoring *backwards* through the AGs, so we add one here before
680 	 * subtracting from the agcount to arrive at an AG number.
681 	 */
682 	r = (r % mp->m_sb.sb_agcount) + 1;
683 
684 	return XFS_AGINO_TO_INO(mp, mp->m_sb.sb_agcount - r, 0);
685 }
686 
687 /*
688  * Set ourselves up to start an inode scan.  If the @iget_timeout and
689  * @iget_retry_delay parameters are set, the scan will try to iget each inode
690  * for @iget_timeout milliseconds.  If an iget call indicates that the inode is
691  * waiting to be inactivated, the CPU will relax for @iget_retry_delay
692  * milliseconds after pushing the inactivation workers.
693  */
694 void
xchk_iscan_start(struct xfs_scrub * sc,unsigned int iget_timeout,unsigned int iget_retry_delay,struct xchk_iscan * iscan)695 xchk_iscan_start(
696 	struct xfs_scrub	*sc,
697 	unsigned int		iget_timeout,
698 	unsigned int		iget_retry_delay,
699 	struct xchk_iscan	*iscan)
700 {
701 	xfs_ino_t		start_ino;
702 
703 	start_ino = xchk_iscan_rotor(sc->mp);
704 
705 	iscan->__batch_ino = NULLFSINO;
706 	iscan->__skipped_inomask = 0;
707 
708 	iscan->sc = sc;
709 	clear_bit(XCHK_ISCAN_OPSTATE_ABORTED, &iscan->__opstate);
710 	iscan->iget_timeout = iget_timeout;
711 	iscan->iget_retry_delay = iget_retry_delay;
712 	iscan->__visited_ino = start_ino;
713 	iscan->cursor_ino = start_ino;
714 	iscan->scan_start_ino = start_ino;
715 	mutex_init(&iscan->lock);
716 	memset(iscan->__inodes, 0, sizeof(iscan->__inodes));
717 
718 	trace_xchk_iscan_start(iscan, start_ino);
719 }
720 
/*
 * Mark this inode as having been visited.  Callers must hold a sufficiently
 * exclusive lock on the inode to prevent concurrent modifications.
 *
 * This moves the visited cursor to @ip's inode number under the iscan lock,
 * which is the cursor that xchk_iscan_want_live_update consults.
 */
void
xchk_iscan_mark_visited(
	struct xchk_iscan	*iscan,
	struct xfs_inode	*ip)
{
	mutex_lock(&iscan->lock);
	iscan->__visited_ino = ip->i_ino;
	trace_xchk_iscan_visit(iscan);
	mutex_unlock(&iscan->lock);
}
735 
736 /*
737  * Did we skip this inode because it wasn't allocated when we loaded the batch?
738  * If so, it is newly allocated and will not be scanned.  All live updates to
739  * this inode must be passed to the caller to maintain scan correctness.
740  */
741 static inline bool
xchk_iscan_skipped(const struct xchk_iscan * iscan,xfs_ino_t ino)742 xchk_iscan_skipped(
743 	const struct xchk_iscan	*iscan,
744 	xfs_ino_t		ino)
745 {
746 	if (iscan->__batch_ino == NULLFSINO)
747 		return false;
748 	if (ino < iscan->__batch_ino)
749 		return false;
750 	if (ino >= iscan->__batch_ino + XFS_INODES_PER_CHUNK)
751 		return false;
752 
753 	return iscan->__skipped_inomask & (1ULL << (ino - iscan->__batch_ino));
754 }
755 
756 /*
757  * Do we need a live update for this inode?  This is true if the scanner thread
758  * has visited this inode and the scan hasn't been aborted due to errors.
759  * Callers must hold a sufficiently exclusive lock on the inode to prevent
760  * scanners from reading any inode metadata.
761  */
762 bool
xchk_iscan_want_live_update(struct xchk_iscan * iscan,xfs_ino_t ino)763 xchk_iscan_want_live_update(
764 	struct xchk_iscan	*iscan,
765 	xfs_ino_t		ino)
766 {
767 	bool			ret = false;
768 
769 	if (xchk_iscan_aborted(iscan))
770 		return false;
771 
772 	mutex_lock(&iscan->lock);
773 
774 	trace_xchk_iscan_want_live_update(iscan, ino);
775 
776 	/* Scan is finished, caller should receive all updates. */
777 	if (iscan->__visited_ino == NULLFSINO) {
778 		ret = true;
779 		goto unlock;
780 	}
781 
782 	/*
783 	 * No inodes have been visited yet, so the visited cursor points at the
784 	 * start of the scan range.  The caller should not receive any updates.
785 	 */
786 	if (iscan->scan_start_ino == iscan->__visited_ino) {
787 		ret = false;
788 		goto unlock;
789 	}
790 
791 	/*
792 	 * This inode was not allocated at the time of the iscan batch.
793 	 * The caller should receive all updates.
794 	 */
795 	if (xchk_iscan_skipped(iscan, ino)) {
796 		ret = true;
797 		goto unlock;
798 	}
799 
800 	/*
801 	 * The visited cursor hasn't yet wrapped around the end of the FS.  If
802 	 * @ino is inside the starred range, the caller should receive updates:
803 	 *
804 	 * 0 ------------ S ************ V ------------ EOFS
805 	 */
806 	if (iscan->scan_start_ino <= iscan->__visited_ino) {
807 		if (ino >= iscan->scan_start_ino &&
808 		    ino <= iscan->__visited_ino)
809 			ret = true;
810 
811 		goto unlock;
812 	}
813 
814 	/*
815 	 * The visited cursor wrapped around the end of the FS.  If @ino is
816 	 * inside the starred range, the caller should receive updates:
817 	 *
818 	 * 0 ************ V ------------ S ************ EOFS
819 	 */
820 	if (ino >= iscan->scan_start_ino || ino <= iscan->__visited_ino)
821 		ret = true;
822 
823 unlock:
824 	mutex_unlock(&iscan->lock);
825 	return ret;
826 }
827