1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Copyright (c) 2021-2024 Oracle. All Rights Reserved.
4 * Author: Darrick J. Wong <djwong@kernel.org>
5 */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_trans_resv.h"
11 #include "xfs_mount.h"
12 #include "xfs_log_format.h"
13 #include "xfs_trans.h"
14 #include "xfs_inode.h"
15 #include "xfs_btree.h"
16 #include "xfs_ialloc.h"
17 #include "xfs_ialloc_btree.h"
18 #include "xfs_ag.h"
19 #include "xfs_error.h"
20 #include "xfs_bit.h"
21 #include "xfs_icache.h"
22 #include "scrub/scrub.h"
23 #include "scrub/iscan.h"
24 #include "scrub/common.h"
25 #include "scrub/trace.h"
26
27 /*
28 * Live File Scan
29 * ==============
30 *
31 * Live file scans walk every inode in a live filesystem. This is more or
32 * less like a regular iwalk, except that when we're advancing the scan cursor,
33 * we must ensure that inodes cannot be added or deleted anywhere between the
34 * old cursor value and the new cursor value. If we're advancing the cursor
35 * by one inode, the caller must hold that inode; if we're finding the next
36 * inode to scan, we must grab the AGI and hold it until we've updated the
37 * scan cursor.
38 *
39 * Callers are expected to use this code to scan all files in the filesystem to
40 * construct a new metadata index of some kind. The scan races against other
41 * live updates, which means there must be a provision to update the new index
42 * when updates are made to inodes that have already been scanned. The iscan lock
43 * can be used in live update hook code to stop the scan and protect this data
44 * structure.
45 *
46 * To keep the new index up to date with other metadata updates being made to
47 * the live filesystem, it is assumed that the caller will add hooks as needed
48 * to be notified when a metadata update occurs. The inode scanner must tell
49 * the hook code when an inode has been visited with xchk_iscan_mark_visited.
50 * Hook functions can use xchk_iscan_want_live_update to decide if the
51 * scanner's observations must be updated.
52 */
53
54 /*
55 * If the inobt record @rec covers @iscan->skip_ino, mark the inode free so
56 * that the scan ignores that inode.
57 */
58 STATIC void
xchk_iscan_mask_skipino(struct xchk_iscan * iscan,struct xfs_perag * pag,struct xfs_inobt_rec_incore * rec,xfs_agino_t lastrecino)59 xchk_iscan_mask_skipino(
60 struct xchk_iscan *iscan,
61 struct xfs_perag *pag,
62 struct xfs_inobt_rec_incore *rec,
63 xfs_agino_t lastrecino)
64 {
65 struct xfs_scrub *sc = iscan->sc;
66 struct xfs_mount *mp = sc->mp;
67 xfs_agnumber_t skip_agno = XFS_INO_TO_AGNO(mp, iscan->skip_ino);
68 xfs_agnumber_t skip_agino = XFS_INO_TO_AGINO(mp, iscan->skip_ino);
69
70 if (pag->pag_agno != skip_agno)
71 return;
72 if (skip_agino < rec->ir_startino)
73 return;
74 if (skip_agino > lastrecino)
75 return;
76
77 rec->ir_free |= xfs_inobt_maskn(skip_agino - rec->ir_startino, 1);
78 }
79
80 /*
81 * Set *cursor to the next allocated inode after whatever it's set to now.
82 * If there are no more inodes in this AG, cursor is set to NULLAGINO.
83 */
84 STATIC int
xchk_iscan_find_next(struct xchk_iscan * iscan,struct xfs_buf * agi_bp,struct xfs_perag * pag,xfs_inofree_t * allocmaskp,xfs_agino_t * cursor,uint8_t * nr_inodesp)85 xchk_iscan_find_next(
86 struct xchk_iscan *iscan,
87 struct xfs_buf *agi_bp,
88 struct xfs_perag *pag,
89 xfs_inofree_t *allocmaskp,
90 xfs_agino_t *cursor,
91 uint8_t *nr_inodesp)
92 {
93 struct xfs_scrub *sc = iscan->sc;
94 struct xfs_inobt_rec_incore rec;
95 struct xfs_btree_cur *cur;
96 struct xfs_mount *mp = sc->mp;
97 struct xfs_trans *tp = sc->tp;
98 xfs_agnumber_t agno = pag->pag_agno;
99 xfs_agino_t lastino = NULLAGINO;
100 xfs_agino_t first, last;
101 xfs_agino_t agino = *cursor;
102 int has_rec;
103 int error;
104
105 /* If the cursor is beyond the end of this AG, move to the next one. */
106 xfs_agino_range(mp, agno, &first, &last);
107 if (agino > last) {
108 *cursor = NULLAGINO;
109 return 0;
110 }
111
112 /*
113 * Look up the inode chunk for the current cursor position. If there
114 * is no chunk here, we want the next one.
115 */
116 cur = xfs_inobt_init_cursor(pag, tp, agi_bp);
117 error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &has_rec);
118 if (!error && !has_rec)
119 error = xfs_btree_increment(cur, 0, &has_rec);
120 for (; !error; error = xfs_btree_increment(cur, 0, &has_rec)) {
121 xfs_inofree_t allocmask;
122
123 /*
124 * If we've run out of inobt records in this AG, move the
125 * cursor on to the next AG and exit. The caller can try
126 * again with the next AG.
127 */
128 if (!has_rec) {
129 *cursor = NULLAGINO;
130 break;
131 }
132
133 error = xfs_inobt_get_rec(cur, &rec, &has_rec);
134 if (error)
135 break;
136 if (!has_rec) {
137 error = -EFSCORRUPTED;
138 break;
139 }
140
141 /* Make sure that we always move forward. */
142 if (lastino != NULLAGINO &&
143 XFS_IS_CORRUPT(mp, lastino >= rec.ir_startino)) {
144 error = -EFSCORRUPTED;
145 break;
146 }
147 lastino = rec.ir_startino + XFS_INODES_PER_CHUNK - 1;
148
149 /*
150 * If this record only covers inodes that come before the
151 * cursor, advance to the next record.
152 */
153 if (rec.ir_startino + XFS_INODES_PER_CHUNK <= agino)
154 continue;
155
156 if (iscan->skip_ino)
157 xchk_iscan_mask_skipino(iscan, pag, &rec, lastino);
158
159 /*
160 * If the incoming lookup put us in the middle of an inobt
161 * record, mark it and the previous inodes "free" so that the
162 * search for allocated inodes will start at the cursor.
163 * We don't care about ir_freecount here.
164 */
165 if (agino >= rec.ir_startino)
166 rec.ir_free |= xfs_inobt_maskn(0,
167 agino + 1 - rec.ir_startino);
168
169 /*
170 * If there are allocated inodes in this chunk, find them
171 * and update the scan cursor.
172 */
173 allocmask = ~rec.ir_free;
174 if (hweight64(allocmask) > 0) {
175 int next = xfs_lowbit64(allocmask);
176
177 ASSERT(next >= 0);
178 *cursor = rec.ir_startino + next;
179 *allocmaskp = allocmask >> next;
180 *nr_inodesp = XFS_INODES_PER_CHUNK - next;
181 break;
182 }
183 }
184
185 xfs_btree_del_cursor(cur, error);
186 return error;
187 }
188
189 /*
190 * Advance both the scan and the visited cursors.
191 *
192 * The inumber address space for a given filesystem is sparse, which means that
193 * the scan cursor can jump a long ways in a single iter() call. There are no
194 * inodes in these sparse areas, so we must move the visited cursor forward at
195 * the same time so that the scan user can receive live updates for inodes that
196 * may get created once we release the AGI buffer.
197 */
198 static inline void
xchk_iscan_move_cursor(struct xchk_iscan * iscan,xfs_agnumber_t agno,xfs_agino_t agino)199 xchk_iscan_move_cursor(
200 struct xchk_iscan *iscan,
201 xfs_agnumber_t agno,
202 xfs_agino_t agino)
203 {
204 struct xfs_scrub *sc = iscan->sc;
205 struct xfs_mount *mp = sc->mp;
206 xfs_ino_t cursor, visited;
207
208 BUILD_BUG_ON(XFS_MAXINUMBER == NULLFSINO);
209
210 /*
211 * Special-case ino == 0 here so that we never set visited_ino to
212 * NULLFSINO when wrapping around EOFS, for that will let through all
213 * live updates.
214 */
215 cursor = XFS_AGINO_TO_INO(mp, agno, agino);
216 if (cursor == 0)
217 visited = XFS_MAXINUMBER;
218 else
219 visited = cursor - 1;
220
221 mutex_lock(&iscan->lock);
222 iscan->cursor_ino = cursor;
223 iscan->__visited_ino = visited;
224 trace_xchk_iscan_move_cursor(iscan);
225 mutex_unlock(&iscan->lock);
226 }
227
228 /*
229 * Prepare to return agno/agino to the iscan caller by moving the lastino
230 * cursor to the previous inode. Do this while we still hold the AGI so that
231 * no other threads can create or delete inodes in this AG.
232 */
233 static inline void
xchk_iscan_finish(struct xchk_iscan * iscan)234 xchk_iscan_finish(
235 struct xchk_iscan *iscan)
236 {
237 mutex_lock(&iscan->lock);
238 iscan->cursor_ino = NULLFSINO;
239
240 /* All live updates will be applied from now on */
241 iscan->__visited_ino = NULLFSINO;
242
243 mutex_unlock(&iscan->lock);
244 }
245
246 /* Mark an inode scan finished before we actually scan anything. */
247 void
xchk_iscan_finish_early(struct xchk_iscan * iscan)248 xchk_iscan_finish_early(
249 struct xchk_iscan *iscan)
250 {
251 ASSERT(iscan->cursor_ino == iscan->scan_start_ino);
252 ASSERT(iscan->__visited_ino == iscan->scan_start_ino);
253
254 xchk_iscan_finish(iscan);
255 }
256
257 /*
258 * Grab the AGI to advance the inode scan. Returns 0 if *agi_bpp is now set,
259 * -ECANCELED if the live scan aborted, -EBUSY if the AGI could not be grabbed,
260 * or the usual negative errno.
261 */
262 STATIC int
xchk_iscan_read_agi(struct xchk_iscan * iscan,struct xfs_perag * pag,struct xfs_buf ** agi_bpp)263 xchk_iscan_read_agi(
264 struct xchk_iscan *iscan,
265 struct xfs_perag *pag,
266 struct xfs_buf **agi_bpp)
267 {
268 struct xfs_scrub *sc = iscan->sc;
269 unsigned long relax;
270 int ret;
271
272 if (!xchk_iscan_agi_needs_trylock(iscan))
273 return xfs_ialloc_read_agi(pag, sc->tp, 0, agi_bpp);
274
275 relax = msecs_to_jiffies(iscan->iget_retry_delay);
276 do {
277 ret = xfs_ialloc_read_agi(pag, sc->tp, XFS_IALLOC_FLAG_TRYLOCK,
278 agi_bpp);
279 if (ret != -EAGAIN)
280 return ret;
281 if (!iscan->iget_timeout ||
282 time_is_before_jiffies(iscan->__iget_deadline))
283 return -EBUSY;
284
285 trace_xchk_iscan_agi_retry_wait(iscan);
286 } while (!schedule_timeout_killable(relax) &&
287 !xchk_iscan_aborted(iscan));
288 return -ECANCELED;
289 }
290
291 /*
292 * Advance ino to the next inode that the inobt thinks is allocated, being
293 * careful to jump to the next AG if we've reached the right end of this AG's
294 * inode btree. Advancing ino effectively means that we've pushed the inode
295 * scan forward, so set the iscan cursor to (ino - 1) so that our live update
296 * predicates will track inode allocations in that part of the inode number
297 * key space once we release the AGI buffer.
298 *
299 * Returns 1 if there's a new inode to examine, 0 if we've run out of inodes,
300 * -ECANCELED if the live scan aborted, or the usual negative errno.
301 */
302 STATIC int
xchk_iscan_advance(struct xchk_iscan * iscan,struct xfs_perag ** pagp,struct xfs_buf ** agi_bpp,xfs_inofree_t * allocmaskp,uint8_t * nr_inodesp)303 xchk_iscan_advance(
304 struct xchk_iscan *iscan,
305 struct xfs_perag **pagp,
306 struct xfs_buf **agi_bpp,
307 xfs_inofree_t *allocmaskp,
308 uint8_t *nr_inodesp)
309 {
310 struct xfs_scrub *sc = iscan->sc;
311 struct xfs_mount *mp = sc->mp;
312 struct xfs_buf *agi_bp;
313 struct xfs_perag *pag;
314 xfs_agnumber_t agno;
315 xfs_agino_t agino;
316 int ret;
317
318 ASSERT(iscan->cursor_ino >= iscan->__visited_ino);
319
320 do {
321 if (xchk_iscan_aborted(iscan))
322 return -ECANCELED;
323
324 agno = XFS_INO_TO_AGNO(mp, iscan->cursor_ino);
325 pag = xfs_perag_get(mp, agno);
326 if (!pag)
327 return -ECANCELED;
328
329 ret = xchk_iscan_read_agi(iscan, pag, &agi_bp);
330 if (ret)
331 goto out_pag;
332
333 agino = XFS_INO_TO_AGINO(mp, iscan->cursor_ino);
334 ret = xchk_iscan_find_next(iscan, agi_bp, pag, allocmaskp,
335 &agino, nr_inodesp);
336 if (ret)
337 goto out_buf;
338
339 if (agino != NULLAGINO) {
340 /*
341 * Found the next inode in this AG, so return it along
342 * with the AGI buffer and the perag structure to
343 * ensure it cannot go away.
344 */
345 xchk_iscan_move_cursor(iscan, agno, agino);
346 *agi_bpp = agi_bp;
347 *pagp = pag;
348 return 1;
349 }
350
351 /*
352 * Did not find any more inodes in this AG, move on to the next
353 * AG.
354 */
355 agno = (agno + 1) % mp->m_sb.sb_agcount;
356 xchk_iscan_move_cursor(iscan, agno, 0);
357 xfs_trans_brelse(sc->tp, agi_bp);
358 xfs_perag_put(pag);
359
360 trace_xchk_iscan_advance_ag(iscan);
361 } while (iscan->cursor_ino != iscan->scan_start_ino);
362
363 xchk_iscan_finish(iscan);
364 return 0;
365
366 out_buf:
367 xfs_trans_brelse(sc->tp, agi_bp);
368 out_pag:
369 xfs_perag_put(pag);
370 return ret;
371 }
372
373 /*
374 * Grabbing the inode failed, so we need to back up the scan and ask the caller
375 * to try to _advance the scan again. Returns -EBUSY if we've run out of retry
376 * opportunities, -ECANCELED if the process has a fatal signal pending, or
377 * -EAGAIN if we should try again.
378 */
379 STATIC int
xchk_iscan_iget_retry(struct xchk_iscan * iscan,bool wait)380 xchk_iscan_iget_retry(
381 struct xchk_iscan *iscan,
382 bool wait)
383 {
384 ASSERT(iscan->cursor_ino == iscan->__visited_ino + 1);
385
386 if (!iscan->iget_timeout ||
387 time_is_before_jiffies(iscan->__iget_deadline))
388 return -EBUSY;
389
390 if (wait) {
391 unsigned long relax;
392
393 /*
394 * Sleep for a period of time to let the rest of the system
395 * catch up. If we return early, someone sent a kill signal to
396 * the calling process.
397 */
398 relax = msecs_to_jiffies(iscan->iget_retry_delay);
399 trace_xchk_iscan_iget_retry_wait(iscan);
400
401 if (schedule_timeout_killable(relax) ||
402 xchk_iscan_aborted(iscan))
403 return -ECANCELED;
404 }
405
406 iscan->cursor_ino--;
407 return -EAGAIN;
408 }
409
/*
 * Flags for every iget call made by the inode scan.  The scan holds the AGI
 * while it tries to grab a batch of inodes, and holding the AGI prevents
 * inodegc from clearing freed inodes, so the first iget of a batch must use
 * noretry.  We don't want to wait for any inode after the first one in the
 * batch either, so those igets use noretry too.  Finally, use dontcache to
 * avoid polluting the cache.
 */
#define ISCAN_IGET_FLAGS	(XFS_IGET_DONTCACHE | XFS_IGET_NORETRY)
418
419 /*
420 * Grab an inode as part of an inode scan. While scanning this inode, the
421 * caller must ensure that no other threads can modify the inode until a call
422 * to xchk_iscan_visit succeeds.
423 *
424 * Returns the number of incore inodes grabbed; -EAGAIN if the caller should
425 * call again xchk_iscan_advance; -EBUSY if we couldn't grab an inode;
426 * -ECANCELED if there's a fatal signal pending; or some other negative errno.
427 */
428 STATIC int
xchk_iscan_iget(struct xchk_iscan * iscan,struct xfs_perag * pag,struct xfs_buf * agi_bp,xfs_inofree_t allocmask,uint8_t nr_inodes)429 xchk_iscan_iget(
430 struct xchk_iscan *iscan,
431 struct xfs_perag *pag,
432 struct xfs_buf *agi_bp,
433 xfs_inofree_t allocmask,
434 uint8_t nr_inodes)
435 {
436 struct xfs_scrub *sc = iscan->sc;
437 struct xfs_mount *mp = sc->mp;
438 xfs_ino_t ino = iscan->cursor_ino;
439 unsigned int idx = 0;
440 unsigned int i;
441 int error;
442
443 ASSERT(iscan->__inodes[0] == NULL);
444
445 /* Fill the first slot in the inode array. */
446 error = xfs_iget(sc->mp, sc->tp, ino, ISCAN_IGET_FLAGS, 0,
447 &iscan->__inodes[idx]);
448
449 trace_xchk_iscan_iget(iscan, error);
450
451 if (error == -ENOENT || error == -EAGAIN) {
452 xfs_trans_brelse(sc->tp, agi_bp);
453 xfs_perag_put(pag);
454
455 /*
456 * It's possible that this inode has lost all of its links but
457 * hasn't yet been inactivated. If we don't have a transaction
458 * or it's not writable, flush the inodegc workers and wait.
459 * If we have a non-empty transaction, we must not block on
460 * inodegc, which allocates its own transactions.
461 */
462 if (sc->tp && !(sc->tp->t_flags & XFS_TRANS_NO_WRITECOUNT))
463 xfs_inodegc_push(mp);
464 else
465 xfs_inodegc_flush(mp);
466 return xchk_iscan_iget_retry(iscan, true);
467 }
468
469 if (error == -EINVAL) {
470 xfs_trans_brelse(sc->tp, agi_bp);
471 xfs_perag_put(pag);
472
473 /*
474 * We thought the inode was allocated, but the inode btree
475 * lookup failed, which means that it was freed since the last
476 * time we advanced the cursor. Back up and try again. This
477 * should never happen since still hold the AGI buffer from the
478 * inobt check, but we need to be careful about infinite loops.
479 */
480 return xchk_iscan_iget_retry(iscan, false);
481 }
482
483 if (error) {
484 xfs_trans_brelse(sc->tp, agi_bp);
485 xfs_perag_put(pag);
486 return error;
487 }
488 idx++;
489 ino++;
490 allocmask >>= 1;
491
492 /*
493 * Now that we've filled the first slot in __inodes, try to fill the
494 * rest of the batch with consecutively ordered inodes. to reduce the
495 * number of _iter calls. Make a bitmap of unallocated inodes from the
496 * zeroes in the inuse bitmap; these inodes will not be scanned, but
497 * the _want_live_update predicate will pass through all live updates.
498 *
499 * If we can't iget an allocated inode, stop and return what we have.
500 */
501 mutex_lock(&iscan->lock);
502 iscan->__batch_ino = ino - 1;
503 iscan->__skipped_inomask = 0;
504 mutex_unlock(&iscan->lock);
505
506 for (i = 1; i < nr_inodes; i++, ino++, allocmask >>= 1) {
507 if (!(allocmask & 1)) {
508 ASSERT(!(iscan->__skipped_inomask & (1ULL << i)));
509
510 mutex_lock(&iscan->lock);
511 iscan->cursor_ino = ino;
512 iscan->__skipped_inomask |= (1ULL << i);
513 mutex_unlock(&iscan->lock);
514 continue;
515 }
516
517 ASSERT(iscan->__inodes[idx] == NULL);
518
519 error = xfs_iget(sc->mp, sc->tp, ino, ISCAN_IGET_FLAGS, 0,
520 &iscan->__inodes[idx]);
521 if (error)
522 break;
523
524 mutex_lock(&iscan->lock);
525 iscan->cursor_ino = ino;
526 mutex_unlock(&iscan->lock);
527 idx++;
528 }
529
530 trace_xchk_iscan_iget_batch(sc->mp, iscan, nr_inodes, idx);
531 xfs_trans_brelse(sc->tp, agi_bp);
532 xfs_perag_put(pag);
533 return idx;
534 }
535
536 /*
537 * Advance the visit cursor to reflect skipped inodes beyond whatever we
538 * scanned.
539 */
540 STATIC void
xchk_iscan_finish_batch(struct xchk_iscan * iscan)541 xchk_iscan_finish_batch(
542 struct xchk_iscan *iscan)
543 {
544 xfs_ino_t highest_skipped;
545
546 mutex_lock(&iscan->lock);
547
548 if (iscan->__batch_ino != NULLFSINO) {
549 highest_skipped = iscan->__batch_ino +
550 xfs_highbit64(iscan->__skipped_inomask);
551 iscan->__visited_ino = max(iscan->__visited_ino,
552 highest_skipped);
553
554 trace_xchk_iscan_skip(iscan);
555 }
556
557 iscan->__batch_ino = NULLFSINO;
558 iscan->__skipped_inomask = 0;
559
560 mutex_unlock(&iscan->lock);
561 }
562
563 /*
564 * Advance the inode scan cursor to the next allocated inode and return up to
565 * 64 consecutive allocated inodes starting with the cursor position.
566 */
567 STATIC int
xchk_iscan_iter_batch(struct xchk_iscan * iscan)568 xchk_iscan_iter_batch(
569 struct xchk_iscan *iscan)
570 {
571 struct xfs_scrub *sc = iscan->sc;
572 int ret;
573
574 xchk_iscan_finish_batch(iscan);
575
576 if (iscan->iget_timeout)
577 iscan->__iget_deadline = jiffies +
578 msecs_to_jiffies(iscan->iget_timeout);
579
580 do {
581 struct xfs_buf *agi_bp = NULL;
582 struct xfs_perag *pag = NULL;
583 xfs_inofree_t allocmask = 0;
584 uint8_t nr_inodes = 0;
585
586 ret = xchk_iscan_advance(iscan, &pag, &agi_bp, &allocmask,
587 &nr_inodes);
588 if (ret != 1)
589 return ret;
590
591 if (xchk_iscan_aborted(iscan)) {
592 xfs_trans_brelse(sc->tp, agi_bp);
593 xfs_perag_put(pag);
594 ret = -ECANCELED;
595 break;
596 }
597
598 ret = xchk_iscan_iget(iscan, pag, agi_bp, allocmask, nr_inodes);
599 } while (ret == -EAGAIN);
600
601 return ret;
602 }
603
604 /*
605 * Advance the inode scan cursor to the next allocated inode and return the
606 * incore inode structure associated with it.
607 *
608 * Returns 1 if there's a new inode to examine, 0 if we've run out of inodes,
609 * -ECANCELED if the live scan aborted, -EBUSY if the incore inode could not be
610 * grabbed, or the usual negative errno.
611 *
612 * If the function returns -EBUSY and the caller can handle skipping an inode,
613 * it may call this function again to continue the scan with the next allocated
614 * inode.
615 */
616 int
xchk_iscan_iter(struct xchk_iscan * iscan,struct xfs_inode ** ipp)617 xchk_iscan_iter(
618 struct xchk_iscan *iscan,
619 struct xfs_inode **ipp)
620 {
621 unsigned int i;
622 int error;
623
624 /* Find a cached inode, or go get another batch. */
625 for (i = 0; i < XFS_INODES_PER_CHUNK; i++) {
626 if (iscan->__inodes[i])
627 goto foundit;
628 }
629
630 error = xchk_iscan_iter_batch(iscan);
631 if (error <= 0)
632 return error;
633
634 ASSERT(iscan->__inodes[0] != NULL);
635 i = 0;
636
637 foundit:
638 /* Give the caller our reference. */
639 *ipp = iscan->__inodes[i];
640 iscan->__inodes[i] = NULL;
641 return 1;
642 }
643
644 /* Clean up an xfs_iscan_iter call by dropping any inodes that we still hold. */
645 void
xchk_iscan_iter_finish(struct xchk_iscan * iscan)646 xchk_iscan_iter_finish(
647 struct xchk_iscan *iscan)
648 {
649 struct xfs_scrub *sc = iscan->sc;
650 unsigned int i;
651
652 for (i = 0; i < XFS_INODES_PER_CHUNK; i++) {
653 if (iscan->__inodes[i]) {
654 xchk_irele(sc, iscan->__inodes[i]);
655 iscan->__inodes[i] = NULL;
656 }
657 }
658 }
659
660 /* Mark this inode scan finished and release resources. */
661 void
xchk_iscan_teardown(struct xchk_iscan * iscan)662 xchk_iscan_teardown(
663 struct xchk_iscan *iscan)
664 {
665 xchk_iscan_iter_finish(iscan);
666 xchk_iscan_finish(iscan);
667 mutex_destroy(&iscan->lock);
668 }
669
670 /* Pick an AG from which to start a scan. */
671 static inline xfs_ino_t
xchk_iscan_rotor(struct xfs_mount * mp)672 xchk_iscan_rotor(
673 struct xfs_mount *mp)
674 {
675 static atomic_t agi_rotor;
676 unsigned int r = atomic_inc_return(&agi_rotor) - 1;
677
678 /*
679 * Rotoring *backwards* through the AGs, so we add one here before
680 * subtracting from the agcount to arrive at an AG number.
681 */
682 r = (r % mp->m_sb.sb_agcount) + 1;
683
684 return XFS_AGINO_TO_INO(mp, mp->m_sb.sb_agcount - r, 0);
685 }
686
687 /*
688 * Set ourselves up to start an inode scan. If the @iget_timeout and
689 * @iget_retry_delay parameters are set, the scan will try to iget each inode
690 * for @iget_timeout milliseconds. If an iget call indicates that the inode is
691 * waiting to be inactivated, the CPU will relax for @iget_retry_delay
692 * milliseconds after pushing the inactivation workers.
693 */
694 void
xchk_iscan_start(struct xfs_scrub * sc,unsigned int iget_timeout,unsigned int iget_retry_delay,struct xchk_iscan * iscan)695 xchk_iscan_start(
696 struct xfs_scrub *sc,
697 unsigned int iget_timeout,
698 unsigned int iget_retry_delay,
699 struct xchk_iscan *iscan)
700 {
701 xfs_ino_t start_ino;
702
703 start_ino = xchk_iscan_rotor(sc->mp);
704
705 iscan->__batch_ino = NULLFSINO;
706 iscan->__skipped_inomask = 0;
707
708 iscan->sc = sc;
709 clear_bit(XCHK_ISCAN_OPSTATE_ABORTED, &iscan->__opstate);
710 iscan->iget_timeout = iget_timeout;
711 iscan->iget_retry_delay = iget_retry_delay;
712 iscan->__visited_ino = start_ino;
713 iscan->cursor_ino = start_ino;
714 iscan->scan_start_ino = start_ino;
715 mutex_init(&iscan->lock);
716 memset(iscan->__inodes, 0, sizeof(iscan->__inodes));
717
718 trace_xchk_iscan_start(iscan, start_ino);
719 }
720
721 /*
722 * Mark this inode as having been visited. Callers must hold a sufficiently
723 * exclusive lock on the inode to prevent concurrent modifications.
724 */
725 void
xchk_iscan_mark_visited(struct xchk_iscan * iscan,struct xfs_inode * ip)726 xchk_iscan_mark_visited(
727 struct xchk_iscan *iscan,
728 struct xfs_inode *ip)
729 {
730 mutex_lock(&iscan->lock);
731 iscan->__visited_ino = ip->i_ino;
732 trace_xchk_iscan_visit(iscan);
733 mutex_unlock(&iscan->lock);
734 }
735
736 /*
737 * Did we skip this inode because it wasn't allocated when we loaded the batch?
738 * If so, it is newly allocated and will not be scanned. All live updates to
739 * this inode must be passed to the caller to maintain scan correctness.
740 */
741 static inline bool
xchk_iscan_skipped(const struct xchk_iscan * iscan,xfs_ino_t ino)742 xchk_iscan_skipped(
743 const struct xchk_iscan *iscan,
744 xfs_ino_t ino)
745 {
746 if (iscan->__batch_ino == NULLFSINO)
747 return false;
748 if (ino < iscan->__batch_ino)
749 return false;
750 if (ino >= iscan->__batch_ino + XFS_INODES_PER_CHUNK)
751 return false;
752
753 return iscan->__skipped_inomask & (1ULL << (ino - iscan->__batch_ino));
754 }
755
756 /*
757 * Do we need a live update for this inode? This is true if the scanner thread
758 * has visited this inode and the scan hasn't been aborted due to errors.
759 * Callers must hold a sufficiently exclusive lock on the inode to prevent
760 * scanners from reading any inode metadata.
761 */
762 bool
xchk_iscan_want_live_update(struct xchk_iscan * iscan,xfs_ino_t ino)763 xchk_iscan_want_live_update(
764 struct xchk_iscan *iscan,
765 xfs_ino_t ino)
766 {
767 bool ret = false;
768
769 if (xchk_iscan_aborted(iscan))
770 return false;
771
772 mutex_lock(&iscan->lock);
773
774 trace_xchk_iscan_want_live_update(iscan, ino);
775
776 /* Scan is finished, caller should receive all updates. */
777 if (iscan->__visited_ino == NULLFSINO) {
778 ret = true;
779 goto unlock;
780 }
781
782 /*
783 * No inodes have been visited yet, so the visited cursor points at the
784 * start of the scan range. The caller should not receive any updates.
785 */
786 if (iscan->scan_start_ino == iscan->__visited_ino) {
787 ret = false;
788 goto unlock;
789 }
790
791 /*
792 * This inode was not allocated at the time of the iscan batch.
793 * The caller should receive all updates.
794 */
795 if (xchk_iscan_skipped(iscan, ino)) {
796 ret = true;
797 goto unlock;
798 }
799
800 /*
801 * The visited cursor hasn't yet wrapped around the end of the FS. If
802 * @ino is inside the starred range, the caller should receive updates:
803 *
804 * 0 ------------ S ************ V ------------ EOFS
805 */
806 if (iscan->scan_start_ino <= iscan->__visited_ino) {
807 if (ino >= iscan->scan_start_ino &&
808 ino <= iscan->__visited_ino)
809 ret = true;
810
811 goto unlock;
812 }
813
814 /*
815 * The visited cursor wrapped around the end of the FS. If @ino is
816 * inside the starred range, the caller should receive updates:
817 *
818 * 0 ************ V ------------ S ************ EOFS
819 */
820 if (ino >= iscan->scan_start_ino || ino <= iscan->__visited_ino)
821 ret = true;
822
823 unlock:
824 mutex_unlock(&iscan->lock);
825 return ret;
826 }
827