1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Copyright (C) 2017-2023 Oracle.  All Rights Reserved.
4  * Author: Darrick J. Wong <djwong@kernel.org>
5  */
6 #ifndef __XFS_SCRUB_SCRUB_H__
7 #define __XFS_SCRUB_SCRUB_H__
8 
9 struct xfs_scrub;
10 
/*
 * Rate-limiting state consumed by xchk_maybe_relax() to decide when a scrub
 * operation should offer to yield the CPU and look for fatal signals.
 */
struct xchk_relax {
	/* jiffies value at or after which the next cond_resched() is due */
	unsigned long	next_resched;

	/* number of xchk_maybe_relax() calls since the last full check */
	unsigned int	resched_nr;

	/* if true, a pending fatal signal makes xchk_maybe_relax() fail */
	bool		interruptible;
};
16 
/*
 * The next time scrub should offer to yield to the scheduler: a tenth of a
 * second from now, which caps rescheduling at 10x per second.
 */
#define XCHK_RELAX_NEXT		(jiffies + (HZ / 10))

/*
 * Initial value for a struct xchk_relax: interruptible by fatal signals, call
 * counter at zero, and the first reschedule due a tenth of a second from now.
 */
#define INIT_XCHK_RELAX	\
	(struct xchk_relax){ \
		.interruptible	= true, \
		.resched_nr	= 0, \
		.next_resched	= XCHK_RELAX_NEXT, \
	}
26 
27 /*
28  * Relax during a scrub operation and exit if there's a fatal signal pending.
29  *
30  * If preemption is disabled, we need to yield to the scheduler every now and
31  * then so that we don't run afoul of the soft lockup watchdog or RCU stall
32  * detector.  cond_resched calls are somewhat expensive (~5ns) so we want to
33  * ratelimit this to 10x per second.  Amortize the cost of the other checks by
34  * only doing it once every 100 calls.
35  */
xchk_maybe_relax(struct xchk_relax * widget)36 static inline int xchk_maybe_relax(struct xchk_relax *widget)
37 {
38 	/* Amortize the cost of scheduling and checking signals. */
39 	if (likely(++widget->resched_nr < 100))
40 		return 0;
41 	widget->resched_nr = 0;
42 
43 	if (unlikely(widget->next_resched <= jiffies)) {
44 		cond_resched();
45 		widget->next_resched = XCHK_RELAX_NEXT;
46 	}
47 
48 	if (widget->interruptible && fatal_signal_pending(current))
49 		return -EINTR;
50 
51 	return 0;
52 }
53 
/*
 * Memory allocation flags used everywhere in scrub.  The NOFS context comes
 * from the process allocation scope rather than from these flags.  Scrub and
 * repair have to be able to back out gracefully when memory is short.  The
 * __force cast keeps static checkers from complaining.
 */
#define XCHK_GFP_FLAGS \
	((__force gfp_t)(GFP_KERNEL | __GFP_NOWARN | __GFP_RETRY_MAYFAIL))
62 
/*
 * Inode lookup flags for opening files by handle on behalf of fsck.  Neither
 * the inode number nor its allocation state can be trusted, so the lookup is
 * an untrusted one; and since these inodes should not pollute the cache, they
 * are marked for immediate removal.
 */
#define XCHK_IGET_FLAGS	(XFS_IGET_DONTCACHE | XFS_IGET_UNTRUSTED)
69 
/* Classes of scrub type, describing what kind of metadata is examined. */
enum xchk_type {
	ST_NONE		= 1,	/* disabled */
	ST_PERAG	= 2,	/* per-AG metadata */
	ST_FS		= 3,	/* per-FS metadata */
	ST_INODE	= 4,	/* per-inode metadata */
};
77 
78 struct xchk_meta_ops {
79 	/* Acquire whatever resources are needed for the operation. */
80 	int		(*setup)(struct xfs_scrub *sc);
81 
82 	/* Examine metadata for errors. */
83 	int		(*scrub)(struct xfs_scrub *);
84 
85 	/* Repair or optimize the metadata. */
86 	int		(*repair)(struct xfs_scrub *);
87 
88 	/*
89 	 * Re-scrub the metadata we repaired, in case there's extra work that
90 	 * we need to do to check our repair work.  If this is NULL, we'll use
91 	 * the ->scrub function pointer, assuming that the regular scrub is
92 	 * sufficient.
93 	 */
94 	int		(*repair_eval)(struct xfs_scrub *sc);
95 
96 	/* Decide if we even have this piece of metadata. */
97 	bool		(*has)(struct xfs_mount *);
98 
99 	/* type describing required/allowed inputs */
100 	enum xchk_type	type;
101 };
102 
/*
 * Per-AG scrub state: the perag reference plus the buffers and btree cursors
 * that go with a single allocation group.
 */
struct xchk_ag {
	struct xfs_perag	*pag;

	/* Buffers holding the AG btree roots. */
	struct xfs_buf		*agf_bp;
	struct xfs_buf		*agi_bp;

	/* Cursors for the AG btrees. */
	struct xfs_btree_cur	*bno_cur;
	struct xfs_btree_cur	*cnt_cur;
	struct xfs_btree_cur	*ino_cur;
	struct xfs_btree_cur	*fino_cur;
	struct xfs_btree_cur	*rmap_cur;
	struct xfs_btree_cur	*refc_cur;
};
119 
120 struct xfs_scrub {
121 	/* General scrub state. */
122 	struct xfs_mount		*mp;
123 	struct xfs_scrub_metadata	*sm;
124 	const struct xchk_meta_ops	*ops;
125 	struct xfs_trans		*tp;
126 
127 	/* File that scrub was called with. */
128 	struct file			*file;
129 
130 	/*
131 	 * File that is undergoing the scrub operation.  This can differ from
132 	 * the file that scrub was called with if we're checking file-based fs
133 	 * metadata (e.g. rt bitmaps) or if we're doing a scrub-by-handle for
134 	 * something that can't be opened directly (e.g. symlinks).
135 	 */
136 	struct xfs_inode		*ip;
137 
138 	/* Kernel memory buffer used by scrubbers; freed at teardown. */
139 	void				*buf;
140 
141 	/*
142 	 * Clean up resources owned by whatever is in the buffer.  Cleanup can
143 	 * be deferred with this hook as a means for scrub functions to pass
144 	 * data to repair functions.  This function must not free the buffer
145 	 * itself.
146 	 */
147 	void				(*buf_cleanup)(void *buf);
148 
149 	/* xfile used by the scrubbers; freed at teardown. */
150 	struct xfile			*xfile;
151 
152 	/* buffer target for in-memory btrees; also freed at teardown. */
153 	struct xfs_buftarg		*xmbtp;
154 
155 	/* Lock flags for @ip. */
156 	uint				ilock_flags;
157 
158 	/* The orphanage, for stashing files that have lost their parent. */
159 	uint				orphanage_ilock_flags;
160 	struct xfs_inode		*orphanage;
161 
162 	/* A temporary file on this filesystem, for staging new metadata. */
163 	struct xfs_inode		*tempip;
164 	uint				temp_ilock_flags;
165 
166 	/* See the XCHK/XREP state flags below. */
167 	unsigned int			flags;
168 
169 	/*
170 	 * The XFS_SICK_* flags that correspond to the metadata being scrubbed
171 	 * or repaired.  We will use this mask to update the in-core fs health
172 	 * status with whatever we find.
173 	 */
174 	unsigned int			sick_mask;
175 
176 	/* next time we want to cond_resched() */
177 	struct xchk_relax		relax;
178 
179 	/* State tracking for single-AG operations. */
180 	struct xchk_ag			sa;
181 };
182 
/*
 * Scrub state flags.  XCHK state flags grow up from zero; XREP state flags
 * grow down from 2^31.
 */
#define XCHK_TRY_HARDER		(1U << 0)  /* couldn't get resources; retry */
#define XCHK_HAVE_FREEZE_PROT	(1U << 1)  /* freeze protection is held */
#define XCHK_FSGATES_DRAIN	(1U << 2)  /* defer ops draining is enabled */
#define XCHK_NEED_DRAIN		(1U << 3)  /* scrub must drain defer ops */
#define XCHK_FSGATES_QUOTA	(1U << 4)  /* quota live update is enabled */
#define XCHK_FSGATES_DIRENTS	(1U << 5)  /* directory live update is enabled */
#define XCHK_FSGATES_RMAP	(1U << 6)  /* rmapbt live update is enabled */
#define XREP_RESET_PERAG_RESV	(1U << 30) /* AG space reservation needs reset */
#define XREP_ALREADY_FIXED	(1U << 31) /* we are checking our repair work */

/*
 * The XCHK_FSGATES* flags reflect functionality in the main filesystem that
 * is only enabled for this particular online fsck.  When not in use, the
 * features are gated off via dynamic code patching, which is why the state
 * must be enabled during scrub setup and can only be torn down afterwards.
 */
#define XCHK_FSGATES_ALL \
	(XCHK_FSGATES_DRAIN | XCHK_FSGATES_QUOTA | \
	 XCHK_FSGATES_DIRENTS | XCHK_FSGATES_RMAP)
204 
205 struct xfs_scrub_subord {
206 	struct xfs_scrub	sc;
207 	struct xfs_scrub	*parent_sc;
208 	unsigned int		old_smtype;
209 	unsigned int		old_smflags;
210 };
211 
212 struct xfs_scrub_subord *xchk_scrub_create_subord(struct xfs_scrub *sc,
213 		unsigned int subtype);
214 void xchk_scrub_free_subord(struct xfs_scrub_subord *sub);
215 
216 /*
217  * We /could/ terminate a scrub/repair operation early.  If we're not
218  * in a good place to continue (fatal signal, etc.) then bail out.
219  * Note that we're careful not to make any judgements about *error.
220  */
221 static inline bool
xchk_should_terminate(struct xfs_scrub * sc,int * error)222 xchk_should_terminate(
223 	struct xfs_scrub	*sc,
224 	int			*error)
225 {
226 	if (xchk_maybe_relax(&sc->relax)) {
227 		if (*error == 0)
228 			*error = -EINTR;
229 		return true;
230 	}
231 	return false;
232 }
233 
/*
 * Placeholder scrubber that always fails with -ENOENT.  The scrubber names
 * for realtime and quota metadata are defined to this function when the
 * corresponding kernel config option is turned off.
 */
static inline int
xchk_nothing(
	struct xfs_scrub	*sc)
{
	return -ENOENT;
}
238 
/*
 * Metadata scrubbers.  Each one takes the scrub context and returns an int.
 * A scrubber whose kernel config option is disabled is defined to
 * xchk_nothing, which returns -ENOENT.
 */
int xchk_tester(struct xfs_scrub *sc);
int xchk_superblock(struct xfs_scrub *sc);
int xchk_agf(struct xfs_scrub *sc);
int xchk_agfl(struct xfs_scrub *sc);
int xchk_agi(struct xfs_scrub *sc);
int xchk_allocbt(struct xfs_scrub *sc);
int xchk_iallocbt(struct xfs_scrub *sc);
int xchk_rmapbt(struct xfs_scrub *sc);
int xchk_refcountbt(struct xfs_scrub *sc);
int xchk_inode(struct xfs_scrub *sc);
int xchk_bmap_data(struct xfs_scrub *sc);
int xchk_bmap_attr(struct xfs_scrub *sc);
int xchk_bmap_cow(struct xfs_scrub *sc);
int xchk_directory(struct xfs_scrub *sc);
int xchk_xattr(struct xfs_scrub *sc);
int xchk_symlink(struct xfs_scrub *sc);
int xchk_parent(struct xfs_scrub *sc);
int xchk_dirtree(struct xfs_scrub *sc);
#ifdef CONFIG_XFS_RT
int xchk_rtbitmap(struct xfs_scrub *sc);
int xchk_rtsummary(struct xfs_scrub *sc);
#else
/* Without CONFIG_XFS_RT, these names resolve to xchk_nothing. */
# define xchk_rtbitmap		xchk_nothing
# define xchk_rtsummary		xchk_nothing
#endif
#ifdef CONFIG_XFS_QUOTA
int xchk_quota(struct xfs_scrub *sc);
int xchk_quotacheck(struct xfs_scrub *sc);
#else
/* Without CONFIG_XFS_QUOTA, these names resolve to xchk_nothing. */
# define xchk_quota		xchk_nothing
# define xchk_quotacheck	xchk_nothing
#endif
int xchk_fscounters(struct xfs_scrub *sc);
int xchk_nlinks(struct xfs_scrub *sc);
274 
/*
 * Cross-referencing helpers.  Each takes the scrub context plus a starting
 * block and a length, and returns nothing.
 */
void xchk_xref_is_used_space(struct xfs_scrub *sc, xfs_agblock_t agbno,
		xfs_extlen_t len);
void xchk_xref_is_not_inode_chunk(struct xfs_scrub *sc, xfs_agblock_t agbno,
		xfs_extlen_t len);
void xchk_xref_is_inode_chunk(struct xfs_scrub *sc, xfs_agblock_t agbno,
		xfs_extlen_t len);
void xchk_xref_is_only_owned_by(struct xfs_scrub *sc, xfs_agblock_t agbno,
		xfs_extlen_t len, const struct xfs_owner_info *oinfo);
void xchk_xref_is_not_owned_by(struct xfs_scrub *sc, xfs_agblock_t agbno,
		xfs_extlen_t len, const struct xfs_owner_info *oinfo);
void xchk_xref_has_no_owner(struct xfs_scrub *sc, xfs_agblock_t agbno,
		xfs_extlen_t len);
void xchk_xref_is_cow_staging(struct xfs_scrub *sc, xfs_agblock_t bno,
		xfs_extlen_t len);
void xchk_xref_is_not_shared(struct xfs_scrub *sc, xfs_agblock_t bno,
		xfs_extlen_t len);
void xchk_xref_is_not_cow_staging(struct xfs_scrub *sc, xfs_agblock_t bno,
		xfs_extlen_t len);
#ifdef CONFIG_XFS_RT
void xchk_xref_is_used_rt_space(struct xfs_scrub *sc, xfs_rtblock_t rtbno,
		xfs_extlen_t len);
#else
/*
 * Without CONFIG_XFS_RT this expands to an empty statement; the arguments
 * are not evaluated.
 */
# define xchk_xref_is_used_rt_space(sc, rtbno, len) do { } while (0)
#endif
300 
301 #endif	/* __XFS_SCRUB_SCRUB_H__ */
302