1 /* SPDX-License-Identifier: GPL-2.0 */
2 /*
3  * Copyright (c) 2021-2024 Oracle.  All Rights Reserved.
4  * Author: Darrick J. Wong <djwong@kernel.org>
5  */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_log_format.h"
11 #include "xfs_trans_resv.h"
12 #include "xfs_mount.h"
13 #include "xfs_trans.h"
14 #include "xfs_btree.h"
15 #include "xfs_error.h"
16 #include "xfs_buf_mem.h"
17 #include "xfs_btree_mem.h"
18 #include "xfs_ag.h"
19 #include "xfs_buf_item.h"
20 #include "xfs_trace.h"
21 
22 /* Set the root of an in-memory btree. */
23 void
xfbtree_set_root(struct xfs_btree_cur * cur,const union xfs_btree_ptr * ptr,int inc)24 xfbtree_set_root(
25 	struct xfs_btree_cur		*cur,
26 	const union xfs_btree_ptr	*ptr,
27 	int				inc)
28 {
29 	ASSERT(cur->bc_ops->type == XFS_BTREE_TYPE_MEM);
30 
31 	cur->bc_mem.xfbtree->root = *ptr;
32 	cur->bc_mem.xfbtree->nlevels += inc;
33 }
34 
35 /* Initialize a pointer from the in-memory btree header. */
36 void
xfbtree_init_ptr_from_cur(struct xfs_btree_cur * cur,union xfs_btree_ptr * ptr)37 xfbtree_init_ptr_from_cur(
38 	struct xfs_btree_cur		*cur,
39 	union xfs_btree_ptr		*ptr)
40 {
41 	ASSERT(cur->bc_ops->type == XFS_BTREE_TYPE_MEM);
42 
43 	*ptr = cur->bc_mem.xfbtree->root;
44 }
45 
46 /* Duplicate an in-memory btree cursor. */
47 struct xfs_btree_cur *
xfbtree_dup_cursor(struct xfs_btree_cur * cur)48 xfbtree_dup_cursor(
49 	struct xfs_btree_cur		*cur)
50 {
51 	struct xfs_btree_cur		*ncur;
52 
53 	ASSERT(cur->bc_ops->type == XFS_BTREE_TYPE_MEM);
54 
55 	ncur = xfs_btree_alloc_cursor(cur->bc_mp, cur->bc_tp, cur->bc_ops,
56 			cur->bc_maxlevels, cur->bc_cache);
57 	ncur->bc_flags = cur->bc_flags;
58 	ncur->bc_nlevels = cur->bc_nlevels;
59 	ncur->bc_mem.xfbtree = cur->bc_mem.xfbtree;
60 
61 	if (cur->bc_mem.pag)
62 		ncur->bc_mem.pag = xfs_perag_hold(cur->bc_mem.pag);
63 
64 	return ncur;
65 }
66 
67 /* Close the btree xfile and release all resources. */
68 void
xfbtree_destroy(struct xfbtree * xfbt)69 xfbtree_destroy(
70 	struct xfbtree		*xfbt)
71 {
72 	xfs_buftarg_drain(xfbt->target);
73 }
74 
75 /* Compute the number of bytes available for records. */
76 static inline unsigned int
xfbtree_rec_bytes(struct xfs_mount * mp,const struct xfs_btree_ops * ops)77 xfbtree_rec_bytes(
78 	struct xfs_mount		*mp,
79 	const struct xfs_btree_ops	*ops)
80 {
81 	return XMBUF_BLOCKSIZE - XFS_BTREE_LBLOCK_CRC_LEN;
82 }
83 
84 /* Initialize an empty leaf block as the btree root. */
85 STATIC int
xfbtree_init_leaf_block(struct xfs_mount * mp,struct xfbtree * xfbt,const struct xfs_btree_ops * ops)86 xfbtree_init_leaf_block(
87 	struct xfs_mount		*mp,
88 	struct xfbtree			*xfbt,
89 	const struct xfs_btree_ops	*ops)
90 {
91 	struct xfs_buf			*bp;
92 	xfbno_t				bno = xfbt->highest_bno++;
93 	int				error;
94 
95 	error = xfs_buf_get(xfbt->target, xfbno_to_daddr(bno), XFBNO_BBSIZE,
96 			&bp);
97 	if (error)
98 		return error;
99 
100 	trace_xfbtree_create_root_buf(xfbt, bp);
101 
102 	bp->b_ops = ops->buf_ops;
103 	xfs_btree_init_buf(mp, bp, ops, 0, 0, xfbt->owner);
104 	xfs_buf_relse(bp);
105 
106 	xfbt->root.l = cpu_to_be64(bno);
107 	return 0;
108 }
109 
110 /*
111  * Create an in-memory btree root that can be used with the given xmbuf.
112  * Callers must set xfbt->owner.
113  */
114 int
xfbtree_init(struct xfs_mount * mp,struct xfbtree * xfbt,struct xfs_buftarg * btp,const struct xfs_btree_ops * ops)115 xfbtree_init(
116 	struct xfs_mount		*mp,
117 	struct xfbtree			*xfbt,
118 	struct xfs_buftarg		*btp,
119 	const struct xfs_btree_ops	*ops)
120 {
121 	unsigned int			blocklen = xfbtree_rec_bytes(mp, ops);
122 	unsigned int			keyptr_len;
123 	int				error;
124 
125 	/* Requires a long-format CRC-format btree */
126 	if (!xfs_has_crc(mp)) {
127 		ASSERT(xfs_has_crc(mp));
128 		return -EINVAL;
129 	}
130 	if (ops->ptr_len != XFS_BTREE_LONG_PTR_LEN) {
131 		ASSERT(ops->ptr_len == XFS_BTREE_LONG_PTR_LEN);
132 		return -EINVAL;
133 	}
134 
135 	memset(xfbt, 0, sizeof(*xfbt));
136 	xfbt->target = btp;
137 
138 	/* Set up min/maxrecs for this btree. */
139 	keyptr_len = ops->key_len + sizeof(__be64);
140 	xfbt->maxrecs[0] = blocklen / ops->rec_len;
141 	xfbt->maxrecs[1] = blocklen / keyptr_len;
142 	xfbt->minrecs[0] = xfbt->maxrecs[0] / 2;
143 	xfbt->minrecs[1] = xfbt->maxrecs[1] / 2;
144 	xfbt->highest_bno = 0;
145 	xfbt->nlevels = 1;
146 
147 	/* Initialize the empty btree. */
148 	error = xfbtree_init_leaf_block(mp, xfbt, ops);
149 	if (error)
150 		goto err_freesp;
151 
152 	trace_xfbtree_init(mp, xfbt, ops);
153 
154 	return 0;
155 
156 err_freesp:
157 	xfs_buftarg_drain(xfbt->target);
158 	return error;
159 }
160 
161 /* Allocate a block to our in-memory btree. */
162 int
xfbtree_alloc_block(struct xfs_btree_cur * cur,const union xfs_btree_ptr * start,union xfs_btree_ptr * new,int * stat)163 xfbtree_alloc_block(
164 	struct xfs_btree_cur		*cur,
165 	const union xfs_btree_ptr	*start,
166 	union xfs_btree_ptr		*new,
167 	int				*stat)
168 {
169 	struct xfbtree			*xfbt = cur->bc_mem.xfbtree;
170 	xfbno_t				bno = xfbt->highest_bno++;
171 
172 	ASSERT(cur->bc_ops->type == XFS_BTREE_TYPE_MEM);
173 
174 	trace_xfbtree_alloc_block(xfbt, cur, bno);
175 
176 	/* Fail if the block address exceeds the maximum for the buftarg. */
177 	if (!xfbtree_verify_bno(xfbt, bno)) {
178 		ASSERT(xfbtree_verify_bno(xfbt, bno));
179 		*stat = 0;
180 		return 0;
181 	}
182 
183 	new->l = cpu_to_be64(bno);
184 	*stat = 1;
185 	return 0;
186 }
187 
188 /* Free a block from our in-memory btree. */
189 int
xfbtree_free_block(struct xfs_btree_cur * cur,struct xfs_buf * bp)190 xfbtree_free_block(
191 	struct xfs_btree_cur	*cur,
192 	struct xfs_buf		*bp)
193 {
194 	struct xfbtree		*xfbt = cur->bc_mem.xfbtree;
195 	xfs_daddr_t		daddr = xfs_buf_daddr(bp);
196 	xfbno_t			bno = xfs_daddr_to_xfbno(daddr);
197 
198 	ASSERT(cur->bc_ops->type == XFS_BTREE_TYPE_MEM);
199 
200 	trace_xfbtree_free_block(xfbt, cur, bno);
201 
202 	if (bno + 1 == xfbt->highest_bno)
203 		xfbt->highest_bno--;
204 
205 	return 0;
206 }
207 
208 /* Return the minimum number of records for a btree block. */
209 int
xfbtree_get_minrecs(struct xfs_btree_cur * cur,int level)210 xfbtree_get_minrecs(
211 	struct xfs_btree_cur	*cur,
212 	int			level)
213 {
214 	struct xfbtree		*xfbt = cur->bc_mem.xfbtree;
215 
216 	return xfbt->minrecs[level != 0];
217 }
218 
219 /* Return the maximum number of records for a btree block. */
220 int
xfbtree_get_maxrecs(struct xfs_btree_cur * cur,int level)221 xfbtree_get_maxrecs(
222 	struct xfs_btree_cur	*cur,
223 	int			level)
224 {
225 	struct xfbtree		*xfbt = cur->bc_mem.xfbtree;
226 
227 	return xfbt->maxrecs[level != 0];
228 }
229 
230 /* If this log item is a buffer item that came from the xfbtree, return it. */
231 static inline struct xfs_buf *
xfbtree_buf_match(struct xfbtree * xfbt,const struct xfs_log_item * lip)232 xfbtree_buf_match(
233 	struct xfbtree			*xfbt,
234 	const struct xfs_log_item	*lip)
235 {
236 	const struct xfs_buf_log_item	*bli;
237 	struct xfs_buf			*bp;
238 
239 	if (lip->li_type != XFS_LI_BUF)
240 		return NULL;
241 
242 	bli = container_of(lip, struct xfs_buf_log_item, bli_item);
243 	bp = bli->bli_buf;
244 	if (bp->b_target != xfbt->target)
245 		return NULL;
246 
247 	return bp;
248 }
249 
250 /*
251  * Commit changes to the incore btree immediately by writing all dirty xfbtree
252  * buffers to the backing xfile.  This detaches all xfbtree buffers from the
253  * transaction, even on failure.  The buffer locks are dropped between the
254  * delwri queue and submit, so the caller must synchronize btree access.
255  *
256  * Normally we'd let the buffers commit with the transaction and get written to
257  * the xfile via the log, but online repair stages ephemeral btrees in memory
258  * and uses the btree_staging functions to write new btrees to disk atomically.
259  * The in-memory btree (and its backing store) are discarded at the end of the
260  * repair phase, which means that xfbtree buffers cannot commit with the rest
261  * of a transaction.
262  *
263  * In other words, online repair only needs the transaction to collect buffer
264  * pointers and to avoid buffer deadlocks, not to guarantee consistency of
265  * updates.
266  */
267 int
xfbtree_trans_commit(struct xfbtree * xfbt,struct xfs_trans * tp)268 xfbtree_trans_commit(
269 	struct xfbtree		*xfbt,
270 	struct xfs_trans	*tp)
271 {
272 	struct xfs_log_item	*lip, *n;
273 	bool			tp_dirty = false;
274 	int			error = 0;
275 
276 	/*
277 	 * For each xfbtree buffer attached to the transaction, write the dirty
278 	 * buffers to the xfile and release them.
279 	 */
280 	list_for_each_entry_safe(lip, n, &tp->t_items, li_trans) {
281 		struct xfs_buf	*bp = xfbtree_buf_match(xfbt, lip);
282 
283 		if (!bp) {
284 			if (test_bit(XFS_LI_DIRTY, &lip->li_flags))
285 				tp_dirty |= true;
286 			continue;
287 		}
288 
289 		trace_xfbtree_trans_commit_buf(xfbt, bp);
290 
291 		xmbuf_trans_bdetach(tp, bp);
292 
293 		/*
294 		 * If the buffer fails verification, note the failure but
295 		 * continue walking the transaction items so that we remove all
296 		 * ephemeral btree buffers.
297 		 */
298 		if (!error)
299 			error = xmbuf_finalize(bp);
300 
301 		xfs_buf_relse(bp);
302 	}
303 
304 	/*
305 	 * Reset the transaction's dirty flag to reflect the dirty state of the
306 	 * log items that are still attached.
307 	 */
308 	tp->t_flags = (tp->t_flags & ~XFS_TRANS_DIRTY) |
309 			(tp_dirty ? XFS_TRANS_DIRTY : 0);
310 
311 	return error;
312 }
313 
314 /*
315  * Cancel changes to the incore btree by detaching all the xfbtree buffers.
316  * Changes are not undone, so callers must not access the btree ever again.
317  */
318 void
xfbtree_trans_cancel(struct xfbtree * xfbt,struct xfs_trans * tp)319 xfbtree_trans_cancel(
320 	struct xfbtree		*xfbt,
321 	struct xfs_trans	*tp)
322 {
323 	struct xfs_log_item	*lip, *n;
324 	bool			tp_dirty = false;
325 
326 	list_for_each_entry_safe(lip, n, &tp->t_items, li_trans) {
327 		struct xfs_buf	*bp = xfbtree_buf_match(xfbt, lip);
328 
329 		if (!bp) {
330 			if (test_bit(XFS_LI_DIRTY, &lip->li_flags))
331 				tp_dirty |= true;
332 			continue;
333 		}
334 
335 		trace_xfbtree_trans_cancel_buf(xfbt, bp);
336 
337 		xmbuf_trans_bdetach(tp, bp);
338 		xfs_buf_relse(bp);
339 	}
340 
341 	/*
342 	 * Reset the transaction's dirty flag to reflect the dirty state of the
343 	 * log items that are still attached.
344 	 */
345 	tp->t_flags = (tp->t_flags & ~XFS_TRANS_DIRTY) |
346 			(tp_dirty ? XFS_TRANS_DIRTY : 0);
347 }
348