1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Copyright (c) 2020-2024 Oracle.  All Rights Reserved.
4  * Author: Darrick J. Wong <djwong@kernel.org>
5  */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_log_format.h"
11 #include "xfs_trans_resv.h"
12 #include "xfs_mount.h"
13 #include "xfs_defer.h"
14 #include "xfs_inode.h"
15 #include "xfs_trans.h"
16 #include "xfs_bmap.h"
17 #include "xfs_icache.h"
18 #include "xfs_quota.h"
19 #include "xfs_exchmaps.h"
20 #include "xfs_trace.h"
21 #include "xfs_bmap_btree.h"
22 #include "xfs_trans_space.h"
23 #include "xfs_error.h"
24 #include "xfs_errortag.h"
25 #include "xfs_health.h"
26 #include "xfs_exchmaps_item.h"
27 #include "xfs_da_format.h"
28 #include "xfs_da_btree.h"
29 #include "xfs_attr_leaf.h"
30 #include "xfs_attr.h"
31 #include "xfs_dir2_priv.h"
32 #include "xfs_dir2.h"
33 #include "xfs_symlink_remote.h"
34 
/*
 * Cache from which struct xfs_exchmaps_intent objects are allocated.  It is
 * created by xfs_exchmaps_intent_init_cache() and torn down again by
 * xfs_exchmaps_intent_destroy_cache().
 */
struct kmem_cache *xfs_exchmaps_intent_cache;
36 
37 /* bmbt mappings adjacent to a pair of records. */
38 struct xfs_exchmaps_adjacent {
39 	struct xfs_bmbt_irec		left1;
40 	struct xfs_bmbt_irec		right1;
41 	struct xfs_bmbt_irec		left2;
42 	struct xfs_bmbt_irec		right2;
43 };
44 
/*
 * Initializer for struct xfs_exchmaps_adjacent: every neighbouring record
 * starts out looking like a hole.
 */
#define ADJACENT_INIT						\
{								\
	.left1	= { .br_startblock = HOLESTARTBLOCK },		\
	.right1	= { .br_startblock = HOLESTARTBLOCK },		\
	.left2	= { .br_startblock = HOLESTARTBLOCK },		\
	.right2	= { .br_startblock = HOLESTARTBLOCK },		\
}
51 
52 /* Information to reset reflink flag / CoW fork state after an exchange. */
53 
54 /*
55  * If the reflink flag is set on either inode, make sure it has an incore CoW
56  * fork, since all reflink inodes must have them.  If there's a CoW fork and it
57  * has mappings in it, make sure the inodes are tagged appropriately so that
58  * speculative preallocations can be GC'd if we run low of space.
59  */
60 static inline void
xfs_exchmaps_ensure_cowfork(struct xfs_inode * ip)61 xfs_exchmaps_ensure_cowfork(
62 	struct xfs_inode	*ip)
63 {
64 	struct xfs_ifork	*cfork;
65 
66 	if (xfs_is_reflink_inode(ip))
67 		xfs_ifork_init_cow(ip);
68 
69 	cfork = xfs_ifork_ptr(ip, XFS_COW_FORK);
70 	if (!cfork)
71 		return;
72 	if (cfork->if_bytes > 0)
73 		xfs_inode_set_cowblocks_tag(ip);
74 	else
75 		xfs_inode_clear_cowblocks_tag(ip);
76 }
77 
78 /*
79  * Adjust the on-disk inode size upwards if needed so that we never add
80  * mappings into the file past EOF.  This is crucial so that log recovery won't
81  * get confused by the sudden appearance of post-eof mappings.
82  */
83 STATIC void
xfs_exchmaps_update_size(struct xfs_trans * tp,struct xfs_inode * ip,struct xfs_bmbt_irec * imap,xfs_fsize_t new_isize)84 xfs_exchmaps_update_size(
85 	struct xfs_trans	*tp,
86 	struct xfs_inode	*ip,
87 	struct xfs_bmbt_irec	*imap,
88 	xfs_fsize_t		new_isize)
89 {
90 	struct xfs_mount	*mp = tp->t_mountp;
91 	xfs_fsize_t		len;
92 
93 	if (new_isize < 0)
94 		return;
95 
96 	len = min(XFS_FSB_TO_B(mp, imap->br_startoff + imap->br_blockcount),
97 		  new_isize);
98 
99 	if (len <= ip->i_disk_size)
100 		return;
101 
102 	trace_xfs_exchmaps_update_inode_size(ip, len);
103 
104 	ip->i_disk_size = len;
105 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
106 }
107 
108 /* Advance the incore state tracking after exchanging a mapping. */
109 static inline void
xmi_advance(struct xfs_exchmaps_intent * xmi,const struct xfs_bmbt_irec * irec)110 xmi_advance(
111 	struct xfs_exchmaps_intent	*xmi,
112 	const struct xfs_bmbt_irec	*irec)
113 {
114 	xmi->xmi_startoff1 += irec->br_blockcount;
115 	xmi->xmi_startoff2 += irec->br_blockcount;
116 	xmi->xmi_blockcount -= irec->br_blockcount;
117 }
118 
119 /* Do we still have more mappings to exchange? */
120 static inline bool
xmi_has_more_exchange_work(const struct xfs_exchmaps_intent * xmi)121 xmi_has_more_exchange_work(const struct xfs_exchmaps_intent *xmi)
122 {
123 	return xmi->xmi_blockcount > 0;
124 }
125 
126 /* Do we have post-operation cleanups to perform? */
127 static inline bool
xmi_has_postop_work(const struct xfs_exchmaps_intent * xmi)128 xmi_has_postop_work(const struct xfs_exchmaps_intent *xmi)
129 {
130 	return xmi->xmi_flags & (XFS_EXCHMAPS_CLEAR_INO1_REFLINK |
131 				 XFS_EXCHMAPS_CLEAR_INO2_REFLINK |
132 				 __XFS_EXCHMAPS_INO2_SHORTFORM);
133 }
134 
135 /* Check all mappings to make sure we can actually exchange them. */
136 int
xfs_exchmaps_check_forks(struct xfs_mount * mp,const struct xfs_exchmaps_req * req)137 xfs_exchmaps_check_forks(
138 	struct xfs_mount		*mp,
139 	const struct xfs_exchmaps_req	*req)
140 {
141 	struct xfs_ifork		*ifp1, *ifp2;
142 	int				whichfork = xfs_exchmaps_reqfork(req);
143 
144 	/* No fork? */
145 	ifp1 = xfs_ifork_ptr(req->ip1, whichfork);
146 	ifp2 = xfs_ifork_ptr(req->ip2, whichfork);
147 	if (!ifp1 || !ifp2)
148 		return -EINVAL;
149 
150 	/* We don't know how to exchange local format forks. */
151 	if (ifp1->if_format == XFS_DINODE_FMT_LOCAL ||
152 	    ifp2->if_format == XFS_DINODE_FMT_LOCAL)
153 		return -EINVAL;
154 
155 	return 0;
156 }
157 
#ifdef CONFIG_XFS_QUOTA
/*
 * Log the quota block count changes caused by exchanging @irec1 (currently in
 * file 1) with @irec2 (currently in file 2).  Only real extents move blocks
 * between the files; each one is subtracted from the file it leaves and added
 * to the file it joins.  The realtime block counter is used when file 1 is a
 * realtime inode, the regular block counter otherwise.
 */
static inline void
xfs_exchmaps_update_quota(
	struct xfs_trans		*tp,
	struct xfs_exchmaps_intent	*xmi,
	struct xfs_bmbt_irec		*irec1,
	struct xfs_bmbt_irec		*irec2)
{
	int64_t				delta1 = 0;
	int64_t				delta2 = 0;
	unsigned int			field;

	if (XFS_IS_REALTIME_INODE(xmi->xmi_ip1))
		field = XFS_TRANS_DQ_RTBCOUNT;
	else
		field = XFS_TRANS_DQ_BCOUNT;

	/* Blocks of file 1's mapping move over to file 2. */
	if (xfs_bmap_is_real_extent(irec1)) {
		delta1 -= irec1->br_blockcount;
		delta2 += irec1->br_blockcount;
	}

	/* Blocks of file 2's mapping move over to file 1. */
	if (xfs_bmap_is_real_extent(irec2)) {
		delta1 += irec2->br_blockcount;
		delta2 -= irec2->br_blockcount;
	}

	xfs_trans_mod_dquot_byino(tp, xmi->xmi_ip1, field, delta1);
	xfs_trans_mod_dquot_byino(tp, xmi->xmi_ip2, field, delta2);
}
#else
# define xfs_exchmaps_update_quota(tp, xmi, irec1, irec2)	((void)0)
#endif
189 
190 /* Decide if we want to skip this mapping from file1. */
191 static inline bool
xfs_exchmaps_can_skip_mapping(struct xfs_exchmaps_intent * xmi,struct xfs_bmbt_irec * irec)192 xfs_exchmaps_can_skip_mapping(
193 	struct xfs_exchmaps_intent	*xmi,
194 	struct xfs_bmbt_irec		*irec)
195 {
196 	struct xfs_mount		*mp = xmi->xmi_ip1->i_mount;
197 
198 	/* Do not skip this mapping if the caller did not tell us to. */
199 	if (!(xmi->xmi_flags & XFS_EXCHMAPS_INO1_WRITTEN))
200 		return false;
201 
202 	/* Do not skip mapped, written mappings. */
203 	if (xfs_bmap_is_written_extent(irec))
204 		return false;
205 
206 	/*
207 	 * The mapping is unwritten or a hole.  It cannot be a delalloc
208 	 * reservation because we already excluded those.  It cannot be an
209 	 * unwritten extent with dirty page cache because we flushed the page
210 	 * cache.  For files where the allocation unit is 1FSB (files on the
211 	 * data dev, rt files if the extent size is 1FSB), we can safely
212 	 * skip this mapping.
213 	 */
214 	if (!xfs_inode_has_bigrtalloc(xmi->xmi_ip1))
215 		return true;
216 
217 	/*
218 	 * For a realtime file with a multi-fsb allocation unit, the decision
219 	 * is trickier because we can only swap full allocation units.
220 	 * Unwritten mappings can appear in the middle of an rtx if the rtx is
221 	 * partially written, but they can also appear for preallocations.
222 	 *
223 	 * If the mapping is a hole, skip it entirely.  Holes should align with
224 	 * rtx boundaries.
225 	 */
226 	if (!xfs_bmap_is_real_extent(irec))
227 		return true;
228 
229 	/*
230 	 * All mappings below this point are unwritten.
231 	 *
232 	 * - If the beginning is not aligned to an rtx, trim the end of the
233 	 *   mapping so that it does not cross an rtx boundary, and swap it.
234 	 *
235 	 * - If both ends are aligned to an rtx, skip the entire mapping.
236 	 */
237 	if (!isaligned_64(irec->br_startoff, mp->m_sb.sb_rextsize)) {
238 		xfs_fileoff_t	new_end;
239 
240 		new_end = roundup_64(irec->br_startoff, mp->m_sb.sb_rextsize);
241 		irec->br_blockcount = min(irec->br_blockcount,
242 					  new_end - irec->br_startoff);
243 		return false;
244 	}
245 	if (isaligned_64(irec->br_blockcount, mp->m_sb.sb_rextsize))
246 		return true;
247 
248 	/*
249 	 * All mappings below this point are unwritten, start on an rtx
250 	 * boundary, and do not end on an rtx boundary.
251 	 *
252 	 * - If the mapping is longer than one rtx, trim the end of the mapping
253 	 *   down to an rtx boundary and skip it.
254 	 *
255 	 * - The mapping is shorter than one rtx.  Swap it.
256 	 */
257 	if (irec->br_blockcount > mp->m_sb.sb_rextsize) {
258 		xfs_fileoff_t	new_end;
259 
260 		new_end = rounddown_64(irec->br_startoff + irec->br_blockcount,
261 				mp->m_sb.sb_rextsize);
262 		irec->br_blockcount = new_end - irec->br_startoff;
263 		return true;
264 	}
265 
266 	return false;
267 }
268 
269 /*
270  * Walk forward through the file ranges in @xmi until we find two different
271  * mappings to exchange.  If there is work to do, return the mappings;
272  * otherwise we've reached the end of the range and xmi_blockcount will be
273  * zero.
274  *
275  * If the walk skips over a pair of mappings to the same storage, save them as
276  * the left records in @adj (if provided) so that the simulation phase can
277  * avoid an extra lookup.
278   */
279 static int
xfs_exchmaps_find_mappings(struct xfs_exchmaps_intent * xmi,struct xfs_bmbt_irec * irec1,struct xfs_bmbt_irec * irec2,struct xfs_exchmaps_adjacent * adj)280 xfs_exchmaps_find_mappings(
281 	struct xfs_exchmaps_intent	*xmi,
282 	struct xfs_bmbt_irec		*irec1,
283 	struct xfs_bmbt_irec		*irec2,
284 	struct xfs_exchmaps_adjacent	*adj)
285 {
286 	int				nimaps;
287 	int				bmap_flags;
288 	int				error;
289 
290 	bmap_flags = xfs_bmapi_aflag(xfs_exchmaps_whichfork(xmi));
291 
292 	for (; xmi_has_more_exchange_work(xmi); xmi_advance(xmi, irec1)) {
293 		/* Read mapping from the first file */
294 		nimaps = 1;
295 		error = xfs_bmapi_read(xmi->xmi_ip1, xmi->xmi_startoff1,
296 				xmi->xmi_blockcount, irec1, &nimaps,
297 				bmap_flags);
298 		if (error)
299 			return error;
300 		if (nimaps != 1 ||
301 		    irec1->br_startblock == DELAYSTARTBLOCK ||
302 		    irec1->br_startoff != xmi->xmi_startoff1) {
303 			/*
304 			 * We should never get no mapping or a delalloc mapping
305 			 * or something that doesn't match what we asked for,
306 			 * since the caller flushed both inodes and we hold the
307 			 * ILOCKs for both inodes.
308 			 */
309 			ASSERT(0);
310 			return -EINVAL;
311 		}
312 
313 		if (xfs_exchmaps_can_skip_mapping(xmi, irec1)) {
314 			trace_xfs_exchmaps_mapping1_skip(xmi->xmi_ip1, irec1);
315 			continue;
316 		}
317 
318 		/* Read mapping from the second file */
319 		nimaps = 1;
320 		error = xfs_bmapi_read(xmi->xmi_ip2, xmi->xmi_startoff2,
321 				irec1->br_blockcount, irec2, &nimaps,
322 				bmap_flags);
323 		if (error)
324 			return error;
325 		if (nimaps != 1 ||
326 		    irec2->br_startblock == DELAYSTARTBLOCK ||
327 		    irec2->br_startoff != xmi->xmi_startoff2) {
328 			/*
329 			 * We should never get no mapping or a delalloc mapping
330 			 * or something that doesn't match what we asked for,
331 			 * since the caller flushed both inodes and we hold the
332 			 * ILOCKs for both inodes.
333 			 */
334 			ASSERT(0);
335 			return -EINVAL;
336 		}
337 
338 		/*
339 		 * We can only exchange as many blocks as the smaller of the
340 		 * two mapping maps.
341 		 */
342 		irec1->br_blockcount = min(irec1->br_blockcount,
343 					   irec2->br_blockcount);
344 
345 		trace_xfs_exchmaps_mapping1(xmi->xmi_ip1, irec1);
346 		trace_xfs_exchmaps_mapping2(xmi->xmi_ip2, irec2);
347 
348 		/* We found something to exchange, so return it. */
349 		if (irec1->br_startblock != irec2->br_startblock)
350 			return 0;
351 
352 		/*
353 		 * Two mappings pointing to the same physical block must not
354 		 * have different states; that's filesystem corruption.  Move
355 		 * on to the next mapping if they're both holes or both point
356 		 * to the same physical space extent.
357 		 */
358 		if (irec1->br_state != irec2->br_state) {
359 			xfs_bmap_mark_sick(xmi->xmi_ip1,
360 					xfs_exchmaps_whichfork(xmi));
361 			xfs_bmap_mark_sick(xmi->xmi_ip2,
362 					xfs_exchmaps_whichfork(xmi));
363 			return -EFSCORRUPTED;
364 		}
365 
366 		/*
367 		 * Save the mappings if we're estimating work and skipping
368 		 * these identical mappings.
369 		 */
370 		if (adj) {
371 			memcpy(&adj->left1, irec1, sizeof(*irec1));
372 			memcpy(&adj->left2, irec2, sizeof(*irec2));
373 		}
374 	}
375 
376 	return 0;
377 }
378 
379 /* Exchange these two mappings. */
380 static void
xfs_exchmaps_one_step(struct xfs_trans * tp,struct xfs_exchmaps_intent * xmi,struct xfs_bmbt_irec * irec1,struct xfs_bmbt_irec * irec2)381 xfs_exchmaps_one_step(
382 	struct xfs_trans		*tp,
383 	struct xfs_exchmaps_intent	*xmi,
384 	struct xfs_bmbt_irec		*irec1,
385 	struct xfs_bmbt_irec		*irec2)
386 {
387 	int				whichfork = xfs_exchmaps_whichfork(xmi);
388 
389 	xfs_exchmaps_update_quota(tp, xmi, irec1, irec2);
390 
391 	/* Remove both mappings. */
392 	xfs_bmap_unmap_extent(tp, xmi->xmi_ip1, whichfork, irec1);
393 	xfs_bmap_unmap_extent(tp, xmi->xmi_ip2, whichfork, irec2);
394 
395 	/*
396 	 * Re-add both mappings.  We exchange the file offsets between the two
397 	 * maps and add the opposite map, which has the effect of filling the
398 	 * logical offsets we just unmapped, but with with the physical mapping
399 	 * information exchanged.
400 	 */
401 	swap(irec1->br_startoff, irec2->br_startoff);
402 	xfs_bmap_map_extent(tp, xmi->xmi_ip1, whichfork, irec2);
403 	xfs_bmap_map_extent(tp, xmi->xmi_ip2, whichfork, irec1);
404 
405 	/* Make sure we're not adding mappings past EOF. */
406 	if (whichfork == XFS_DATA_FORK) {
407 		xfs_exchmaps_update_size(tp, xmi->xmi_ip1, irec2,
408 				xmi->xmi_isize1);
409 		xfs_exchmaps_update_size(tp, xmi->xmi_ip2, irec1,
410 				xmi->xmi_isize2);
411 	}
412 
413 	/*
414 	 * Advance our cursor and exit.   The caller (either defer ops or log
415 	 * recovery) will log the XMD item, and if *blockcount is nonzero, it
416 	 * will log a new XMI item for the remainder and call us back.
417 	 */
418 	xmi_advance(xmi, irec1);
419 }
420 
421 /* Convert inode2's leaf attr fork back to shortform, if possible.. */
422 STATIC int
xfs_exchmaps_attr_to_sf(struct xfs_trans * tp,struct xfs_exchmaps_intent * xmi)423 xfs_exchmaps_attr_to_sf(
424 	struct xfs_trans		*tp,
425 	struct xfs_exchmaps_intent	*xmi)
426 {
427 	struct xfs_da_args	args = {
428 		.dp		= xmi->xmi_ip2,
429 		.geo		= tp->t_mountp->m_attr_geo,
430 		.whichfork	= XFS_ATTR_FORK,
431 		.trans		= tp,
432 		.owner		= xmi->xmi_ip2->i_ino,
433 	};
434 	struct xfs_buf		*bp;
435 	int			forkoff;
436 	int			error;
437 
438 	if (!xfs_attr_is_leaf(xmi->xmi_ip2))
439 		return 0;
440 
441 	error = xfs_attr3_leaf_read(tp, xmi->xmi_ip2, xmi->xmi_ip2->i_ino, 0,
442 			&bp);
443 	if (error)
444 		return error;
445 
446 	forkoff = xfs_attr_shortform_allfit(bp, xmi->xmi_ip2);
447 	if (forkoff == 0)
448 		return 0;
449 
450 	return xfs_attr3_leaf_to_shortform(bp, &args, forkoff);
451 }
452 
453 /* Convert inode2's block dir fork back to shortform, if possible.. */
454 STATIC int
xfs_exchmaps_dir_to_sf(struct xfs_trans * tp,struct xfs_exchmaps_intent * xmi)455 xfs_exchmaps_dir_to_sf(
456 	struct xfs_trans		*tp,
457 	struct xfs_exchmaps_intent	*xmi)
458 {
459 	struct xfs_da_args	args = {
460 		.dp		= xmi->xmi_ip2,
461 		.geo		= tp->t_mountp->m_dir_geo,
462 		.whichfork	= XFS_DATA_FORK,
463 		.trans		= tp,
464 		.owner		= xmi->xmi_ip2->i_ino,
465 	};
466 	struct xfs_dir2_sf_hdr	sfh;
467 	struct xfs_buf		*bp;
468 	int			size;
469 	int			error = 0;
470 
471 	if (xfs_dir2_format(&args, &error) != XFS_DIR2_FMT_BLOCK)
472 		return error;
473 
474 	error = xfs_dir3_block_read(tp, xmi->xmi_ip2, xmi->xmi_ip2->i_ino, &bp);
475 	if (error)
476 		return error;
477 
478 	size = xfs_dir2_block_sfsize(xmi->xmi_ip2, bp->b_addr, &sfh);
479 	if (size > xfs_inode_data_fork_size(xmi->xmi_ip2))
480 		return 0;
481 
482 	return xfs_dir2_block_to_sf(&args, bp, size, &sfh);
483 }
484 
485 /* Convert inode2's remote symlink target back to shortform, if possible. */
486 STATIC int
xfs_exchmaps_link_to_sf(struct xfs_trans * tp,struct xfs_exchmaps_intent * xmi)487 xfs_exchmaps_link_to_sf(
488 	struct xfs_trans		*tp,
489 	struct xfs_exchmaps_intent	*xmi)
490 {
491 	struct xfs_inode		*ip = xmi->xmi_ip2;
492 	struct xfs_ifork		*ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK);
493 	char				*buf;
494 	int				error;
495 
496 	if (ifp->if_format == XFS_DINODE_FMT_LOCAL ||
497 	    ip->i_disk_size > xfs_inode_data_fork_size(ip))
498 		return 0;
499 
500 	/* Read the current symlink target into a buffer. */
501 	buf = kmalloc(ip->i_disk_size + 1,
502 			GFP_KERNEL | __GFP_NOLOCKDEP | __GFP_NOFAIL);
503 	if (!buf) {
504 		ASSERT(0);
505 		return -ENOMEM;
506 	}
507 
508 	error = xfs_symlink_remote_read(ip, buf);
509 	if (error)
510 		goto free;
511 
512 	/* Remove the blocks. */
513 	error = xfs_symlink_remote_truncate(tp, ip);
514 	if (error)
515 		goto free;
516 
517 	/* Convert fork to local format and log our changes. */
518 	xfs_idestroy_fork(ifp);
519 	ifp->if_bytes = 0;
520 	ifp->if_format = XFS_DINODE_FMT_LOCAL;
521 	xfs_init_local_fork(ip, XFS_DATA_FORK, buf, ip->i_disk_size);
522 	xfs_trans_log_inode(tp, ip, XFS_ILOG_DDATA | XFS_ILOG_CORE);
523 free:
524 	kfree(buf);
525 	return error;
526 }
527 
528 /* Clear the reflink flag after an exchange. */
529 static inline void
xfs_exchmaps_clear_reflink(struct xfs_trans * tp,struct xfs_inode * ip)530 xfs_exchmaps_clear_reflink(
531 	struct xfs_trans	*tp,
532 	struct xfs_inode	*ip)
533 {
534 	trace_xfs_reflink_unset_inode_flag(ip);
535 
536 	ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK;
537 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
538 }
539 
540 /* Finish whatever work might come after an exchange operation. */
541 static int
xfs_exchmaps_do_postop_work(struct xfs_trans * tp,struct xfs_exchmaps_intent * xmi)542 xfs_exchmaps_do_postop_work(
543 	struct xfs_trans		*tp,
544 	struct xfs_exchmaps_intent	*xmi)
545 {
546 	if (xmi->xmi_flags & __XFS_EXCHMAPS_INO2_SHORTFORM) {
547 		int			error = 0;
548 
549 		if (xmi->xmi_flags & XFS_EXCHMAPS_ATTR_FORK)
550 			error = xfs_exchmaps_attr_to_sf(tp, xmi);
551 		else if (S_ISDIR(VFS_I(xmi->xmi_ip2)->i_mode))
552 			error = xfs_exchmaps_dir_to_sf(tp, xmi);
553 		else if (S_ISLNK(VFS_I(xmi->xmi_ip2)->i_mode))
554 			error = xfs_exchmaps_link_to_sf(tp, xmi);
555 		xmi->xmi_flags &= ~__XFS_EXCHMAPS_INO2_SHORTFORM;
556 		if (error)
557 			return error;
558 	}
559 
560 	if (xmi->xmi_flags & XFS_EXCHMAPS_CLEAR_INO1_REFLINK) {
561 		xfs_exchmaps_clear_reflink(tp, xmi->xmi_ip1);
562 		xmi->xmi_flags &= ~XFS_EXCHMAPS_CLEAR_INO1_REFLINK;
563 	}
564 
565 	if (xmi->xmi_flags & XFS_EXCHMAPS_CLEAR_INO2_REFLINK) {
566 		xfs_exchmaps_clear_reflink(tp, xmi->xmi_ip2);
567 		xmi->xmi_flags &= ~XFS_EXCHMAPS_CLEAR_INO2_REFLINK;
568 	}
569 
570 	return 0;
571 }
572 
573 /* Finish one step in a mapping exchange operation, possibly relogging. */
574 int
xfs_exchmaps_finish_one(struct xfs_trans * tp,struct xfs_exchmaps_intent * xmi)575 xfs_exchmaps_finish_one(
576 	struct xfs_trans		*tp,
577 	struct xfs_exchmaps_intent	*xmi)
578 {
579 	struct xfs_bmbt_irec		irec1, irec2;
580 	int				error;
581 
582 	if (xmi_has_more_exchange_work(xmi)) {
583 		/*
584 		 * If the operation state says that some range of the files
585 		 * have not yet been exchanged, look for mappings in that range
586 		 * to exchange.  If we find some mappings, exchange them.
587 		 */
588 		error = xfs_exchmaps_find_mappings(xmi, &irec1, &irec2, NULL);
589 		if (error)
590 			return error;
591 
592 		if (xmi_has_more_exchange_work(xmi))
593 			xfs_exchmaps_one_step(tp, xmi, &irec1, &irec2);
594 
595 		/*
596 		 * If the caller asked us to exchange the file sizes after the
597 		 * exchange and either we just exchanged the last mappings in
598 		 * the range or we didn't find anything to exchange, update the
599 		 * ondisk file sizes.
600 		 */
601 		if ((xmi->xmi_flags & XFS_EXCHMAPS_SET_SIZES) &&
602 		    !xmi_has_more_exchange_work(xmi)) {
603 			xmi->xmi_ip1->i_disk_size = xmi->xmi_isize1;
604 			xmi->xmi_ip2->i_disk_size = xmi->xmi_isize2;
605 
606 			xfs_trans_log_inode(tp, xmi->xmi_ip1, XFS_ILOG_CORE);
607 			xfs_trans_log_inode(tp, xmi->xmi_ip2, XFS_ILOG_CORE);
608 		}
609 	} else if (xmi_has_postop_work(xmi)) {
610 		/*
611 		 * Now that we're finished with the exchange operation,
612 		 * complete the post-op cleanup work.
613 		 */
614 		error = xfs_exchmaps_do_postop_work(tp, xmi);
615 		if (error)
616 			return error;
617 	}
618 
619 	if (XFS_TEST_ERROR(false, tp->t_mountp, XFS_ERRTAG_EXCHMAPS_FINISH_ONE))
620 		return -EIO;
621 
622 	/* If we still have work to do, ask for a new transaction. */
623 	if (xmi_has_more_exchange_work(xmi) || xmi_has_postop_work(xmi)) {
624 		trace_xfs_exchmaps_defer(tp->t_mountp, xmi);
625 		return -EAGAIN;
626 	}
627 
628 	/*
629 	 * If we reach here, we've finished all the exchange work and the post
630 	 * operation work.  The last thing we need to do before returning to
631 	 * the caller is to make sure that COW forks are set up correctly.
632 	 */
633 	if (!(xmi->xmi_flags & XFS_EXCHMAPS_ATTR_FORK)) {
634 		xfs_exchmaps_ensure_cowfork(xmi->xmi_ip1);
635 		xfs_exchmaps_ensure_cowfork(xmi->xmi_ip2);
636 	}
637 
638 	return 0;
639 }
640 
641 /*
642  * Compute the amount of bmbt blocks we should reserve for each file.  In the
643  * worst case, each exchange will fill a hole with a new mapping, which could
644  * result in a btree split every time we add a new leaf block.
645  */
646 static inline uint64_t
xfs_exchmaps_bmbt_blocks(struct xfs_mount * mp,const struct xfs_exchmaps_req * req)647 xfs_exchmaps_bmbt_blocks(
648 	struct xfs_mount		*mp,
649 	const struct xfs_exchmaps_req	*req)
650 {
651 	return howmany_64(req->nr_exchanges,
652 					XFS_MAX_CONTIG_BMAPS_PER_BLOCK(mp)) *
653 			XFS_EXTENTADD_SPACE_RES(mp, xfs_exchmaps_reqfork(req));
654 }
655 
656 /* Compute the space we should reserve for the rmap btree expansions. */
657 static inline uint64_t
xfs_exchmaps_rmapbt_blocks(struct xfs_mount * mp,const struct xfs_exchmaps_req * req)658 xfs_exchmaps_rmapbt_blocks(
659 	struct xfs_mount		*mp,
660 	const struct xfs_exchmaps_req	*req)
661 {
662 	if (!xfs_has_rmapbt(mp))
663 		return 0;
664 	if (XFS_IS_REALTIME_INODE(req->ip1))
665 		return 0;
666 
667 	return howmany_64(req->nr_exchanges,
668 					XFS_MAX_CONTIG_RMAPS_PER_BLOCK(mp)) *
669 			XFS_RMAPADD_SPACE_RES(mp);
670 }
671 
672 /* Estimate the bmbt and rmapbt overhead required to exchange mappings. */
673 int
xfs_exchmaps_estimate_overhead(struct xfs_exchmaps_req * req)674 xfs_exchmaps_estimate_overhead(
675 	struct xfs_exchmaps_req		*req)
676 {
677 	struct xfs_mount		*mp = req->ip1->i_mount;
678 	xfs_filblks_t			bmbt_blocks;
679 	xfs_filblks_t			rmapbt_blocks;
680 	xfs_filblks_t			resblks = req->resblks;
681 
682 	/*
683 	 * Compute the number of bmbt and rmapbt blocks we might need to handle
684 	 * the estimated number of exchanges.
685 	 */
686 	bmbt_blocks = xfs_exchmaps_bmbt_blocks(mp, req);
687 	rmapbt_blocks = xfs_exchmaps_rmapbt_blocks(mp, req);
688 
689 	trace_xfs_exchmaps_overhead(mp, bmbt_blocks, rmapbt_blocks);
690 
691 	/* Make sure the change in file block count doesn't overflow. */
692 	if (check_add_overflow(req->ip1_bcount, bmbt_blocks, &req->ip1_bcount))
693 		return -EFBIG;
694 	if (check_add_overflow(req->ip2_bcount, bmbt_blocks, &req->ip2_bcount))
695 		return -EFBIG;
696 
697 	/*
698 	 * Add together the number of blocks we need to handle btree growth,
699 	 * then add it to the number of blocks we need to reserve to this
700 	 * transaction.
701 	 */
702 	if (check_add_overflow(resblks, bmbt_blocks, &resblks))
703 		return -ENOSPC;
704 	if (check_add_overflow(resblks, bmbt_blocks, &resblks))
705 		return -ENOSPC;
706 	if (check_add_overflow(resblks, rmapbt_blocks, &resblks))
707 		return -ENOSPC;
708 	if (check_add_overflow(resblks, rmapbt_blocks, &resblks))
709 		return -ENOSPC;
710 
711 	/* Can't actually reserve more than UINT_MAX blocks. */
712 	if (req->resblks > UINT_MAX)
713 		return -ENOSPC;
714 
715 	req->resblks = resblks;
716 	trace_xfs_exchmaps_final_estimate(req);
717 	return 0;
718 }
719 
720 /* Decide if we can merge two real mappings. */
721 static inline bool
xmi_can_merge(const struct xfs_bmbt_irec * b1,const struct xfs_bmbt_irec * b2)722 xmi_can_merge(
723 	const struct xfs_bmbt_irec	*b1,
724 	const struct xfs_bmbt_irec	*b2)
725 {
726 	/* Don't merge holes. */
727 	if (b1->br_startblock == HOLESTARTBLOCK ||
728 	    b2->br_startblock == HOLESTARTBLOCK)
729 		return false;
730 
731 	/* We don't merge holes. */
732 	if (!xfs_bmap_is_real_extent(b1) || !xfs_bmap_is_real_extent(b2))
733 		return false;
734 
735 	if (b1->br_startoff   + b1->br_blockcount == b2->br_startoff &&
736 	    b1->br_startblock + b1->br_blockcount == b2->br_startblock &&
737 	    b1->br_state			  == b2->br_state &&
738 	    b1->br_blockcount + b2->br_blockcount <= XFS_MAX_BMBT_EXTLEN)
739 		return true;
740 
741 	return false;
742 }
743 
744 /*
745  * Decide if we can merge three mappings.  Caller must ensure all three
746  * mappings must not be holes or delalloc reservations.
747  */
748 static inline bool
xmi_can_merge_all(const struct xfs_bmbt_irec * l,const struct xfs_bmbt_irec * m,const struct xfs_bmbt_irec * r)749 xmi_can_merge_all(
750 	const struct xfs_bmbt_irec	*l,
751 	const struct xfs_bmbt_irec	*m,
752 	const struct xfs_bmbt_irec	*r)
753 {
754 	xfs_filblks_t			new_len;
755 
756 	new_len = l->br_blockcount + m->br_blockcount + r->br_blockcount;
757 	return new_len <= XFS_MAX_BMBT_EXTLEN;
758 }
759 
/*
 * State bits computed by xmi_delta_nextents_step().  The C* bits describe the
 * mapping currently in the file ("curr"), the N* bits describe the mapping
 * that replaces it ("new"): contiguous with the left neighbour, contiguous
 * with the right neighbour, or a hole.
 */
#define CLEFT_CONTIG	(1 << 0)
#define CRIGHT_CONTIG	(1 << 1)
#define CHOLE		(1 << 2)
#define CBOTH_CONTIG	(CLEFT_CONTIG | CRIGHT_CONTIG)

#define NLEFT_CONTIG	(1 << 4)
#define NRIGHT_CONTIG	(1 << 5)
#define NHOLE		(1 << 6)
#define NBOTH_CONTIG	(NLEFT_CONTIG | NRIGHT_CONTIG)
769 
770 /* Estimate the effect of a single exchange on mapping count. */
771 static inline int
xmi_delta_nextents_step(struct xfs_mount * mp,const struct xfs_bmbt_irec * left,const struct xfs_bmbt_irec * curr,const struct xfs_bmbt_irec * new,const struct xfs_bmbt_irec * right)772 xmi_delta_nextents_step(
773 	struct xfs_mount		*mp,
774 	const struct xfs_bmbt_irec	*left,
775 	const struct xfs_bmbt_irec	*curr,
776 	const struct xfs_bmbt_irec	*new,
777 	const struct xfs_bmbt_irec	*right)
778 {
779 	bool				lhole, rhole, chole, nhole;
780 	unsigned int			state = 0;
781 	int				ret = 0;
782 
783 	lhole = left->br_startblock == HOLESTARTBLOCK;
784 	rhole = right->br_startblock == HOLESTARTBLOCK;
785 	chole = curr->br_startblock == HOLESTARTBLOCK;
786 	nhole = new->br_startblock == HOLESTARTBLOCK;
787 
788 	if (chole)
789 		state |= CHOLE;
790 	if (!lhole && !chole && xmi_can_merge(left, curr))
791 		state |= CLEFT_CONTIG;
792 	if (!rhole && !chole && xmi_can_merge(curr, right))
793 		state |= CRIGHT_CONTIG;
794 	if ((state & CBOTH_CONTIG) == CBOTH_CONTIG &&
795 	    !xmi_can_merge_all(left, curr, right))
796 		state &= ~CRIGHT_CONTIG;
797 
798 	if (nhole)
799 		state |= NHOLE;
800 	if (!lhole && !nhole && xmi_can_merge(left, new))
801 		state |= NLEFT_CONTIG;
802 	if (!rhole && !nhole && xmi_can_merge(new, right))
803 		state |= NRIGHT_CONTIG;
804 	if ((state & NBOTH_CONTIG) == NBOTH_CONTIG &&
805 	    !xmi_can_merge_all(left, new, right))
806 		state &= ~NRIGHT_CONTIG;
807 
808 	switch (state & (CLEFT_CONTIG | CRIGHT_CONTIG | CHOLE)) {
809 	case CLEFT_CONTIG | CRIGHT_CONTIG:
810 		/*
811 		 * left/curr/right are the same mapping, so deleting curr
812 		 * causes 2 new mappings to be created.
813 		 */
814 		ret += 2;
815 		break;
816 	case 0:
817 		/*
818 		 * curr is not contiguous with any mapping, so we remove curr
819 		 * completely
820 		 */
821 		ret--;
822 		break;
823 	case CHOLE:
824 		/* hole, do nothing */
825 		break;
826 	case CLEFT_CONTIG:
827 	case CRIGHT_CONTIG:
828 		/* trim either left or right, no change */
829 		break;
830 	}
831 
832 	switch (state & (NLEFT_CONTIG | NRIGHT_CONTIG | NHOLE)) {
833 	case NLEFT_CONTIG | NRIGHT_CONTIG:
834 		/*
835 		 * left/curr/right will become the same mapping, so adding
836 		 * curr causes the deletion of right.
837 		 */
838 		ret--;
839 		break;
840 	case 0:
841 		/* new is not contiguous with any mapping */
842 		ret++;
843 		break;
844 	case NHOLE:
845 		/* hole, do nothing. */
846 		break;
847 	case NLEFT_CONTIG:
848 	case NRIGHT_CONTIG:
849 		/* new is absorbed into left or right, no change */
850 		break;
851 	}
852 
853 	trace_xfs_exchmaps_delta_nextents_step(mp, left, curr, new, right, ret,
854 			state);
855 	return ret;
856 }
857 
858 /* Make sure we don't overflow the extent (mapping) counters. */
859 static inline int
xmi_ensure_delta_nextents(struct xfs_exchmaps_req * req,struct xfs_inode * ip,int64_t delta)860 xmi_ensure_delta_nextents(
861 	struct xfs_exchmaps_req	*req,
862 	struct xfs_inode	*ip,
863 	int64_t			delta)
864 {
865 	struct xfs_mount	*mp = ip->i_mount;
866 	int			whichfork = xfs_exchmaps_reqfork(req);
867 	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
868 	uint64_t		new_nextents;
869 	xfs_extnum_t		max_nextents;
870 
871 	if (delta < 0)
872 		return 0;
873 
874 	/*
875 	 * It's always an error if the delta causes integer overflow.  delta
876 	 * needs an explicit cast here to avoid warnings about implicit casts
877 	 * coded into the overflow check.
878 	 */
879 	if (check_add_overflow(ifp->if_nextents, (uint64_t)delta,
880 				&new_nextents))
881 		return -EFBIG;
882 
883 	if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_REDUCE_MAX_IEXTENTS) &&
884 	    new_nextents > 10)
885 		return -EFBIG;
886 
887 	/*
888 	 * We always promote both inodes to have large extent counts if the
889 	 * superblock feature is enabled, so we only need to check against the
890 	 * theoretical maximum.
891 	 */
892 	max_nextents = xfs_iext_max_nextents(xfs_has_large_extent_counts(mp),
893 					     whichfork);
894 	if (new_nextents > max_nextents)
895 		return -EFBIG;
896 
897 	return 0;
898 }
899 
900 /* Find the next mapping after irec. */
901 static inline int
xmi_next(struct xfs_inode * ip,int bmap_flags,const struct xfs_bmbt_irec * irec,struct xfs_bmbt_irec * nrec)902 xmi_next(
903 	struct xfs_inode		*ip,
904 	int				bmap_flags,
905 	const struct xfs_bmbt_irec	*irec,
906 	struct xfs_bmbt_irec		*nrec)
907 {
908 	xfs_fileoff_t			off;
909 	xfs_filblks_t			blockcount;
910 	int				nimaps = 1;
911 	int				error;
912 
913 	off = irec->br_startoff + irec->br_blockcount;
914 	blockcount = XFS_MAX_FILEOFF - off;
915 	error = xfs_bmapi_read(ip, off, blockcount, nrec, &nimaps, bmap_flags);
916 	if (error)
917 		return error;
918 	if (nrec->br_startblock == DELAYSTARTBLOCK ||
919 	    nrec->br_startoff != off) {
920 		/*
921 		 * If we don't get the mapping we want, return a zero-length
922 		 * mapping, which our estimator function will pretend is a hole.
923 		 * We shouldn't get delalloc reservations.
924 		 */
925 		nrec->br_startblock = HOLESTARTBLOCK;
926 	}
927 
928 	return 0;
929 }
930 
931 int __init
xfs_exchmaps_intent_init_cache(void)932 xfs_exchmaps_intent_init_cache(void)
933 {
934 	xfs_exchmaps_intent_cache = kmem_cache_create("xfs_exchmaps_intent",
935 			sizeof(struct xfs_exchmaps_intent),
936 			0, 0, NULL);
937 
938 	return xfs_exchmaps_intent_cache != NULL ? 0 : -ENOMEM;
939 }
940 
941 void
xfs_exchmaps_intent_destroy_cache(void)942 xfs_exchmaps_intent_destroy_cache(void)
943 {
944 	kmem_cache_destroy(xfs_exchmaps_intent_cache);
945 	xfs_exchmaps_intent_cache = NULL;
946 }
947 
948 /*
949  * Decide if we will exchange the reflink flags between the two files after the
950  * exchange.  The only time we want to do this is if we're exchanging all
951  * mappings under EOF and the inode reflink flags have different states.
952  */
953 static inline bool
xmi_can_exchange_reflink_flags(const struct xfs_exchmaps_req * req,unsigned int reflink_state)954 xmi_can_exchange_reflink_flags(
955 	const struct xfs_exchmaps_req	*req,
956 	unsigned int			reflink_state)
957 {
958 	struct xfs_mount		*mp = req->ip1->i_mount;
959 
960 	if (hweight32(reflink_state) != 1)
961 		return false;
962 	if (req->startoff1 != 0 || req->startoff2 != 0)
963 		return false;
964 	if (req->blockcount != XFS_B_TO_FSB(mp, req->ip1->i_disk_size))
965 		return false;
966 	if (req->blockcount != XFS_B_TO_FSB(mp, req->ip2->i_disk_size))
967 		return false;
968 	return true;
969 }
970 
971 
972 /* Allocate and initialize a new incore intent item from a request. */
973 struct xfs_exchmaps_intent *
xfs_exchmaps_init_intent(const struct xfs_exchmaps_req * req)974 xfs_exchmaps_init_intent(
975 	const struct xfs_exchmaps_req	*req)
976 {
977 	struct xfs_exchmaps_intent	*xmi;
978 	unsigned int			rs = 0;
979 
980 	xmi = kmem_cache_zalloc(xfs_exchmaps_intent_cache,
981 			GFP_NOFS | __GFP_NOFAIL);
982 	INIT_LIST_HEAD(&xmi->xmi_list);
983 	xmi->xmi_ip1 = req->ip1;
984 	xmi->xmi_ip2 = req->ip2;
985 	xmi->xmi_startoff1 = req->startoff1;
986 	xmi->xmi_startoff2 = req->startoff2;
987 	xmi->xmi_blockcount = req->blockcount;
988 	xmi->xmi_isize1 = xmi->xmi_isize2 = -1;
989 	xmi->xmi_flags = req->flags & XFS_EXCHMAPS_PARAMS;
990 
991 	if (xfs_exchmaps_whichfork(xmi) == XFS_ATTR_FORK) {
992 		xmi->xmi_flags |= __XFS_EXCHMAPS_INO2_SHORTFORM;
993 		return xmi;
994 	}
995 
996 	if (req->flags & XFS_EXCHMAPS_SET_SIZES) {
997 		xmi->xmi_flags |= XFS_EXCHMAPS_SET_SIZES;
998 		xmi->xmi_isize1 = req->ip2->i_disk_size;
999 		xmi->xmi_isize2 = req->ip1->i_disk_size;
1000 	}
1001 
1002 	/* Record the state of each inode's reflink flag before the op. */
1003 	if (xfs_is_reflink_inode(req->ip1))
1004 		rs |= 1;
1005 	if (xfs_is_reflink_inode(req->ip2))
1006 		rs |= 2;
1007 
1008 	/*
1009 	 * Figure out if we're clearing the reflink flags (which effectively
1010 	 * exchanges them) after the operation.
1011 	 */
1012 	if (xmi_can_exchange_reflink_flags(req, rs)) {
1013 		if (rs & 1)
1014 			xmi->xmi_flags |= XFS_EXCHMAPS_CLEAR_INO1_REFLINK;
1015 		if (rs & 2)
1016 			xmi->xmi_flags |= XFS_EXCHMAPS_CLEAR_INO2_REFLINK;
1017 	}
1018 
1019 	if (S_ISDIR(VFS_I(xmi->xmi_ip2)->i_mode) ||
1020 	    S_ISLNK(VFS_I(xmi->xmi_ip2)->i_mode))
1021 		xmi->xmi_flags |= __XFS_EXCHMAPS_INO2_SHORTFORM;
1022 
1023 	return xmi;
1024 }
1025 
/*
 * Estimate the number of exchange operations and the number of file blocks
 * in each file that will be affected by the exchange operation.
 *
 * The exchange is simulated with a temporary intent item that is freed again
 * before returning.  Along the way this function:
 *
 *  - increments req->nr_exchanges once per pair of mappings to exchange
 *    (NOTE(review): the counter is not reset here, so the caller presumably
 *    zeroes it beforehand -- confirm);
 *  - stores the number of real-extent blocks seen in each file in
 *    req->ip{1,2}_rtbcount for the data fork of a realtime inode, or in
 *    req->ip{1,2}_bcount otherwise;
 *  - checks that the worst-case growth of the extent counters fits.
 *
 * Returns 0 on success, or a negative errno from the mapping lookups, the
 * extent count check, or xfs_exchmaps_estimate_overhead().
 */
int
xfs_exchmaps_estimate(
	struct xfs_exchmaps_req		*req)
{
	struct xfs_exchmaps_intent	*xmi;
	struct xfs_bmbt_irec		irec1, irec2;
	struct xfs_exchmaps_adjacent	adj = ADJACENT_INIT;
	xfs_filblks_t			ip1_blocks = 0, ip2_blocks = 0;
	int64_t				d_nexts1, d_nexts2;
	int				bmap_flags;
	int				error;

	ASSERT(!(req->flags & ~XFS_EXCHMAPS_PARAMS));

	bmap_flags = xfs_bmapi_aflag(xfs_exchmaps_reqfork(req));
	/* Scratch intent used only to drive the simulation below. */
	xmi = xfs_exchmaps_init_intent(req);

	/*
	 * To guard against the possibility of overflowing the extent counters,
	 * we have to estimate an upper bound on the potential increase in that
	 * counter.  We can split the mapping at each end of the range, and for
	 * each step of the exchange we can split the mapping that we're
	 * working on if the mappings do not align.
	 */
	d_nexts1 = d_nexts2 = 3;

	while (xmi_has_more_exchange_work(xmi)) {
		/*
		 * Walk through the file ranges until we find something to
		 * exchange.  Because we're simulating the exchange, pass in
		 * adj to capture skipped mappings for correct estimation of
		 * bmbt record merges.
		 */
		error = xfs_exchmaps_find_mappings(xmi, &irec1, &irec2, &adj);
		if (error)
			goto out_free;
		/* The search may have used up the rest of the range. */
		if (!xmi_has_more_exchange_work(xmi))
			break;

		/* Update accounting. */
		if (xfs_bmap_is_real_extent(&irec1))
			ip1_blocks += irec1.br_blockcount;
		if (xfs_bmap_is_real_extent(&irec2))
			ip2_blocks += irec2.br_blockcount;
		req->nr_exchanges++;

		/* Read the next mappings from both files. */
		error = xmi_next(req->ip1, bmap_flags, &irec1, &adj.right1);
		if (error)
			goto out_free;

		error = xmi_next(req->ip2, bmap_flags, &irec2, &adj.right2);
		if (error)
			goto out_free;

		/*
		 * Update extent count deltas.  Each file is evaluated as if
		 * it received the other file's mapping between its own left
		 * and right neighbours.
		 */
		d_nexts1 += xmi_delta_nextents_step(req->ip1->i_mount,
				&adj.left1, &irec1, &irec2, &adj.right1);

		d_nexts2 += xmi_delta_nextents_step(req->ip1->i_mount,
				&adj.left2, &irec2, &irec1, &adj.right2);

		/*
		 * Now pretend we exchanged the mappings: the mapping that
		 * moved into each file becomes (or extends) that file's left
		 * neighbour for the next iteration.
		 */
		if (xmi_can_merge(&adj.left2, &irec1))
			adj.left2.br_blockcount += irec1.br_blockcount;
		else
			memcpy(&adj.left2, &irec1, sizeof(irec1));

		if (xmi_can_merge(&adj.left1, &irec2))
			adj.left1.br_blockcount += irec2.br_blockcount;
		else
			memcpy(&adj.left1, &irec2, sizeof(irec2));

		xmi_advance(xmi, &irec1);
	}

	/* Account for the blocks that are being exchanged. */
	if (XFS_IS_REALTIME_INODE(req->ip1) &&
	    xfs_exchmaps_reqfork(req) == XFS_DATA_FORK) {
		req->ip1_rtbcount = ip1_blocks;
		req->ip2_rtbcount = ip2_blocks;
	} else {
		req->ip1_bcount = ip1_blocks;
		req->ip2_bcount = ip2_blocks;
	}

	/*
	 * Make sure that both forks have enough slack left in their extent
	 * counters that the exchange operation will not overflow.  When both
	 * sides of the exchange are the same inode, the two deltas apply to
	 * the same fork and are therefore checked as a sum.
	 */
	trace_xfs_exchmaps_delta_nextents(req, d_nexts1, d_nexts2);
	if (req->ip1 == req->ip2) {
		error = xmi_ensure_delta_nextents(req, req->ip1,
				d_nexts1 + d_nexts2);
	} else {
		error = xmi_ensure_delta_nextents(req, req->ip1, d_nexts1);
		if (error)
			goto out_free;
		error = xmi_ensure_delta_nextents(req, req->ip2, d_nexts2);
	}
	if (error)
		goto out_free;

	trace_xfs_exchmaps_initial_estimate(req);
	error = xfs_exchmaps_estimate_overhead(req);
out_free:
	/* The scratch intent is released on both success and failure. */
	kmem_cache_free(xfs_exchmaps_intent_cache, xmi);
	return error;
}
1139 
/*
 * Set the reflink flag before an operation.  The flag is set in the incore
 * inode @ip and the inode core is logged in transaction @tp.
 */
static inline void
xfs_exchmaps_set_reflink(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip)
{
	/* Emit the tracepoint before the flag is modified. */
	trace_xfs_reflink_set_inode_flag(ip);

	ip->i_diflags2 |= XFS_DIFLAG2_REFLINK;
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
}
1151 
1152 /*
1153  * If either file has shared blocks and we're exchanging data forks, we must
1154  * flag the other file as having shared blocks so that we get the shared-block
1155  * rmap functions if we need to fix up the rmaps.
1156  */
1157 void
xfs_exchmaps_ensure_reflink(struct xfs_trans * tp,const struct xfs_exchmaps_intent * xmi)1158 xfs_exchmaps_ensure_reflink(
1159 	struct xfs_trans			*tp,
1160 	const struct xfs_exchmaps_intent	*xmi)
1161 {
1162 	unsigned int				rs = 0;
1163 
1164 	if (xfs_is_reflink_inode(xmi->xmi_ip1))
1165 		rs |= 1;
1166 	if (xfs_is_reflink_inode(xmi->xmi_ip2))
1167 		rs |= 2;
1168 
1169 	if ((rs & 1) && !xfs_is_reflink_inode(xmi->xmi_ip2))
1170 		xfs_exchmaps_set_reflink(tp, xmi->xmi_ip2);
1171 
1172 	if ((rs & 2) && !xfs_is_reflink_inode(xmi->xmi_ip1))
1173 		xfs_exchmaps_set_reflink(tp, xmi->xmi_ip1);
1174 }
1175 
1176 /* Set the large extent count flag before an operation if needed. */
1177 static inline void
xfs_exchmaps_ensure_large_extent_counts(struct xfs_trans * tp,struct xfs_inode * ip)1178 xfs_exchmaps_ensure_large_extent_counts(
1179 	struct xfs_trans	*tp,
1180 	struct xfs_inode	*ip)
1181 {
1182 	if (xfs_inode_has_large_extent_counts(ip))
1183 		return;
1184 
1185 	ip->i_diflags2 |= XFS_DIFLAG2_NREXT64;
1186 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1187 }
1188 
1189 /* Widen the extent counter fields of both inodes if necessary. */
1190 void
xfs_exchmaps_upgrade_extent_counts(struct xfs_trans * tp,const struct xfs_exchmaps_intent * xmi)1191 xfs_exchmaps_upgrade_extent_counts(
1192 	struct xfs_trans			*tp,
1193 	const struct xfs_exchmaps_intent	*xmi)
1194 {
1195 	if (!xfs_has_large_extent_counts(tp->t_mountp))
1196 		return;
1197 
1198 	xfs_exchmaps_ensure_large_extent_counts(tp, xmi->xmi_ip1);
1199 	xfs_exchmaps_ensure_large_extent_counts(tp, xmi->xmi_ip2);
1200 }
1201 
/*
 * Schedule an exchange of a range of mappings from one inode to another.
 *
 * The use of file mapping exchange log intent items ensures the operation can
 * be resumed even if the system goes down.  The caller must commit the
 * transaction to start the work.
 *
 * The caller must ensure that both inodes are joined to the transaction and
 * ILOCKed; they will still be joined to the transaction at exit.
 */
void
xfs_exchange_mappings(
	struct xfs_trans		*tp,
	const struct xfs_exchmaps_req	*req)
{
	struct xfs_exchmaps_intent	*xmi;

	/* Internal-only flags must never overlap the flags that get logged. */
	BUILD_BUG_ON(XFS_EXCHMAPS_INTERNAL_FLAGS & XFS_EXCHMAPS_LOGGED_FLAGS);

	xfs_assert_ilocked(req->ip1, XFS_ILOCK_EXCL);
	xfs_assert_ilocked(req->ip2, XFS_ILOCK_EXCL);
	ASSERT(!(req->flags & ~XFS_EXCHMAPS_LOGGED_FLAGS));
	/* Setting file sizes is not valid for an attr fork exchange. */
	if (req->flags & XFS_EXCHMAPS_SET_SIZES)
		ASSERT(!(req->flags & XFS_EXCHMAPS_ATTR_FORK));
	ASSERT(xfs_has_exchange_range(tp->t_mountp));

	/* An empty range means there is nothing to schedule. */
	if (req->blockcount == 0)
		return;

	/*
	 * Build the intent and queue it as deferred work, then flag the
	 * inodes as needed for reflink and large extent counts.
	 */
	xmi = xfs_exchmaps_init_intent(req);
	xfs_exchmaps_defer_add(tp, xmi);
	xfs_exchmaps_ensure_reflink(tp, xmi);
	xfs_exchmaps_upgrade_extent_counts(tp, xmi);
}
1236