// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (c) 2020-2024 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_bmap.h"
#include "xfs_icache.h"
#include "xfs_quota.h"
#include "xfs_exchmaps.h"
#include "xfs_trace.h"
#include "xfs_bmap_btree.h"
#include "xfs_trans_space.h"
#include "xfs_error.h"
#include "xfs_errortag.h"
#include "xfs_health.h"
#include "xfs_exchmaps_item.h"
#include "xfs_da_format.h"
#include "xfs_da_btree.h"
#include "xfs_attr_leaf.h"
#include "xfs_attr.h"
#include "xfs_dir2_priv.h"
#include "xfs_dir2.h"
#include "xfs_symlink_remote.h"

struct kmem_cache	*xfs_exchmaps_intent_cache;

/* bmbt mappings adjacent to a pair of records. */
struct xfs_exchmaps_adjacent {
	struct xfs_bmbt_irec		left1;
	struct xfs_bmbt_irec		right1;
	struct xfs_bmbt_irec		left2;
	struct xfs_bmbt_irec		right2;
};

#define ADJACENT_INIT { \
	.left1  = { .br_startblock = HOLESTARTBLOCK }, \
	.right1 = { .br_startblock = HOLESTARTBLOCK }, \
	.left2  = { .br_startblock = HOLESTARTBLOCK }, \
	.right2 = { .br_startblock = HOLESTARTBLOCK }, \
}

/* Information to reset reflink flag / CoW fork state after an exchange. */

/*
 * If the reflink flag is set on either inode, make sure it has an incore CoW
 * fork, since all reflink inodes must have them.  If there's a CoW fork and it
 * has mappings in it, make sure the inodes are tagged appropriately so that
 * speculative preallocations can be GC'd if we run low on space.
 */
static inline void
xfs_exchmaps_ensure_cowfork(
	struct xfs_inode	*ip)
{
	struct xfs_ifork	*cfork;

	if (xfs_is_reflink_inode(ip))
		xfs_ifork_init_cow(ip);

	cfork = xfs_ifork_ptr(ip, XFS_COW_FORK);
	if (!cfork)
		return;
	if (cfork->if_bytes > 0)
		xfs_inode_set_cowblocks_tag(ip);
	else
		xfs_inode_clear_cowblocks_tag(ip);
}

/*
 * Adjust the on-disk inode size upwards if needed so that we never add
 * mappings into the file past EOF.  This is crucial so that log recovery won't
 * get confused by the sudden appearance of post-eof mappings.
 */
STATIC void
xfs_exchmaps_update_size(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip,
	struct xfs_bmbt_irec	*imap,
	xfs_fsize_t		new_isize)
{
	struct xfs_mount	*mp = tp->t_mountp;
	xfs_fsize_t		len;

	if (new_isize < 0)
		return;

	len = min(XFS_FSB_TO_B(mp, imap->br_startoff + imap->br_blockcount),
		  new_isize);

	if (len <= ip->i_disk_size)
		return;

	trace_xfs_exchmaps_update_inode_size(ip, len);

	ip->i_disk_size = len;
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
}
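
/*
 * Worked example of the size update above (illustrative numbers): with 4k
 * blocks, a mapping ending at file block 30 covers bytes up to 122880.  If
 * new_isize is 100000, then len = min(122880, 100000) = 100000; an on-disk
 * size of 90000 would be bumped to 100000 so that the new mapping never ends
 * up entirely beyond the logged EOF.
 */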

/* Advance the incore state tracking after exchanging a mapping. */
static inline void
xmi_advance(
	struct xfs_exchmaps_intent	*xmi,
	const struct xfs_bmbt_irec	*irec)
{
	xmi->xmi_startoff1 += irec->br_blockcount;
	xmi->xmi_startoff2 += irec->br_blockcount;
	xmi->xmi_blockcount -= irec->br_blockcount;
}

/* Do we still have more mappings to exchange? */
static inline bool
xmi_has_more_exchange_work(const struct xfs_exchmaps_intent *xmi)
{
	return xmi->xmi_blockcount > 0;
}

/* Do we have post-operation cleanups to perform? */
static inline bool
xmi_has_postop_work(const struct xfs_exchmaps_intent *xmi)
{
	return xmi->xmi_flags & (XFS_EXCHMAPS_CLEAR_INO1_REFLINK |
				 XFS_EXCHMAPS_CLEAR_INO2_REFLINK |
				 __XFS_EXCHMAPS_INO2_SHORTFORM);
}

/* Check all mappings to make sure we can actually exchange them. */
int
xfs_exchmaps_check_forks(
	struct xfs_mount		*mp,
	const struct xfs_exchmaps_req	*req)
{
	struct xfs_ifork		*ifp1, *ifp2;
	int				whichfork = xfs_exchmaps_reqfork(req);

	/* No fork? */
	ifp1 = xfs_ifork_ptr(req->ip1, whichfork);
	ifp2 = xfs_ifork_ptr(req->ip2, whichfork);
	if (!ifp1 || !ifp2)
		return -EINVAL;

	/* We don't know how to exchange local format forks. */
	if (ifp1->if_format == XFS_DINODE_FMT_LOCAL ||
	    ifp2->if_format == XFS_DINODE_FMT_LOCAL)
		return -EINVAL;

	return 0;
}

#ifdef CONFIG_XFS_QUOTA
/* Log the actual updates to the quota accounting. */
static inline void
xfs_exchmaps_update_quota(
	struct xfs_trans		*tp,
	struct xfs_exchmaps_intent	*xmi,
	struct xfs_bmbt_irec		*irec1,
	struct xfs_bmbt_irec		*irec2)
{
	int64_t				ip1_delta = 0, ip2_delta = 0;
	unsigned int			qflag;

	qflag = XFS_IS_REALTIME_INODE(xmi->xmi_ip1) ? XFS_TRANS_DQ_RTBCOUNT :
						      XFS_TRANS_DQ_BCOUNT;

	if (xfs_bmap_is_real_extent(irec1)) {
		ip1_delta -= irec1->br_blockcount;
		ip2_delta += irec1->br_blockcount;
	}

	if (xfs_bmap_is_real_extent(irec2)) {
		ip1_delta += irec2->br_blockcount;
		ip2_delta -= irec2->br_blockcount;
	}

	xfs_trans_mod_dquot_byino(tp, xmi->xmi_ip1, qflag, ip1_delta);
	xfs_trans_mod_dquot_byino(tp, xmi->xmi_ip2, qflag, ip2_delta);
}
#else
# define xfs_exchmaps_update_quota(tp, xmi, irec1, irec2)	((void)0)
#endif
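
/*
 * Quota example (illustrative): if irec1 is a real 8-block mapping leaving
 * file1 and irec2 is a hole, then ip1_delta = -8 and ip2_delta = +8; eight
 * blocks move from file1's dquot to file2's dquot.  When both mappings are
 * real and the same length, the deltas cancel to zero.
 */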

/* Decide if we want to skip this mapping from file1. */
static inline bool
xfs_exchmaps_can_skip_mapping(
	struct xfs_exchmaps_intent	*xmi,
	struct xfs_bmbt_irec		*irec)
{
	struct xfs_mount		*mp = xmi->xmi_ip1->i_mount;

	/* Do not skip this mapping if the caller did not tell us to. */
	if (!(xmi->xmi_flags & XFS_EXCHMAPS_INO1_WRITTEN))
		return false;

	/* Do not skip mapped, written mappings. */
	if (xfs_bmap_is_written_extent(irec))
		return false;

	/*
	 * The mapping is unwritten or a hole.  It cannot be a delalloc
	 * reservation because we already excluded those.  It cannot be an
	 * unwritten extent with dirty page cache because we flushed the page
	 * cache.  For files where the allocation unit is 1FSB (files on the
	 * data dev, rt files if the extent size is 1FSB), we can safely
	 * skip this mapping.
	 */
	if (!xfs_inode_has_bigrtalloc(xmi->xmi_ip1))
		return true;

	/*
	 * For a realtime file with a multi-fsb allocation unit, the decision
	 * is trickier because we can only swap full allocation units.
	 * Unwritten mappings can appear in the middle of an rtx if the rtx is
	 * partially written, but they can also appear for preallocations.
	 *
	 * If the mapping is a hole, skip it entirely.  Holes should align with
	 * rtx boundaries.
	 */
	if (!xfs_bmap_is_real_extent(irec))
		return true;

	/*
	 * All mappings below this point are unwritten.
	 *
	 * - If the beginning is not aligned to an rtx, trim the end of the
	 *   mapping so that it does not cross an rtx boundary, and swap it.
	 *
	 * - If both ends are aligned to an rtx, skip the entire mapping.
	 */
	if (!isaligned_64(irec->br_startoff, mp->m_sb.sb_rextsize)) {
		xfs_fileoff_t	new_end;

		new_end = roundup_64(irec->br_startoff, mp->m_sb.sb_rextsize);
		irec->br_blockcount = min(irec->br_blockcount,
					  new_end - irec->br_startoff);
		return false;
	}
	if (isaligned_64(irec->br_blockcount, mp->m_sb.sb_rextsize))
		return true;

	/*
	 * All mappings below this point are unwritten, start on an rtx
	 * boundary, and do not end on an rtx boundary.
	 *
	 * - If the mapping is longer than one rtx, trim the end of the mapping
	 *   down to an rtx boundary and skip it.
	 *
	 * - The mapping is shorter than one rtx.  Swap it.
	 */
	if (irec->br_blockcount > mp->m_sb.sb_rextsize) {
		xfs_fileoff_t	new_end;

		new_end = rounddown_64(irec->br_startoff + irec->br_blockcount,
				mp->m_sb.sb_rextsize);
		irec->br_blockcount = new_end - irec->br_startoff;
		return true;
	}

	return false;
}
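
/*
 * Worked example of the rtx trimming above (illustrative, sb_rextsize = 4):
 * an unwritten mapping at startoff 6 with blockcount 9 is handled in three
 * passes.  Pass 1: startoff 6 is unaligned, so trim to [6, 2] (up to the rtx
 * boundary at 8) and exchange it.  Pass 2: [8, 7] starts aligned and is
 * longer than one rtx, so trim to [8, 4] and skip that fully unwritten rtx.
 * Pass 3: [12, 3] starts aligned and is shorter than one rtx, so exchange it.
 */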

/*
 * Walk forward through the file ranges in @xmi until we find two different
 * mappings to exchange.  If there is work to do, return the mappings;
 * otherwise we've reached the end of the range and xmi_blockcount will be
 * zero.
 *
 * If the walk skips over a pair of mappings to the same storage, save them as
 * the left records in @adj (if provided) so that the simulation phase can
 * avoid an extra lookup.
 */
static int
xfs_exchmaps_find_mappings(
	struct xfs_exchmaps_intent	*xmi,
	struct xfs_bmbt_irec		*irec1,
	struct xfs_bmbt_irec		*irec2,
	struct xfs_exchmaps_adjacent	*adj)
{
	int				nimaps;
	int				bmap_flags;
	int				error;

	bmap_flags = xfs_bmapi_aflag(xfs_exchmaps_whichfork(xmi));

	for (; xmi_has_more_exchange_work(xmi); xmi_advance(xmi, irec1)) {
		/* Read mapping from the first file */
		nimaps = 1;
		error = xfs_bmapi_read(xmi->xmi_ip1, xmi->xmi_startoff1,
				xmi->xmi_blockcount, irec1, &nimaps,
				bmap_flags);
		if (error)
			return error;
		if (nimaps != 1 ||
		    irec1->br_startblock == DELAYSTARTBLOCK ||
		    irec1->br_startoff != xmi->xmi_startoff1) {
			/*
			 * We should never get no mapping or a delalloc mapping
			 * or something that doesn't match what we asked for,
			 * since the caller flushed both inodes and we hold the
			 * ILOCKs for both inodes.
			 */
			ASSERT(0);
			return -EINVAL;
		}

		if (xfs_exchmaps_can_skip_mapping(xmi, irec1)) {
			trace_xfs_exchmaps_mapping1_skip(xmi->xmi_ip1, irec1);
			continue;
		}

		/* Read mapping from the second file */
		nimaps = 1;
		error = xfs_bmapi_read(xmi->xmi_ip2, xmi->xmi_startoff2,
				irec1->br_blockcount, irec2, &nimaps,
				bmap_flags);
		if (error)
			return error;
		if (nimaps != 1 ||
		    irec2->br_startblock == DELAYSTARTBLOCK ||
		    irec2->br_startoff != xmi->xmi_startoff2) {
			/*
			 * We should never get no mapping or a delalloc mapping
			 * or something that doesn't match what we asked for,
			 * since the caller flushed both inodes and we hold the
			 * ILOCKs for both inodes.
			 */
			ASSERT(0);
			return -EINVAL;
		}

		/*
		 * We can only exchange as many blocks as the smaller of the
		 * two mappings.
		 */
		irec1->br_blockcount = min(irec1->br_blockcount,
					   irec2->br_blockcount);

		trace_xfs_exchmaps_mapping1(xmi->xmi_ip1, irec1);
		trace_xfs_exchmaps_mapping2(xmi->xmi_ip2, irec2);

		/* We found something to exchange, so return it. */
		if (irec1->br_startblock != irec2->br_startblock)
			return 0;

		/*
		 * Two mappings pointing to the same physical block must not
		 * have different states; that's filesystem corruption.  Move
		 * on to the next mapping if they're both holes or both point
		 * to the same physical extent.
		 */
		if (irec1->br_state != irec2->br_state) {
			xfs_bmap_mark_sick(xmi->xmi_ip1,
					xfs_exchmaps_whichfork(xmi));
			xfs_bmap_mark_sick(xmi->xmi_ip2,
					xfs_exchmaps_whichfork(xmi));
			return -EFSCORRUPTED;
		}

		/*
		 * Save the mappings if we're estimating work and skipping
		 * these identical mappings.
		 */
		if (adj) {
			memcpy(&adj->left1, irec1, sizeof(*irec1));
			memcpy(&adj->left2, irec2, sizeof(*irec2));
		}
	}

	return 0;
}

/* Exchange these two mappings. */
static void
xfs_exchmaps_one_step(
	struct xfs_trans		*tp,
	struct xfs_exchmaps_intent	*xmi,
	struct xfs_bmbt_irec		*irec1,
	struct xfs_bmbt_irec		*irec2)
{
	int				whichfork = xfs_exchmaps_whichfork(xmi);

	xfs_exchmaps_update_quota(tp, xmi, irec1, irec2);

	/* Remove both mappings. */
	xfs_bmap_unmap_extent(tp, xmi->xmi_ip1, whichfork, irec1);
	xfs_bmap_unmap_extent(tp, xmi->xmi_ip2, whichfork, irec2);

	/*
	 * Re-add both mappings.  We exchange the file offsets between the two
	 * maps and add the opposite map, which has the effect of filling the
	 * logical offsets we just unmapped, but with the physical mapping
	 * information exchanged.
	 */
	swap(irec1->br_startoff, irec2->br_startoff);
	xfs_bmap_map_extent(tp, xmi->xmi_ip1, whichfork, irec2);
	xfs_bmap_map_extent(tp, xmi->xmi_ip2, whichfork, irec1);

	/* Make sure we're not adding mappings past EOF. */
	if (whichfork == XFS_DATA_FORK) {
		xfs_exchmaps_update_size(tp, xmi->xmi_ip1, irec2,
				xmi->xmi_isize1);
		xfs_exchmaps_update_size(tp, xmi->xmi_ip2, irec1,
				xmi->xmi_isize2);
	}

	/*
	 * Advance our cursor and exit.  The caller (either defer ops or log
	 * recovery) will log the XMD item, and if xmi_blockcount is still
	 * nonzero, it will log a new XMI item for the remainder and call us
	 * back.
	 */
	xmi_advance(xmi, irec1);
}
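
/*
 * Offset-swap example (illustrative): suppose irec1 = {off 10, block A,
 * len 4} came from file1 and irec2 = {off 22, block B, len 4} from file2.
 * After unmapping both, swap() makes irec1 = {off 22, A} and
 * irec2 = {off 10, B}; mapping irec2 into file1 and irec1 into file2 leaves
 * block B at offset 10 of file1 and block A at offset 22 of file2, which is
 * exactly the exchange.
 */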

/* Convert inode2's leaf attr fork back to shortform, if possible. */
STATIC int
xfs_exchmaps_attr_to_sf(
	struct xfs_trans		*tp,
	struct xfs_exchmaps_intent	*xmi)
{
	struct xfs_da_args	args = {
		.dp		= xmi->xmi_ip2,
		.geo		= tp->t_mountp->m_attr_geo,
		.whichfork	= XFS_ATTR_FORK,
		.trans		= tp,
		.owner		= xmi->xmi_ip2->i_ino,
	};
	struct xfs_buf		*bp;
	int			forkoff;
	int			error;

	if (!xfs_attr_is_leaf(xmi->xmi_ip2))
		return 0;

	error = xfs_attr3_leaf_read(tp, xmi->xmi_ip2, xmi->xmi_ip2->i_ino, 0,
			&bp);
	if (error)
		return error;

	forkoff = xfs_attr_shortform_allfit(bp, xmi->xmi_ip2);
	if (forkoff == 0)
		return 0;

	return xfs_attr3_leaf_to_shortform(bp, &args, forkoff);
}

/* Convert inode2's block dir fork back to shortform, if possible. */
STATIC int
xfs_exchmaps_dir_to_sf(
	struct xfs_trans		*tp,
	struct xfs_exchmaps_intent	*xmi)
{
	struct xfs_da_args	args = {
		.dp		= xmi->xmi_ip2,
		.geo		= tp->t_mountp->m_dir_geo,
		.whichfork	= XFS_DATA_FORK,
		.trans		= tp,
		.owner		= xmi->xmi_ip2->i_ino,
	};
	struct xfs_dir2_sf_hdr	sfh;
	struct xfs_buf		*bp;
	int			size;
	int			error = 0;

	if (xfs_dir2_format(&args, &error) != XFS_DIR2_FMT_BLOCK)
		return error;

	error = xfs_dir3_block_read(tp, xmi->xmi_ip2, xmi->xmi_ip2->i_ino, &bp);
	if (error)
		return error;

	size = xfs_dir2_block_sfsize(xmi->xmi_ip2, bp->b_addr, &sfh);
	if (size > xfs_inode_data_fork_size(xmi->xmi_ip2))
		return 0;

	return xfs_dir2_block_to_sf(&args, bp, size, &sfh);
}

/* Convert inode2's remote symlink target back to shortform, if possible. */
STATIC int
xfs_exchmaps_link_to_sf(
	struct xfs_trans		*tp,
	struct xfs_exchmaps_intent	*xmi)
{
	struct xfs_inode		*ip = xmi->xmi_ip2;
	struct xfs_ifork		*ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK);
	char				*buf;
	int				error;

	if (ifp->if_format == XFS_DINODE_FMT_LOCAL ||
	    ip->i_disk_size > xfs_inode_data_fork_size(ip))
		return 0;

	/* Read the current symlink target into a buffer. */
	buf = kmalloc(ip->i_disk_size + 1, GFP_KERNEL | __GFP_NOLOCKDEP);
	if (!buf) {
		ASSERT(0);
		return -ENOMEM;
	}

	error = xfs_symlink_remote_read(ip, buf);
	if (error)
		goto free;

	/* Remove the blocks. */
	error = xfs_symlink_remote_truncate(tp, ip);
	if (error)
		goto free;

	/* Convert fork to local format and log our changes. */
	xfs_idestroy_fork(ifp);
	ifp->if_bytes = 0;
	ifp->if_format = XFS_DINODE_FMT_LOCAL;
	xfs_init_local_fork(ip, XFS_DATA_FORK, buf, ip->i_disk_size);
	xfs_trans_log_inode(tp, ip, XFS_ILOG_DDATA | XFS_ILOG_CORE);
free:
	kfree(buf);
	return error;
}

/* Clear the reflink flag after an exchange. */
static inline void
xfs_exchmaps_clear_reflink(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip)
{
	trace_xfs_reflink_unset_inode_flag(ip);

	ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK;
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
}

/* Finish whatever work might come after an exchange operation. */
static int
xfs_exchmaps_do_postop_work(
	struct xfs_trans		*tp,
	struct xfs_exchmaps_intent	*xmi)
{
	if (xmi->xmi_flags & __XFS_EXCHMAPS_INO2_SHORTFORM) {
		int			error = 0;

		if (xmi->xmi_flags & XFS_EXCHMAPS_ATTR_FORK)
			error = xfs_exchmaps_attr_to_sf(tp, xmi);
		else if (S_ISDIR(VFS_I(xmi->xmi_ip2)->i_mode))
			error = xfs_exchmaps_dir_to_sf(tp, xmi);
		else if (S_ISLNK(VFS_I(xmi->xmi_ip2)->i_mode))
			error = xfs_exchmaps_link_to_sf(tp, xmi);
		xmi->xmi_flags &= ~__XFS_EXCHMAPS_INO2_SHORTFORM;
		if (error)
			return error;
	}

	if (xmi->xmi_flags & XFS_EXCHMAPS_CLEAR_INO1_REFLINK) {
		xfs_exchmaps_clear_reflink(tp, xmi->xmi_ip1);
		xmi->xmi_flags &= ~XFS_EXCHMAPS_CLEAR_INO1_REFLINK;
	}

	if (xmi->xmi_flags & XFS_EXCHMAPS_CLEAR_INO2_REFLINK) {
		xfs_exchmaps_clear_reflink(tp, xmi->xmi_ip2);
		xmi->xmi_flags &= ~XFS_EXCHMAPS_CLEAR_INO2_REFLINK;
	}

	return 0;
}

/* Finish one step in a mapping exchange operation, possibly relogging. */
int
xfs_exchmaps_finish_one(
	struct xfs_trans		*tp,
	struct xfs_exchmaps_intent	*xmi)
{
	struct xfs_bmbt_irec		irec1, irec2;
	int				error;

	if (xmi_has_more_exchange_work(xmi)) {
		/*
		 * If the operation state says that some range of the files
		 * has not yet been exchanged, look for mappings in that range
		 * to exchange.  If we find some mappings, exchange them.
		 */
		error = xfs_exchmaps_find_mappings(xmi, &irec1, &irec2, NULL);
		if (error)
			return error;

		if (xmi_has_more_exchange_work(xmi))
			xfs_exchmaps_one_step(tp, xmi, &irec1, &irec2);

		/*
		 * If the caller asked us to exchange the file sizes after the
		 * exchange and either we just exchanged the last mappings in
		 * the range or we didn't find anything to exchange, update the
		 * ondisk file sizes.
		 */
		if ((xmi->xmi_flags & XFS_EXCHMAPS_SET_SIZES) &&
		    !xmi_has_more_exchange_work(xmi)) {
			xmi->xmi_ip1->i_disk_size = xmi->xmi_isize1;
			xmi->xmi_ip2->i_disk_size = xmi->xmi_isize2;

			xfs_trans_log_inode(tp, xmi->xmi_ip1, XFS_ILOG_CORE);
			xfs_trans_log_inode(tp, xmi->xmi_ip2, XFS_ILOG_CORE);
		}
	} else if (xmi_has_postop_work(xmi)) {
		/*
		 * Now that we're finished with the exchange operation,
		 * complete the post-op cleanup work.
		 */
		error = xfs_exchmaps_do_postop_work(tp, xmi);
		if (error)
			return error;
	}

	if (XFS_TEST_ERROR(false, tp->t_mountp, XFS_ERRTAG_EXCHMAPS_FINISH_ONE))
		return -EIO;

	/* If we still have work to do, ask for a new transaction. */
	if (xmi_has_more_exchange_work(xmi) || xmi_has_postop_work(xmi)) {
		trace_xfs_exchmaps_defer(tp->t_mountp, xmi);
		return -EAGAIN;
	}

	/*
	 * If we reach here, we've finished all the exchange work and the
	 * post-operation work.  The last thing we need to do before returning
	 * to the caller is to make sure that COW forks are set up correctly.
	 */
	if (!(xmi->xmi_flags & XFS_EXCHMAPS_ATTR_FORK)) {
		xfs_exchmaps_ensure_cowfork(xmi->xmi_ip1);
		xfs_exchmaps_ensure_cowfork(xmi->xmi_ip2);
	}

	return 0;
}

/*
 * Compute the amount of bmbt blocks we should reserve for each file.  In the
 * worst case, each exchange will fill a hole with a new mapping, which could
 * result in a btree split every time we add a new leaf block.
 */
static inline uint64_t
xfs_exchmaps_bmbt_blocks(
	struct xfs_mount		*mp,
	const struct xfs_exchmaps_req	*req)
{
	return howmany_64(req->nr_exchanges,
					XFS_MAX_CONTIG_BMAPS_PER_BLOCK(mp)) *
			XFS_EXTENTADD_SPACE_RES(mp, xfs_exchmaps_reqfork(req));
}
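
/*
 * Sketch of the math above (illustrative numbers only): if each bmbt block
 * can absorb, say, 128 contiguous new records and we estimate 300 exchanges,
 * we budget howmany_64(300, 128) = 3 leaf-blocks' worth of additions, each
 * multiplied by the worst-case per-extent-add space reservation for the fork
 * being exchanged.
 */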

/* Compute the space we should reserve for the rmap btree expansions. */
static inline uint64_t
xfs_exchmaps_rmapbt_blocks(
	struct xfs_mount		*mp,
	const struct xfs_exchmaps_req	*req)
{
	if (!xfs_has_rmapbt(mp))
		return 0;
	if (XFS_IS_REALTIME_INODE(req->ip1))
		return 0;

	return howmany_64(req->nr_exchanges,
					XFS_MAX_CONTIG_RMAPS_PER_BLOCK(mp)) *
			XFS_RMAPADD_SPACE_RES(mp);
}

/* Estimate the bmbt and rmapbt overhead required to exchange mappings. */
int
xfs_exchmaps_estimate_overhead(
	struct xfs_exchmaps_req		*req)
{
	struct xfs_mount		*mp = req->ip1->i_mount;
	xfs_filblks_t			bmbt_blocks;
	xfs_filblks_t			rmapbt_blocks;
	xfs_filblks_t			resblks = req->resblks;

	/*
	 * Compute the number of bmbt and rmapbt blocks we might need to handle
	 * the estimated number of exchanges.
	 */
	bmbt_blocks = xfs_exchmaps_bmbt_blocks(mp, req);
	rmapbt_blocks = xfs_exchmaps_rmapbt_blocks(mp, req);

	trace_xfs_exchmaps_overhead(mp, bmbt_blocks, rmapbt_blocks);

	/* Make sure the change in file block count doesn't overflow. */
	if (check_add_overflow(req->ip1_bcount, bmbt_blocks, &req->ip1_bcount))
		return -EFBIG;
	if (check_add_overflow(req->ip2_bcount, bmbt_blocks, &req->ip2_bcount))
		return -EFBIG;

	/*
	 * Add together the number of blocks we need to handle btree growth
	 * for both files, then add that to the number of blocks we need to
	 * reserve for this transaction.
	 */
	if (check_add_overflow(resblks, bmbt_blocks, &resblks))
		return -ENOSPC;
	if (check_add_overflow(resblks, bmbt_blocks, &resblks))
		return -ENOSPC;
	if (check_add_overflow(resblks, rmapbt_blocks, &resblks))
		return -ENOSPC;
	if (check_add_overflow(resblks, rmapbt_blocks, &resblks))
		return -ENOSPC;

	/* Can't actually reserve more than UINT_MAX blocks. */
	if (resblks > UINT_MAX)
		return -ENOSPC;

	req->resblks = resblks;
	trace_xfs_exchmaps_final_estimate(req);
	return 0;
}

/* Decide if we can merge two real mappings. */
static inline bool
xmi_can_merge(
	const struct xfs_bmbt_irec	*b1,
	const struct xfs_bmbt_irec	*b2)
{
	/* Don't merge holes. */
	if (b1->br_startblock == HOLESTARTBLOCK ||
	    b2->br_startblock == HOLESTARTBLOCK)
		return false;

	/* Don't merge delalloc reservations or other unreal mappings. */
	if (!xfs_bmap_is_real_extent(b1) || !xfs_bmap_is_real_extent(b2))
		return false;

	if (b1->br_startoff   + b1->br_blockcount == b2->br_startoff &&
	    b1->br_startblock + b1->br_blockcount == b2->br_startblock &&
	    b1->br_state			  == b2->br_state &&
	    b1->br_blockcount + b2->br_blockcount <= XFS_MAX_BMBT_EXTLEN)
		return true;

	return false;
}

/*
 * Decide if we can merge three mappings.  The caller must ensure that none
 * of the three mappings is a hole or a delalloc reservation.
 */
static inline bool
xmi_can_merge_all(
	const struct xfs_bmbt_irec	*l,
	const struct xfs_bmbt_irec	*m,
	const struct xfs_bmbt_irec	*r)
{
	xfs_filblks_t			new_len;

	new_len = l->br_blockcount + m->br_blockcount + r->br_blockcount;
	return new_len <= XFS_MAX_BMBT_EXTLEN;
}

#define CLEFT_CONTIG	0x01
#define CRIGHT_CONTIG	0x02
#define CHOLE		0x04
#define CBOTH_CONTIG	(CLEFT_CONTIG | CRIGHT_CONTIG)

#define NLEFT_CONTIG	0x10
#define NRIGHT_CONTIG	0x20
#define NHOLE		0x40
#define NBOTH_CONTIG	(NLEFT_CONTIG | NRIGHT_CONTIG)

/* Estimate the effect of a single exchange on mapping count. */
static inline int
xmi_delta_nextents_step(
	struct xfs_mount		*mp,
	const struct xfs_bmbt_irec	*left,
	const struct xfs_bmbt_irec	*curr,
	const struct xfs_bmbt_irec	*new,
	const struct xfs_bmbt_irec	*right)
{
	bool				lhole, rhole, chole, nhole;
	unsigned int			state = 0;
	int				ret = 0;

	lhole = left->br_startblock == HOLESTARTBLOCK;
	rhole = right->br_startblock == HOLESTARTBLOCK;
	chole = curr->br_startblock == HOLESTARTBLOCK;
	nhole = new->br_startblock == HOLESTARTBLOCK;

	if (chole)
		state |= CHOLE;
	if (!lhole && !chole && xmi_can_merge(left, curr))
		state |= CLEFT_CONTIG;
	if (!rhole && !chole && xmi_can_merge(curr, right))
		state |= CRIGHT_CONTIG;
	if ((state & CBOTH_CONTIG) == CBOTH_CONTIG &&
	    !xmi_can_merge_all(left, curr, right))
		state &= ~CRIGHT_CONTIG;

	if (nhole)
		state |= NHOLE;
	if (!lhole && !nhole && xmi_can_merge(left, new))
		state |= NLEFT_CONTIG;
	if (!rhole && !nhole && xmi_can_merge(new, right))
		state |= NRIGHT_CONTIG;
	if ((state & NBOTH_CONTIG) == NBOTH_CONTIG &&
	    !xmi_can_merge_all(left, new, right))
		state &= ~NRIGHT_CONTIG;

	switch (state & (CLEFT_CONTIG | CRIGHT_CONTIG | CHOLE)) {
	case CLEFT_CONTIG | CRIGHT_CONTIG:
		/*
		 * left/curr/right are the same mapping, so deleting curr
		 * causes 2 new mappings to be created.
		 */
		ret += 2;
		break;
	case 0:
		/*
		 * curr is not contiguous with any mapping, so we remove curr
		 * completely.
		 */
		ret--;
		break;
	case CHOLE:
		/* hole, do nothing. */
		break;
	case CLEFT_CONTIG:
	case CRIGHT_CONTIG:
		/* trim either left or right, no change */
		break;
	}

	switch (state & (NLEFT_CONTIG | NRIGHT_CONTIG | NHOLE)) {
	case NLEFT_CONTIG | NRIGHT_CONTIG:
		/*
		 * left/new/right will become the same mapping, so adding
		 * new causes the deletion of right.
		 */
		ret--;
		break;
	case 0:
		/* new is not contiguous with any mapping */
		ret++;
		break;
	case NHOLE:
		/* hole, do nothing. */
		break;
	case NLEFT_CONTIG:
	case NRIGHT_CONTIG:
		/* new is absorbed into left or right, no change */
		break;
	}

	trace_xfs_exchmaps_delta_nextents_step(mp, left, curr, new, right, ret,
			state);
	return ret;
}
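
/*
 * Example walk of the two state machines above (illustrative): if curr is
 * contiguous with both neighbors (CLEFT_CONTIG | CRIGHT_CONTIG), deleting it
 * splits one big mapping into two, so ret += 2; if new then joins both
 * neighbors (NLEFT_CONTIG | NRIGHT_CONTIG), right is absorbed, so ret -= 1.
 * Net estimate for that exchange step: one extra mapping record.
 */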

/* Make sure we don't overflow the extent (mapping) counters. */
static inline int
xmi_ensure_delta_nextents(
	struct xfs_exchmaps_req	*req,
	struct xfs_inode	*ip,
	int64_t			delta)
{
	struct xfs_mount	*mp = ip->i_mount;
	int			whichfork = xfs_exchmaps_reqfork(req);
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
	uint64_t		new_nextents;
	xfs_extnum_t		max_nextents;

	if (delta < 0)
		return 0;

	/*
	 * It's always an error if the delta causes integer overflow.  delta
	 * needs an explicit cast here to avoid warnings about implicit casts
	 * coded into the overflow check.
	 */
	if (check_add_overflow(ifp->if_nextents, (uint64_t)delta,
				&new_nextents))
		return -EFBIG;

	if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_REDUCE_MAX_IEXTENTS) &&
	    new_nextents > 10)
		return -EFBIG;

	/*
	 * We always promote both inodes to have large extent counts if the
	 * superblock feature is enabled, so we only need to check against the
	 * theoretical maximum.
	 */
	max_nextents = xfs_iext_max_nextents(xfs_has_large_extent_counts(mp),
					     whichfork);
	if (new_nextents > max_nextents)
		return -EFBIG;

	return 0;
}

/* Find the next mapping after irec. */
static inline int
xmi_next(
	struct xfs_inode		*ip,
	int				bmap_flags,
	const struct xfs_bmbt_irec	*irec,
	struct xfs_bmbt_irec		*nrec)
{
	xfs_fileoff_t			off;
	xfs_filblks_t			blockcount;
	int				nimaps = 1;
	int				error;

	off = irec->br_startoff + irec->br_blockcount;
	blockcount = XFS_MAX_FILEOFF - off;
	error = xfs_bmapi_read(ip, off, blockcount, nrec, &nimaps, bmap_flags);
	if (error)
		return error;
	if (nrec->br_startblock == DELAYSTARTBLOCK ||
	    nrec->br_startoff != off) {
		/*
		 * If we don't get the mapping we want, pretend there's a hole
		 * there, which our estimator function treats as unmergeable.
		 * We shouldn't get delalloc reservations.
		 */
		nrec->br_startblock = HOLESTARTBLOCK;
	}

	return 0;
}

int __init
xfs_exchmaps_intent_init_cache(void)
{
	xfs_exchmaps_intent_cache = kmem_cache_create("xfs_exchmaps_intent",
			sizeof(struct xfs_exchmaps_intent),
			0, 0, NULL);

	return xfs_exchmaps_intent_cache != NULL ? 0 : -ENOMEM;
}

void
xfs_exchmaps_intent_destroy_cache(void)
{
	kmem_cache_destroy(xfs_exchmaps_intent_cache);
	xfs_exchmaps_intent_cache = NULL;
}

/*
 * Decide if we will exchange the reflink flags between the two files after the
 * exchange.  The only time we want to do this is if we're exchanging all
 * mappings under EOF and the inode reflink flags have different states.
 */
static inline bool
xmi_can_exchange_reflink_flags(
	const struct xfs_exchmaps_req	*req,
	unsigned int			reflink_state)
{
	struct xfs_mount		*mp = req->ip1->i_mount;

	if (hweight32(reflink_state) != 1)
		return false;
	if (req->startoff1 != 0 || req->startoff2 != 0)
		return false;
	if (req->blockcount != XFS_B_TO_FSB(mp, req->ip1->i_disk_size))
		return false;
	if (req->blockcount != XFS_B_TO_FSB(mp, req->ip2->i_disk_size))
		return false;
	return true;
}

/* Allocate and initialize a new incore intent item from a request. */
struct xfs_exchmaps_intent *
xfs_exchmaps_init_intent(
	const struct xfs_exchmaps_req	*req)
{
	struct xfs_exchmaps_intent	*xmi;
	unsigned int			rs = 0;

	xmi = kmem_cache_zalloc(xfs_exchmaps_intent_cache,
			GFP_NOFS | __GFP_NOFAIL);
	INIT_LIST_HEAD(&xmi->xmi_list);
	xmi->xmi_ip1 = req->ip1;
	xmi->xmi_ip2 = req->ip2;
	xmi->xmi_startoff1 = req->startoff1;
	xmi->xmi_startoff2 = req->startoff2;
	xmi->xmi_blockcount = req->blockcount;
	xmi->xmi_isize1 = xmi->xmi_isize2 = -1;
	xmi->xmi_flags = req->flags & XFS_EXCHMAPS_PARAMS;

	if (xfs_exchmaps_whichfork(xmi) == XFS_ATTR_FORK) {
		xmi->xmi_flags |= __XFS_EXCHMAPS_INO2_SHORTFORM;
		return xmi;
	}

	if (req->flags & XFS_EXCHMAPS_SET_SIZES) {
		xmi->xmi_flags |= XFS_EXCHMAPS_SET_SIZES;
		xmi->xmi_isize1 = req->ip2->i_disk_size;
		xmi->xmi_isize2 = req->ip1->i_disk_size;
	}

	/* Record the state of each inode's reflink flag before the op. */
	if (xfs_is_reflink_inode(req->ip1))
		rs |= 1;
	if (xfs_is_reflink_inode(req->ip2))
		rs |= 2;

	/*
	 * Figure out if we're clearing the reflink flags (which effectively
	 * exchanges them) after the operation.
	 */
	if (xmi_can_exchange_reflink_flags(req, rs)) {
		if (rs & 1)
			xmi->xmi_flags |= XFS_EXCHMAPS_CLEAR_INO1_REFLINK;
		if (rs & 2)
			xmi->xmi_flags |= XFS_EXCHMAPS_CLEAR_INO2_REFLINK;
	}

	if (S_ISDIR(VFS_I(xmi->xmi_ip2)->i_mode) ||
	    S_ISLNK(VFS_I(xmi->xmi_ip2)->i_mode))
		xmi->xmi_flags |= __XFS_EXCHMAPS_INO2_SHORTFORM;

	return xmi;
}
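
/*
 * Reflink-flag exchange example (illustrative): if only ip1 is a reflink
 * inode (rs == 1) and the request covers every mapping under EOF in both
 * files, the intent records XFS_EXCHMAPS_CLEAR_INO1_REFLINK.  During the
 * operation xfs_exchmaps_ensure_reflink() sets the flag on ip2, and the
 * post-op work clears it on ip1, so the flags end up exchanged along with
 * the mappings.
 */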

/*
 * Estimate the number of exchange operations and the number of file blocks
 * in each file that will be affected by the exchange operation.
 */
int
xfs_exchmaps_estimate(
	struct xfs_exchmaps_req		*req)
{
	struct xfs_exchmaps_intent	*xmi;
	struct xfs_bmbt_irec		irec1, irec2;
	struct xfs_exchmaps_adjacent	adj = ADJACENT_INIT;
	xfs_filblks_t			ip1_blocks = 0, ip2_blocks = 0;
	int64_t				d_nexts1, d_nexts2;
	int				bmap_flags;
	int				error;

	ASSERT(!(req->flags & ~XFS_EXCHMAPS_PARAMS));

	bmap_flags = xfs_bmapi_aflag(xfs_exchmaps_reqfork(req));
	xmi = xfs_exchmaps_init_intent(req);

	/*
	 * To guard against the possibility of overflowing the extent counters,
	 * we have to estimate an upper bound on the potential increase in that
	 * counter.  We can split the mapping at each end of the range, and for
	 * each step of the exchange we can split the mapping that we're
	 * working on if the mappings do not align.
	 */
	d_nexts1 = d_nexts2 = 3;

	while (xmi_has_more_exchange_work(xmi)) {
		/*
		 * Walk through the file ranges until we find something to
		 * exchange.  Because we're simulating the exchange, pass in
		 * adj to capture skipped mappings for correct estimation of
		 * bmbt record merges.
		 */
		error = xfs_exchmaps_find_mappings(xmi, &irec1, &irec2, &adj);
		if (error)
			goto out_free;
		if (!xmi_has_more_exchange_work(xmi))
			break;

		/* Update accounting. */
		if (xfs_bmap_is_real_extent(&irec1))
			ip1_blocks += irec1.br_blockcount;
		if (xfs_bmap_is_real_extent(&irec2))
			ip2_blocks += irec2.br_blockcount;
		req->nr_exchanges++;

		/* Read the next mappings from both files. */
		error = xmi_next(req->ip1, bmap_flags, &irec1, &adj.right1);
		if (error)
			goto out_free;

		error = xmi_next(req->ip2, bmap_flags, &irec2, &adj.right2);
		if (error)
			goto out_free;

		/* Update extent count deltas. */
		d_nexts1 += xmi_delta_nextents_step(req->ip1->i_mount,
				&adj.left1, &irec1, &irec2, &adj.right1);

		d_nexts2 += xmi_delta_nextents_step(req->ip1->i_mount,
				&adj.left2, &irec2, &irec1, &adj.right2);

		/* Now pretend we exchanged the mappings. */
		if (xmi_can_merge(&adj.left2, &irec1))
			adj.left2.br_blockcount += irec1.br_blockcount;
		else
			memcpy(&adj.left2, &irec1, sizeof(irec1));

		if (xmi_can_merge(&adj.left1, &irec2))
			adj.left1.br_blockcount += irec2.br_blockcount;
		else
			memcpy(&adj.left1, &irec2, sizeof(irec2));

		xmi_advance(xmi, &irec1);
	}

	/* Account for the blocks that are being exchanged. */
	if (XFS_IS_REALTIME_INODE(req->ip1) &&
	    xfs_exchmaps_reqfork(req) == XFS_DATA_FORK) {
		req->ip1_rtbcount = ip1_blocks;
		req->ip2_rtbcount = ip2_blocks;
	} else {
		req->ip1_bcount = ip1_blocks;
		req->ip2_bcount = ip2_blocks;
	}

	/*
	 * Make sure that both forks have enough slack left in their extent
	 * counters that the exchange operation will not overflow.
	 */
	trace_xfs_exchmaps_delta_nextents(req, d_nexts1, d_nexts2);
	if (req->ip1 == req->ip2) {
		error = xmi_ensure_delta_nextents(req, req->ip1,
				d_nexts1 + d_nexts2);
	} else {
		error = xmi_ensure_delta_nextents(req, req->ip1, d_nexts1);
		if (error)
			goto out_free;
		error = xmi_ensure_delta_nextents(req, req->ip2, d_nexts2);
	}
	if (error)
		goto out_free;

	trace_xfs_exchmaps_initial_estimate(req);
	error = xfs_exchmaps_estimate_overhead(req);
out_free:
	kmem_cache_free(xfs_exchmaps_intent_cache, xmi);
	return error;
}

/* Set the reflink flag before an operation. */
static inline void
xfs_exchmaps_set_reflink(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip)
{
	trace_xfs_reflink_set_inode_flag(ip);

	ip->i_diflags2 |= XFS_DIFLAG2_REFLINK;
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
}

/*
 * If either file has shared blocks and we're exchanging data forks, we must
 * flag the other file as having shared blocks so that we get the shared-block
 * rmap functions if we need to fix up the rmaps.
 */
void
xfs_exchmaps_ensure_reflink(
	struct xfs_trans			*tp,
	const struct xfs_exchmaps_intent	*xmi)
{
	unsigned int				rs = 0;

	if (xfs_is_reflink_inode(xmi->xmi_ip1))
		rs |= 1;
	if (xfs_is_reflink_inode(xmi->xmi_ip2))
		rs |= 2;

	if ((rs & 1) && !xfs_is_reflink_inode(xmi->xmi_ip2))
		xfs_exchmaps_set_reflink(tp, xmi->xmi_ip2);

	if ((rs & 2) && !xfs_is_reflink_inode(xmi->xmi_ip1))
		xfs_exchmaps_set_reflink(tp, xmi->xmi_ip1);
}

/* Set the large extent count flag before an operation if needed. */
static inline void
xfs_exchmaps_ensure_large_extent_counts(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip)
{
	if (xfs_inode_has_large_extent_counts(ip))
		return;

	ip->i_diflags2 |= XFS_DIFLAG2_NREXT64;
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
}

/* Widen the extent counter fields of both inodes if necessary. */
void
xfs_exchmaps_upgrade_extent_counts(
	struct xfs_trans			*tp,
	const struct xfs_exchmaps_intent	*xmi)
{
	if (!xfs_has_large_extent_counts(tp->t_mountp))
		return;

	xfs_exchmaps_ensure_large_extent_counts(tp, xmi->xmi_ip1);
	xfs_exchmaps_ensure_large_extent_counts(tp, xmi->xmi_ip2);
}

/*
 * Schedule an exchange of a range of mappings from one inode to another.
 *
 * The use of file mapping exchange log intent items ensures the operation can
 * be resumed even if the system goes down.  The caller must commit the
 * transaction to start the work.
 *
 * The inodes must be joined to the transaction and ILOCKd; they will still be
 * joined to the transaction at exit.
 */
void
xfs_exchange_mappings(
	struct xfs_trans		*tp,
	const struct xfs_exchmaps_req	*req)
{
	struct xfs_exchmaps_intent	*xmi;

	BUILD_BUG_ON(XFS_EXCHMAPS_INTERNAL_FLAGS & XFS_EXCHMAPS_LOGGED_FLAGS);

	xfs_assert_ilocked(req->ip1, XFS_ILOCK_EXCL);
	xfs_assert_ilocked(req->ip2, XFS_ILOCK_EXCL);
	ASSERT(!(req->flags & ~XFS_EXCHMAPS_LOGGED_FLAGS));
	if (req->flags & XFS_EXCHMAPS_SET_SIZES)
		ASSERT(!(req->flags & XFS_EXCHMAPS_ATTR_FORK));
	ASSERT(xfs_has_exchange_range(tp->t_mountp));

	if (req->blockcount == 0)
		return;

	xmi = xfs_exchmaps_init_intent(req);
	xfs_exchmaps_defer_add(tp, xmi);
	xfs_exchmaps_ensure_reflink(tp, xmi);
	xfs_exchmaps_upgrade_extent_counts(tp, xmi);
}
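
/*
 * Illustrative caller sketch (not part of this file; the exact transaction
 * and locking helpers a real caller uses are assumptions).  A data-fork
 * exchange of two whole files might look like:
 *
 *	struct xfs_exchmaps_req	req = {
 *		.ip1		= ip1,
 *		.ip2		= ip2,
 *		.startoff1	= 0,
 *		.startoff2	= 0,
 *		.blockcount	= XFS_B_TO_FSB(mp, i_size_read(VFS_I(ip1))),
 *		.flags		= XFS_EXCHMAPS_SET_SIZES,
 *	};
 *
 *	error = xfs_exchmaps_estimate(&req);	// fills req.resblks
 *	// ...allocate a transaction reserving req.resblks blocks,
 *	// then ILOCK both inodes and join them to the transaction...
 *	xfs_exchange_mappings(tp, &req);	// defers the first intent
 *	error = xfs_trans_commit(tp);		// defer ops run the exchange
 */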