1  /* SPDX-License-Identifier: GPL-2.0 */
2  /*
3   * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
4   * Copyright (c) 2013 Red Hat, Inc.
5   * All Rights Reserved.
6   */
7  #ifndef __XFS_DA_FORMAT_H__
8  #define __XFS_DA_FORMAT_H__
9  
10  /*
11   * This structure is common to both leaf nodes and non-leaf nodes in the Btree.
12   *
13   * It is used to manage a doubly linked list of all blocks at the same
14   * level in the Btree, and to identify which type of block this is.
15   */
/* Magic numbers identifying the v2 (non-CRC) da btree block types. */
#define XFS_DA_NODE_MAGIC	0xfebe	/* magic number: non-leaf blocks */
#define XFS_ATTR_LEAF_MAGIC	0xfbee	/* magic number: attribute leaf blks */
#define XFS_DIR2_LEAF1_MAGIC	0xd2f1	/* magic number: v2 dirlf single blks */
#define XFS_DIR2_LEAFN_MAGIC	0xd2ff	/* magic number: v2 dirlf multi blks */
20  
/*
 * Header shared by every da btree block: the sibling links of the
 * doubly linked per-level list plus the magic number that identifies
 * the block type.
 */
typedef struct xfs_da_blkinfo {
	__be32		forw;			/* following (forward) block in list */
	__be32		back;			/* previous (backward) block in list */
	__be16		magic;			/* validity check on block */
	__be16		pad;			/* unused */
} xfs_da_blkinfo_t;
27  
28  /*
29   * CRC enabled directory structure types
30   *
31   * The headers change size for the additional verification information, but
32   * otherwise the tree layouts and contents are unchanged. Hence the da btree
33   * code can use the struct xfs_da_blkinfo for manipulating the tree links and
34   * magic numbers without modification for both v2 and v3 nodes.
35   */
/* Magic numbers identifying the v3 (CRC-enabled) da btree block types. */
#define XFS_DA3_NODE_MAGIC	0x3ebe	/* magic number: non-leaf blocks */
#define XFS_ATTR3_LEAF_MAGIC	0x3bee	/* magic number: attribute leaf blks */
#define XFS_DIR3_LEAF1_MAGIC	0x3df1	/* magic number: v3 dirlf single blks */
#define XFS_DIR3_LEAFN_MAGIC	0x3dff	/* magic number: v3 dirlf multi blks */
40  
/*
 * v3 da btree block header: the v2 struct xfs_da_blkinfo followed by the
 * additional verification fields (CRC, block number, LSN, UUID, owner).
 */
struct xfs_da3_blkinfo {
	/*
	 * the node link manipulation code relies on the fact that the first
	 * element of this structure is the struct xfs_da_blkinfo so it can
	 * ignore the differences in the rest of the structures.
	 */
	struct xfs_da_blkinfo	hdr;
	__be32			crc;	/* CRC of block */
	__be64			blkno;	/* first block of the buffer */
	__be64			lsn;	/* sequence number of last write */
	uuid_t			uuid;	/* filesystem we belong to */
	__be64			owner;	/* inode that owns the block */
};
54  
55  /*
56   * This is the structure of the root and intermediate nodes in the Btree.
57   * The leaf nodes are defined above.
58   *
59   * Entries are not packed.
60   *
61   * Since we have duplicate keys, use a binary search but always follow
 * all matches in the block, not just the first match found.
63   */
#define XFS_DA_NODE_MAXDEPTH	5	/* max depth of Btree */

/*
 * Header of a v2 root/intermediate node block: the common block info
 * followed by the entry count and the level of this node.
 */
typedef struct xfs_da_node_hdr {
	struct xfs_da_blkinfo	info;	/* block type, links, etc. */
	__be16			__count; /* count of active entries */
	__be16			__level; /* level above leaves (leaf == 0) */
} xfs_da_node_hdr_t;
71  
/*
 * Header of a v3 node block.  Carries the same count/level fields as
 * struct xfs_da_node_hdr but starts with the larger struct xfs_da3_blkinfo
 * and ends with 32 bits of explicit padding.
 */
struct xfs_da3_node_hdr {
	struct xfs_da3_blkinfo	info;	/* block type, links, etc. */
	__be16			__count; /* count of active entries */
	__be16			__level; /* level above leaves (leaf == 0) */
	__be32			__pad32; /* explicit padding */
};

/* Byte offset of the CRC field from the start of a v3 node header. */
#define XFS_DA3_NODE_CRC_OFF	(offsetof(struct xfs_da3_node_hdr, info.crc))
80  
/*
 * One element of a node block's __btree[] array.  The same entry type is
 * used by both the v2 and v3 node block structures.
 */
typedef struct xfs_da_node_entry {
	__be32	hashval;	/* hash value for this descendant */
	__be32	before;		/* Btree block before this key */
} xfs_da_node_entry_t;
85  
/* v2 node block: header followed by a flexible array of node entries. */
typedef struct xfs_da_intnode {
	struct xfs_da_node_hdr	hdr;
	struct xfs_da_node_entry __btree[];
} xfs_da_intnode_t;
90  
/*
 * v3 node block: the larger v3 header followed by the same entry type as
 * the v2 layout, so only the offset of __btree[] differs.
 */
struct xfs_da3_intnode {
	struct xfs_da3_node_hdr	hdr;
	struct xfs_da_node_entry __btree[];
};
95  
96  /*
97   * Directory version 2.
98   *
99   * There are 4 possible formats:
100   *  - shortform - embedded into the inode
101   *  - single block - data with embedded leaf at the end
102   *  - multiple data blocks, single leaf+freeindex block
103   *  - data blocks, node and leaf blocks (btree), freeindex blocks
104   *
105   * Note: many node blocks structures and constants are shared with the attr
106   * code and defined in xfs_da_btree.h.
107   */
108  
/*
 * 32-bit magic numbers for v2 directory blocks.  Each value is the ASCII
 * encoding of the four-character tag shown in its comment.
 */
#define	XFS_DIR2_BLOCK_MAGIC	0x58443242	/* XD2B: single block dirs */
#define	XFS_DIR2_DATA_MAGIC	0x58443244	/* XD2D: multiblock dirs */
#define	XFS_DIR2_FREE_MAGIC	0x58443246	/* XD2F: free index blocks */
112  
113  /*
114   * Directory Version 3 With CRCs.
115   *
116   * The tree formats are the same as for version 2 directories.  The difference
117   * is in the block header and dirent formats. In many cases the v3 structures
118   * use v2 definitions as they are no different and this makes code sharing much
119   * easier.
120   *
121   * Also, the xfs_dir3_*() functions handle both v2 and v3 formats - if the
122   * format is v2 then they switch to the existing v2 code, or the format is v3
123   * they implement the v3 functionality. This means the existing dir2 is a mix of
124   * xfs_dir2/xfs_dir3 calls and functions. The xfs_dir3 functions are called
125   * where there is a difference in the formats, otherwise the code is unchanged.
126   *
127   * Where it is possible, the code decides what to do based on the magic numbers
128   * in the blocks rather than feature bits in the superblock. This means the code
 * is as independent of the external XFS code as possible as it doesn't require
130   * passing struct xfs_mount pointers into places where it isn't really
131   * necessary.
132   *
133   * Version 3 includes:
134   *
135   *	- a larger block header for CRC and identification purposes and so the
136   *	offsets of all the structures inside the blocks are different.
137   *
138   *	- new magic numbers to be able to detect the v2/v3 types on the fly.
139   */
140  
/*
 * 32-bit magic numbers for v3 (CRC-enabled) directory blocks.  Each value
 * is the ASCII encoding of the four-character tag shown in its comment.
 */
#define	XFS_DIR3_BLOCK_MAGIC	0x58444233	/* XDB3: single block dirs */
#define	XFS_DIR3_DATA_MAGIC	0x58444433	/* XDD3: multiblock dirs */
#define	XFS_DIR3_FREE_MAGIC	0x58444633	/* XDF3: free index blocks */
144  
/*
 * Dirents in version 3 directories have a file type field. Additions to this
 * list are an on-disk format change, requiring feature bits. Valid values
 * are as follows:
 */
#define XFS_DIR3_FT_UNKNOWN		0	/* type not known */
#define XFS_DIR3_FT_REG_FILE		1	/* regular file */
#define XFS_DIR3_FT_DIR			2	/* directory */
#define XFS_DIR3_FT_CHRDEV		3	/* character device */
#define XFS_DIR3_FT_BLKDEV		4	/* block device */
#define XFS_DIR3_FT_FIFO		5	/* fifo */
#define XFS_DIR3_FT_SOCK		6	/* socket */
#define XFS_DIR3_FT_SYMLINK		7	/* symbolic link */
#define XFS_DIR3_FT_WHT			8	/* whiteout */

/* One more than the largest value defined above. */
#define XFS_DIR3_FT_MAX			9
161  
/*
 * Comma-separated list of { value, name } initializers, one for each
 * XFS_DIR3_FT_* file type, for use inside an array initializer.
 */
#define XFS_DIR3_FTYPE_STR \
	{ XFS_DIR3_FT_UNKNOWN,	"unknown" }, \
	{ XFS_DIR3_FT_REG_FILE,	"file" }, \
	{ XFS_DIR3_FT_DIR,	"directory" }, \
	{ XFS_DIR3_FT_CHRDEV,	"char" }, \
	{ XFS_DIR3_FT_BLKDEV,	"block" }, \
	{ XFS_DIR3_FT_FIFO,	"fifo" }, \
	{ XFS_DIR3_FT_SOCK,	"sock" }, \
	{ XFS_DIR3_FT_SYMLINK,	"symlink" }, \
	{ XFS_DIR3_FT_WHT,	"whiteout" }
172  
/*
 * Byte offset in data block and shortform entry.
 *
 * xfs_dir2_data_off_t is the 16-bit form; xfs_dir2_data_aoff_t is the
 * wider "argument form" used by the shortform offset accessors in this
 * file.
 */
typedef uint16_t	xfs_dir2_data_off_t;
/* All-ones 16-bit value; presumably the "no offset" marker - TODO confirm. */
#define	NULLDATAOFF	0xffffU
typedef uint		xfs_dir2_data_aoff_t;	/* argument form */
179  
/*
 * Offset in data space of a data entry.
 *
 * Stored in 32 bits; XFS_DIR2_MAX_DATAPTR is the largest representable
 * value and XFS_DIR2_NULL_DATAPTR (zero) is the null value.
 */
typedef uint32_t	xfs_dir2_dataptr_t;
#define	XFS_DIR2_MAX_DATAPTR	((xfs_dir2_dataptr_t)0xffffffff)
#define	XFS_DIR2_NULL_DATAPTR	((xfs_dir2_dataptr_t)0)
186  
/*
 * Byte offset in a directory.  Alias of xfs_off_t.
 */
typedef	xfs_off_t	xfs_dir2_off_t;

/*
 * Directory block number (logical dirblk in file).  32 bits wide.
 */
typedef uint32_t	xfs_dir2_db_t;
196  
/*
 * Sizes in bytes of the short (32-bit) and long (64-bit) inode number
 * encodings, and the difference between the two.
 */
#define XFS_INO32_SIZE	4
#define XFS_INO64_SIZE	8
#define XFS_INO64_DIFF	(XFS_INO64_SIZE - XFS_INO32_SIZE)

/* Largest inode number that fits in 32 bits. */
#define	XFS_DIR2_MAX_SHORT_INUM	((xfs_ino_t)0xffffffffULL)
202  
/*
 * Directory layout when stored internal to an inode.
 *
 * Small directories are packed as tightly as possible so as to fit into the
 * literal area of the inode.  These "shortform" directories consist of a
 * single xfs_dir2_sf_hdr header followed by zero or more xfs_dir2_sf_entry
 * structures.  Due to the different inode number storage sizes and the
 * variable length name field in the xfs_dir2_sf_entry, all these structures
 * are variable length, and the accessors in this file should be used to
 * iterate over them.
 *
 * The parent[] array is declared at its maximum (8-byte) size; use
 * xfs_dir2_sf_hdr_size() rather than sizeof to get the header length that
 * applies for a given i8count.
 */
typedef struct xfs_dir2_sf_hdr {
	uint8_t			count;		/* count of entries */
	uint8_t			i8count;	/* count of 8-byte inode #s */
	uint8_t			parent[8];	/* parent dir inode number */
} __packed xfs_dir2_sf_hdr_t;
219  
/*
 * Fixed leading part of a shortform directory entry.  Only the fields up
 * to and including the variable-length name are declared; the data that
 * follows the name is described in the comment inside the struct and has
 * no named members.
 */
typedef struct xfs_dir2_sf_entry {
	__u8			namelen;	/* actual name length */
	__u8			offset[2];	/* saved offset */
	__u8			name[];		/* name, variable size */
	/*
	 * A single byte containing the file type field follows the inode
	 * number for version 3 directory entries.
	 *
	 * A 64-bit or 32-bit inode number follows here, at a variable offset
	 * after the name.
	 *
	 * NOTE(review): the relative order of the file type byte and the
	 * inode number cannot be confirmed from this declaration; verify
	 * against the shortform inode/ftype accessors.
	 */
} __packed xfs_dir2_sf_entry_t;
232  
xfs_dir2_sf_hdr_size(int i8count)233  static inline int xfs_dir2_sf_hdr_size(int i8count)
234  {
235  	return sizeof(struct xfs_dir2_sf_hdr) -
236  		(i8count == 0) * XFS_INO64_DIFF;
237  }
238  
239  static inline xfs_dir2_data_aoff_t
xfs_dir2_sf_get_offset(xfs_dir2_sf_entry_t * sfep)240  xfs_dir2_sf_get_offset(xfs_dir2_sf_entry_t *sfep)
241  {
242  	return get_unaligned_be16(sfep->offset);
243  }
244  
245  static inline void
xfs_dir2_sf_put_offset(xfs_dir2_sf_entry_t * sfep,xfs_dir2_data_aoff_t off)246  xfs_dir2_sf_put_offset(xfs_dir2_sf_entry_t *sfep, xfs_dir2_data_aoff_t off)
247  {
248  	put_unaligned_be16(off, sfep->offset);
249  }
250  
251  static inline struct xfs_dir2_sf_entry *
xfs_dir2_sf_firstentry(struct xfs_dir2_sf_hdr * hdr)252  xfs_dir2_sf_firstentry(struct xfs_dir2_sf_hdr *hdr)
253  {
254  	return (struct xfs_dir2_sf_entry *)
255  		((char *)hdr + xfs_dir2_sf_hdr_size(hdr->i8count));
256  }
257  
258  /*
259   * Data block structures.
260   *
261   * A pure data block looks like the following drawing on disk:
262   *
263   *    +-------------------------------------------------+
264   *    | xfs_dir2_data_hdr_t                             |
265   *    +-------------------------------------------------+
266   *    | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t |
267   *    | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t |
268   *    | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t |
269   *    | ...                                             |
270   *    +-------------------------------------------------+
271   *    | unused space                                    |
272   *    +-------------------------------------------------+
273   *
274   * As all the entries are variable size structures the accessors below should
275   * be used to iterate over them.
276   *
277   * In addition to the pure data blocks for the data and node formats,
278   * most structures are also used for the combined data/freespace "block"
279   * format below.
280   */
281  
#define	XFS_DIR2_DATA_ALIGN_LOG	3		/* i.e., 8 bytes */
/* Alignment in bytes, derived from the log2 value above. */
#define	XFS_DIR2_DATA_ALIGN	(1 << XFS_DIR2_DATA_ALIGN_LOG)
/* Value of the freetag field of struct xfs_dir2_data_unused. */
#define	XFS_DIR2_DATA_FREE_TAG	0xffff
/* Number of best-free slots in the v2 and v3 data block headers. */
#define	XFS_DIR2_DATA_FD_COUNT	3
286  
287  /*
288   * Directory address space divided into sections,
289   * spaces separated by 32GB.
290   */
/* Number of spaces; this header defines indices 0 (data), 1 (leaf), 2 (free). */
#define	XFS_DIR2_MAX_SPACES	3
/* Size of one space in bytes: 2^(32 + 3), i.e. 32GB. */
#define	XFS_DIR2_SPACE_SIZE	(1ULL << (32 + XFS_DIR2_DATA_ALIGN_LOG))
/* Index of the data space and its byte offset (zero). */
#define	XFS_DIR2_DATA_SPACE	0
#define	XFS_DIR2_DATA_OFFSET	(XFS_DIR2_DATA_SPACE * XFS_DIR2_SPACE_SIZE)
295  
/*
 * Describe a free area in the data block.
 *
 * The freespace will be formatted as a xfs_dir2_data_unused_t.  The data
 * block headers hold an array of XFS_DIR2_DATA_FD_COUNT of these.
 */
typedef struct xfs_dir2_data_free {
	__be16			offset;		/* start of freespace */
	__be16			length;		/* length of freespace */
} xfs_dir2_data_free_t;
305  
/*
 * Header for the v2 data blocks: the magic number followed by the
 * best-free table.
 *
 * The code knows that XFS_DIR2_DATA_FD_COUNT is 3.
 */
typedef struct xfs_dir2_data_hdr {
	__be32			magic;		/* XFS_DIR2_DATA_MAGIC or */
						/* XFS_DIR2_BLOCK_MAGIC */
	xfs_dir2_data_free_t	bestfree[XFS_DIR2_DATA_FD_COUNT];
} xfs_dir2_data_hdr_t;
316  
/*
 * Define a structure for all the verification fields we are adding to the
 * directory block structures. This will be used in several structures.
 * The magic number must be the first entry to align with all the dir2
 * structures so we can determine how to decode them just by the magic
 * number.
 */
struct xfs_dir3_blk_hdr {
	__be32			magic;	/* magic number */
	__be32			crc;	/* CRC of block */
	__be64			blkno;	/* first block of the buffer */
	__be64			lsn;	/* sequence number of last write */
	uuid_t			uuid;	/* filesystem we belong to */
	__be64			owner;	/* inode that owns the block */
};
331  
/*
 * Header for the v3 data blocks: the common v3 block header, the best-free
 * table and explicit padding.  Note that the table member is spelled
 * best_free here but bestfree in struct xfs_dir2_data_hdr.
 */
struct xfs_dir3_data_hdr {
	struct xfs_dir3_blk_hdr	hdr;
	xfs_dir2_data_free_t	best_free[XFS_DIR2_DATA_FD_COUNT];
	__be32			pad;	/* 64 bit alignment */
};

/* Byte offset of the CRC field from the start of a v3 data block header. */
#define XFS_DIR3_DATA_CRC_OFF  offsetof(struct xfs_dir3_data_hdr, hdr.crc)
339  
/*
 * Active entry in a data block.
 *
 * Aligned to 8 bytes.  After the variable length name field there is a
 * 2 byte tag field, which can be accessed using xfs_dir3_data_entry_tag_p.
 *
 * For dir3 structures, there is a file type field between the name and the
 * tag.  This can only be manipulated by helper functions. It is packed hard
 * against the end of the name so any padding for rounding is between the
 * file type and the tag.
 *
 * The filetype and tag members are shown commented out below because they
 * sit at variable offsets and so cannot be declared as struct members.
 */
typedef struct xfs_dir2_data_entry {
	__be64			inumber;	/* inode number */
	__u8			namelen;	/* name length */
	__u8			name[];		/* name bytes, no null */
     /* __u8			filetype; */	/* type of inode we point to */
     /*	__be16                  tag; */		/* starting offset of us */
} xfs_dir2_data_entry_t;
358  
/*
 * Unused entry in a data block.
 *
 * Aligned to 8 bytes.  Tag appears as the last 2 bytes and must be accessed
 * using xfs_dir2_data_unused_tag_p.  The declared position of the tag
 * member is nominal: the accessor locates the tag from the length field,
 * not from the member offset.
 */
typedef struct xfs_dir2_data_unused {
	__be16			freetag;	/* XFS_DIR2_DATA_FREE_TAG */
	__be16			length;		/* total free length */
						/* variable offset */
	__be16			tag;		/* starting offset of us */
} xfs_dir2_data_unused_t;
371  
372  /*
373   * Pointer to a freespace's tag word.
374   */
375  static inline __be16 *
xfs_dir2_data_unused_tag_p(struct xfs_dir2_data_unused * dup)376  xfs_dir2_data_unused_tag_p(struct xfs_dir2_data_unused *dup)
377  {
378  	return (__be16 *)((char *)dup +
379  			be16_to_cpu(dup->length) - sizeof(__be16));
380  }
381  
382  /*
383   * Leaf block structures.
384   *
385   * A pure leaf block looks like the following drawing on disk:
386   *
387   *    +---------------------------+
388   *    | xfs_dir2_leaf_hdr_t       |
389   *    +---------------------------+
390   *    | xfs_dir2_leaf_entry_t     |
391   *    | xfs_dir2_leaf_entry_t     |
392   *    | xfs_dir2_leaf_entry_t     |
393   *    | xfs_dir2_leaf_entry_t     |
394   *    | ...                       |
395   *    +---------------------------+
396   *    | xfs_dir2_data_off_t       |
397   *    | xfs_dir2_data_off_t       |
398   *    | xfs_dir2_data_off_t       |
399   *    | ...                       |
400   *    +---------------------------+
401   *    | xfs_dir2_leaf_tail_t      |
402   *    +---------------------------+
403   *
404   * The xfs_dir2_data_off_t members (bests) and tail are at the end of the block
405   * for single-leaf (magic = XFS_DIR2_LEAF1_MAGIC) blocks only, but not present
406   * for directories with separate leaf nodes and free space blocks
407   * (magic = XFS_DIR2_LEAFN_MAGIC).
408   *
409   * As all the entries are variable size structures the accessors below should
410   * be used to iterate over them.
411   */
412  
/*
 * Offset of the leaf/node space.  First block in this space
 * is the btree root.
 *
 * XFS_DIR2_LEAF_SPACE is the index of the space; XFS_DIR2_LEAF_OFFSET is
 * its starting byte offset (one XFS_DIR2_SPACE_SIZE).
 */
#define	XFS_DIR2_LEAF_SPACE	1
#define	XFS_DIR2_LEAF_OFFSET	(XFS_DIR2_LEAF_SPACE * XFS_DIR2_SPACE_SIZE)
419  
/*
 * Leaf block header (v2): the common da block info followed by the entry
 * and stale-entry counts.
 */
typedef struct xfs_dir2_leaf_hdr {
	xfs_da_blkinfo_t	info;		/* header for da routines */
	__be16			count;		/* count of entries */
	__be16			stale;		/* count of stale entries */
} xfs_dir2_leaf_hdr_t;
428  
/*
 * Leaf block header (v3): same counts as struct xfs_dir2_leaf_hdr, but
 * built on struct xfs_da3_blkinfo and padded out explicitly.
 */
struct xfs_dir3_leaf_hdr {
	struct xfs_da3_blkinfo	info;		/* header for da routines */
	__be16			count;		/* count of entries */
	__be16			stale;		/* count of stale entries */
	__be32			pad;		/* 64 bit alignment */
};
435  
/*
 * Leaf block entry: pairs the hash of a name with the address of the
 * corresponding data entry.  Used by both the v2 and v3 leaf structures
 * and by the single-block format.
 */
typedef struct xfs_dir2_leaf_entry {
	__be32			hashval;	/* hash value of name */
	__be32			address;	/* address of data entry */
} xfs_dir2_leaf_entry_t;
443  
/*
 * Leaf block tail.  bestcount is the number of __be16 "bests" values that
 * sit immediately before this tail; see xfs_dir2_leaf_bests_p().
 */
typedef struct xfs_dir2_leaf_tail {
	__be32			bestcount;
} xfs_dir2_leaf_tail_t;
450  
/*
 * Leaf block (v2): header followed by a flexible array of leaf entries.
 */
typedef struct xfs_dir2_leaf {
	xfs_dir2_leaf_hdr_t	hdr;			/* leaf header */
	xfs_dir2_leaf_entry_t	__ents[];		/* entries */
} xfs_dir2_leaf_t;
458  
/*
 * Leaf block (v3): the v3 header followed by the same entry type as the
 * v2 leaf block.
 */
struct xfs_dir3_leaf {
	struct xfs_dir3_leaf_hdr	hdr;		/* leaf header */
	struct xfs_dir2_leaf_entry	__ents[];	/* entries */
};

/* Byte offset of the CRC field from the start of a v3 leaf header. */
#define XFS_DIR3_LEAF_CRC_OFF  offsetof(struct xfs_dir3_leaf_hdr, info.crc)
465  
466  /*
467   * Get address of the bests array in the single-leaf block.
468   */
469  static inline __be16 *
xfs_dir2_leaf_bests_p(struct xfs_dir2_leaf_tail * ltp)470  xfs_dir2_leaf_bests_p(struct xfs_dir2_leaf_tail *ltp)
471  {
472  	return (__be16 *)ltp - be32_to_cpu(ltp->bestcount);
473  }
474  
475  /*
476   * Free space block definitions for the node format.
477   */
478  
/*
 * Offset of the freespace index.
 *
 * XFS_DIR2_FREE_SPACE is the index of the space; XFS_DIR2_FREE_OFFSET is
 * its starting byte offset (two XFS_DIR2_SPACE_SIZE units).
 */
#define	XFS_DIR2_FREE_SPACE	2
#define	XFS_DIR2_FREE_OFFSET	(XFS_DIR2_FREE_SPACE * XFS_DIR2_SPACE_SIZE)
484  
/* Header of a v2 free index block. */
typedef	struct xfs_dir2_free_hdr {
	__be32			magic;		/* XFS_DIR2_FREE_MAGIC */
	__be32			firstdb;	/* db of first entry */
	__be32			nvalid;		/* count of valid entries */
	__be32			nused;		/* count of used entries */
} xfs_dir2_free_hdr_t;
491  
/* v2 free index block: header followed by a flexible array of bests. */
typedef struct xfs_dir2_free {
	xfs_dir2_free_hdr_t	hdr;		/* block header */
	__be16			bests[];	/* best free counts */
						/* unused entries are -1 */
} xfs_dir2_free_t;
497  
/*
 * Header of a v3 free index block: the common v3 directory block header,
 * the same three counters as the v2 header, and explicit padding.
 */
struct xfs_dir3_free_hdr {
	struct xfs_dir3_blk_hdr	hdr;
	__be32			firstdb;	/* db of first entry */
	__be32			nvalid;		/* count of valid entries */
	__be32			nused;		/* count of used entries */
	__be32			pad;		/* 64 bit alignment */
};
505  
/* v3 free index block: v3 header followed by a flexible array of bests. */
struct xfs_dir3_free {
	struct xfs_dir3_free_hdr hdr;
	__be16			bests[];	/* best free counts */
						/* unused entries are -1 */
};

/* Byte offset of the CRC field from the start of a v3 free index block. */
#define XFS_DIR3_FREE_CRC_OFF  offsetof(struct xfs_dir3_free, hdr.hdr.crc)
513  
514  /*
515   * Single block format.
516   *
517   * The single block format looks like the following drawing on disk:
518   *
519   *    +-------------------------------------------------+
520   *    | xfs_dir2_data_hdr_t                             |
521   *    +-------------------------------------------------+
522   *    | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t |
523   *    | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t |
 *    | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t |
525   *    | ...                                             |
526   *    +-------------------------------------------------+
527   *    | unused space                                    |
528   *    +-------------------------------------------------+
529   *    | ...                                             |
530   *    | xfs_dir2_leaf_entry_t                           |
531   *    | xfs_dir2_leaf_entry_t                           |
532   *    +-------------------------------------------------+
533   *    | xfs_dir2_block_tail_t                           |
534   *    +-------------------------------------------------+
535   *
536   * As all the entries are variable size structures the accessors below should
537   * be used to iterate over them.
538   */
539  
/*
 * Tail of a single-block format directory.  The leaf entries it counts sit
 * immediately before it; see xfs_dir2_block_leaf_p().
 */
typedef struct xfs_dir2_block_tail {
	__be32		count;			/* count of leaf entries */
	__be32		stale;			/* count of stale lf entries */
} xfs_dir2_block_tail_t;
544  
545  /*
546   * Pointer to the leaf entries embedded in a data block (1-block format)
547   */
548  static inline struct xfs_dir2_leaf_entry *
xfs_dir2_block_leaf_p(struct xfs_dir2_block_tail * btp)549  xfs_dir2_block_leaf_p(struct xfs_dir2_block_tail *btp)
550  {
551  	return ((struct xfs_dir2_leaf_entry *)btp) - be32_to_cpu(btp->count);
552  }
553  
554  
555  /*
556   * Attribute storage layout
557   *
558   * Attribute lists are structured around Btrees where all the data
559   * elements are in the leaf nodes.  Attribute names are hashed into an int,
560   * then that int is used as the index into the Btree.  Since the hashval
561   * of an attribute name may not be unique, we may have duplicate keys.  The
562   * internal links in the Btree are logical block offsets into the file.
563   *
564   * Struct leaf_entry's are packed from the top.  Name/values grow from the
565   * bottom but are not packed.  The freemap contains run-length-encoded entries
566   * for the free bytes after the leaf_entry's, but only the N largest such,
567   * smaller runs are dropped.  When the freemap doesn't show enough space
568   * for an allocation, we compact the name/value area and try again.  If we
569   * still don't have enough space, then we have to split the block.  The
570   * name/value structs (both local and remote versions) must be 32bit aligned.
571   *
572   * Since we have duplicate hash keys, for each key that matches, compare
573   * the actual name string.  The root and intermediate node search always
574   * takes the first-in-the-block key match found, so we should only have
575   * to work "forw"ard.  If none matches, continue with the "forw"ard leaf
576   * nodes until the hash key changes or the attribute name is found.
577   *
578   * We store the fact that an attribute is a ROOT/USER/SECURE attribute in
579   * the leaf_entry.  The namespaces are independent only because we also look
580   * at the namespace bit when we are looking for a matching attribute name.
581   *
582   * We also store an "incomplete" bit in the leaf_entry.  It shows that an
583   * attribute is in the middle of being created and should not be shown to
584   * the user if we crash during the time that the bit is set.  We clear the
585   * bit when we have finished setting up the attribute.  We do this because
586   * we cannot create some large attributes inside a single transaction, and we
587   * need some indication that we weren't finished if we crash in the middle.
588   */
/* Number of freemap[] slots in the v2 and v3 attribute leaf headers. */
#define XFS_ATTR_LEAF_MAPSIZE	3	/* how many freespace slots */
590  
591  /*
592   * Attribute storage when stored inside the inode.
593   *
594   * Small attribute lists are packed as tightly as possible so as to fit into the
595   * literal area of the inode.
596   *
597   * These "shortform" attribute forks consist of a single xfs_attr_sf_hdr header
598   * followed by zero or more xfs_attr_sf_entry structures.
599   */
/*
 * Header of a shortform attribute fork.  It is followed by zero or more
 * struct xfs_attr_sf_entry records.
 */
struct xfs_attr_sf_hdr {	/* constant-structure header block */
	__be16	totsize;	/* total bytes in shortform list */
	__u8	count;		/* count of active entries */
	__u8	padding;	/* pad byte */
};
605  
/*
 * One shortform attribute: three single-byte fixed fields followed by the
 * name and value bytes in a flexible array.
 */
struct xfs_attr_sf_entry {
	__u8	namelen;	/* actual length of name (no NULL) */
	__u8	valuelen;	/* actual length of value (no NULL) */
	__u8	flags;		/* flags bits (XFS_ATTR_*) */
	__u8	nameval[];	/* name & value bytes concatenated */
};
612  
/* One slot of the freemap[] array in an attribute leaf header. */
typedef struct xfs_attr_leaf_map {	/* RLE map of free bytes */
	__be16	base;			  /* base of free region */
	__be16	size;			  /* length of free region */
} xfs_attr_leaf_map_t;
617  
/*
 * Header of a v2 attribute leaf block: the common da block info, usage
 * counters and the free-region map.
 */
typedef struct xfs_attr_leaf_hdr {	/* constant-structure header block */
	xfs_da_blkinfo_t info;		/* block type, links, etc. */
	__be16	count;			/* count of active leaf_entry's */
	__be16	usedbytes;		/* num bytes of names/values stored */
	__be16	firstused;		/* first used byte in name area */
	__u8	holes;			/* != 0 if blk needs compaction */
	__u8	pad1;			/* pad byte */
	xfs_attr_leaf_map_t freemap[XFS_ATTR_LEAF_MAPSIZE];
					/* N largest free regions */
} xfs_attr_leaf_hdr_t;
628  
/*
 * One element of an attribute leaf block's entries[] array.  nameidx is the
 * byte offset, from the start of the leaf block, of the entry's name/value
 * structure (see xfs_attr3_leaf_name()).
 */
typedef struct xfs_attr_leaf_entry {	/* sorted on key, not name */
	__be32	hashval;		/* hash value of name */
	__be16	nameidx;		/* index into buffer of name/value */
	__u8	flags;			/* LOCAL/ROOT/SECURE/INCOMPLETE flag */
	__u8	pad2;			/* unused pad byte */
} xfs_attr_leaf_entry_t;
635  
/*
 * "Local" name/value structure: three bytes of fixed-length fields
 * followed by the name and value bytes in a flexible array.
 */
typedef struct xfs_attr_leaf_name_local {
	__be16	valuelen;		/* number of bytes in value */
	__u8	namelen;		/* length of name bytes */
	/*
	 * In Linux 6.5 this flex array was converted from nameval[1] to
	 * nameval[].  Be very careful here about extra padding at the end;
	 * see xfs_attr_leaf_entsize_local() for details.
	 */
	__u8	nameval[];		/* name/value bytes */
} xfs_attr_leaf_name_local_t;
646  
/*
 * "Remote" name/value structure: nine bytes of fixed-length fields
 * (value block number, value length, name length) followed by the name
 * bytes in a flexible array.
 */
typedef struct xfs_attr_leaf_name_remote {
	__be32	valueblk;		/* block number of value bytes */
	__be32	valuelen;		/* number of bytes in value */
	__u8	namelen;		/* length of name bytes */
	/*
	 * In Linux 6.5 this flex array was converted from name[1] to name[].
	 * Be very careful here about extra padding at the end; see
	 * xfs_attr_leaf_entsize_remote() for details.
	 */
	__u8	name[];			/* name bytes */
} xfs_attr_leaf_name_remote_t;
658  
/*
 * v2 attribute leaf block: header followed by a flexible array of leaf
 * entries.  The name/value structures that fill the rest of the block are
 * not declared as members; see the comment inside the struct.
 */
typedef struct xfs_attr_leafblock {
	xfs_attr_leaf_hdr_t	hdr;	/* constant-structure header block */
	xfs_attr_leaf_entry_t	entries[];	/* sorted on key, not name */
	/*
	 * The rest of the block contains the following structures after the
	 * leaf entries, growing from the bottom up. The variables are never
	 * referenced and defining them can actually make gcc optimize away
	 * accesses to the 'entries' array above index 0 so don't do that.
	 *
	 * xfs_attr_leaf_name_local_t namelist;
	 * xfs_attr_leaf_name_remote_t valuelist;
	 */
} xfs_attr_leafblock_t;
672  
/*
 * CRC enabled leaf structures. Called "version 3" structures to match the
 * version number of the directory and dablk structures for this feature, and
 * attr2 is already taken by the variable inode attribute fork size feature.
 *
 * The fields after info mirror struct xfs_attr_leaf_hdr, with explicit
 * padding added at the end.
 */
struct xfs_attr3_leaf_hdr {
	struct xfs_da3_blkinfo	info;		/* block type, links, etc. */
	__be16			count;		/* count of active leaf_entry's */
	__be16			usedbytes;	/* num bytes of names/values stored */
	__be16			firstused;	/* first used byte in name area */
	__u8			holes;		/* != 0 if blk needs compaction */
	__u8			pad1;		/* pad byte */
	struct xfs_attr_leaf_map freemap[XFS_ATTR_LEAF_MAPSIZE];
						/* N largest free regions */
	__be32			pad2;		/* 64 bit alignment */
};

/* Byte offset of the CRC field from the start of a v3 attr leaf header. */
#define XFS_ATTR3_LEAF_CRC_OFF	(offsetof(struct xfs_attr3_leaf_hdr, info.crc))
690  
/*
 * v3 attribute leaf block: the v3 header followed by the same entry type
 * as the v2 leaf block.
 */
struct xfs_attr3_leafblock {
	struct xfs_attr3_leaf_hdr	hdr;
	struct xfs_attr_leaf_entry	entries[];

	/*
	 * The rest of the block contains the following structures after the
	 * leaf entries, growing from the bottom up. The variables are never
	 * referenced, the locations accessed purely from helper functions.
	 *
	 * struct xfs_attr_leaf_name_local
	 * struct xfs_attr_leaf_name_remote
	 */
};
704  
/*
 * Special value to represent fs block size in the leaf header firstused field.
 * Only used when block size overflows the 2-bytes available on disk (the
 * field is a __be16).
 */
#define XFS_ATTR3_LEAF_NULLOFF	0
710  
/*
 * Flags used in the leaf_entry[i].flags field.
 *
 * The *_BIT macros are bit positions; the macros without the suffix are
 * the corresponding single-bit masks.
 */
#define	XFS_ATTR_LOCAL_BIT	0	/* attr is stored locally */
#define	XFS_ATTR_ROOT_BIT	1	/* limit access to trusted attrs */
#define	XFS_ATTR_SECURE_BIT	2	/* limit access to secure attrs */
#define	XFS_ATTR_PARENT_BIT	3	/* parent pointer attrs */
#define	XFS_ATTR_INCOMPLETE_BIT	7	/* attr in middle of create/delete */
#define XFS_ATTR_LOCAL		(1u << XFS_ATTR_LOCAL_BIT)
#define XFS_ATTR_ROOT		(1u << XFS_ATTR_ROOT_BIT)
#define XFS_ATTR_SECURE		(1u << XFS_ATTR_SECURE_BIT)
#define XFS_ATTR_PARENT		(1u << XFS_ATTR_PARENT_BIT)
#define XFS_ATTR_INCOMPLETE	(1u << XFS_ATTR_INCOMPLETE_BIT)
724  
/* Mask of the namespace flags: ROOT, SECURE and PARENT. */
#define XFS_ATTR_NSP_ONDISK_MASK	(XFS_ATTR_ROOT | \
					 XFS_ATTR_SECURE | \
					 XFS_ATTR_PARENT)

/* Private attr namespaces not exposed to userspace */
#define XFS_ATTR_PRIVATE_NSP_MASK	(XFS_ATTR_PARENT)

/* Mask of every flag defined here: the namespace flags, LOCAL and INCOMPLETE. */
#define XFS_ATTR_ONDISK_MASK	(XFS_ATTR_NSP_ONDISK_MASK | \
				 XFS_ATTR_LOCAL | \
				 XFS_ATTR_INCOMPLETE)
735  
/*
 * Comma-separated list of { flag, name } initializers for the LOCAL, ROOT,
 * SECURE and PARENT flags, for use inside an array initializer.
 * XFS_ATTR_INCOMPLETE is not included.
 */
#define XFS_ATTR_NAMESPACE_STR \
	{ XFS_ATTR_LOCAL,	"local" }, \
	{ XFS_ATTR_ROOT,	"root" }, \
	{ XFS_ATTR_SECURE,	"secure" }, \
	{ XFS_ATTR_PARENT,	"parent" }
741  
/*
 * Alignment for namelist and valuelist entries (since they are mixed
 * there can be only one alignment value).
 *
 * Defined as the size of xfs_dablk_t; the entsize helpers in this file
 * describe it as 4 bytes.
 */
#define	XFS_ATTR_LEAF_NAME_ALIGN	((uint)sizeof(xfs_dablk_t))
747  
748  static inline int
xfs_attr3_leaf_hdr_size(struct xfs_attr_leafblock * leafp)749  xfs_attr3_leaf_hdr_size(struct xfs_attr_leafblock *leafp)
750  {
751  	if (leafp->hdr.info.magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC))
752  		return sizeof(struct xfs_attr3_leaf_hdr);
753  	return sizeof(struct xfs_attr_leaf_hdr);
754  }
755  
756  static inline struct xfs_attr_leaf_entry *
xfs_attr3_leaf_entryp(xfs_attr_leafblock_t * leafp)757  xfs_attr3_leaf_entryp(xfs_attr_leafblock_t *leafp)
758  {
759  	if (leafp->hdr.info.magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC))
760  		return &((struct xfs_attr3_leafblock *)leafp)->entries[0];
761  	return &leafp->entries[0];
762  }
763  
764  /*
765   * Cast typed pointers for "local" and "remote" name/value structs.
766   */
767  static inline char *
xfs_attr3_leaf_name(xfs_attr_leafblock_t * leafp,int idx)768  xfs_attr3_leaf_name(xfs_attr_leafblock_t *leafp, int idx)
769  {
770  	struct xfs_attr_leaf_entry *entries = xfs_attr3_leaf_entryp(leafp);
771  
772  	return &((char *)leafp)[be16_to_cpu(entries[idx].nameidx)];
773  }
774  
775  static inline xfs_attr_leaf_name_remote_t *
xfs_attr3_leaf_name_remote(xfs_attr_leafblock_t * leafp,int idx)776  xfs_attr3_leaf_name_remote(xfs_attr_leafblock_t *leafp, int idx)
777  {
778  	return (xfs_attr_leaf_name_remote_t *)xfs_attr3_leaf_name(leafp, idx);
779  }
780  
781  static inline xfs_attr_leaf_name_local_t *
xfs_attr3_leaf_name_local(xfs_attr_leafblock_t * leafp,int idx)782  xfs_attr3_leaf_name_local(xfs_attr_leafblock_t *leafp, int idx)
783  {
784  	return (xfs_attr_leaf_name_local_t *)xfs_attr3_leaf_name(leafp, idx);
785  }
786  
787  /*
788   * Calculate total bytes used (including trailing pad for alignment) for
789   * a "local" name/value structure, a "remote" name/value structure, and
790   * a pointer which might be either.
791   */
xfs_attr_leaf_entsize_remote(int nlen)792  static inline int xfs_attr_leaf_entsize_remote(int nlen)
793  {
794  	/*
795  	 * Prior to Linux 6.5, struct xfs_attr_leaf_name_remote ended with
796  	 * name[1], which was used as a flexarray.  The layout of this struct
797  	 * is 9 bytes of fixed-length fields followed by a __u8 flex array at
798  	 * offset 9.
799  	 *
800  	 * On most architectures, struct xfs_attr_leaf_name_remote had two
801  	 * bytes of implicit padding at the end of the struct to make the
802  	 * struct length 12.  After converting name[1] to name[], there are
803  	 * three implicit padding bytes and the struct size remains 12.
804  	 * However, there are compiler configurations that do not add implicit
805  	 * padding at all (m68k) and have been broken for years.
806  	 *
807  	 * This entsize computation historically added (the xattr name length)
808  	 * to (the padded struct length - 1) and rounded that sum up to the
809  	 * nearest multiple of 4 (NAME_ALIGN).  IOWs, round_up(11 + nlen, 4).
810  	 * This is encoded in the ondisk format, so we cannot change this.
811  	 *
812  	 * Compute the entsize from offsetof of the flexarray and manually
813  	 * adding bytes for the implicit padding.
814  	 */
815  	const size_t remotesize =
816  			offsetof(struct xfs_attr_leaf_name_remote, name) + 2;
817  
818  	return round_up(remotesize + nlen, XFS_ATTR_LEAF_NAME_ALIGN);
819  }
820  
xfs_attr_leaf_entsize_local(int nlen,int vlen)821  static inline int xfs_attr_leaf_entsize_local(int nlen, int vlen)
822  {
823  	/*
824  	 * Prior to Linux 6.5, struct xfs_attr_leaf_name_local ended with
825  	 * nameval[1], which was used as a flexarray.  The layout of this
826  	 * struct is 3 bytes of fixed-length fields followed by a __u8 flex
827  	 * array at offset 3.
828  	 *
829  	 * struct xfs_attr_leaf_name_local had zero bytes of implicit padding
830  	 * at the end of the struct to make the struct length 4.  On most
831  	 * architectures, after converting nameval[1] to nameval[], there is
832  	 * one implicit padding byte and the struct size remains 4.  However,
833  	 * there are compiler configurations that do not add implicit padding
834  	 * at all (m68k) and would break.
835  	 *
836  	 * This entsize computation historically added (the xattr name and
837  	 * value length) to (the padded struct length - 1) and rounded that sum
838  	 * up to the nearest multiple of 4 (NAME_ALIGN).  IOWs, the formula is
839  	 * round_up(3 + nlen + vlen, 4).  This is encoded in the ondisk format,
840  	 * so we cannot change this.
841  	 *
842  	 * Compute the entsize from offsetof of the flexarray and manually
843  	 * adding bytes for the implicit padding.
844  	 */
845  	const size_t localsize =
846  			offsetof(struct xfs_attr_leaf_name_local, nameval);
847  
848  	return round_up(localsize + nlen + vlen, XFS_ATTR_LEAF_NAME_ALIGN);
849  }
850  
/*
 * Return three quarters of @bsize, computed as (bsize / 2) + (bsize / 4)
 * with each shift truncating on its own (e.g. 7 yields 3 + 1 = 4).
 *
 * NOTE(review): judging by the name this is the size limit for a "local"
 * entry in a block of @bsize bytes; confirm against the callers.
 */
static inline int xfs_attr_leaf_entsize_local_max(int bsize)
{
	return (bsize >> 1) + (bsize >> 2);
}
855  
856  
857  
858  /*
859   * Remote attribute block format definition
860   *
861   * There is one of these headers per filesystem block in a remote attribute.
862   * This is done to ensure there is a 1:1 mapping between the attribute value
863   * length and the number of blocks needed to store the attribute. This makes the
864   * verification of a buffer a little more complex, but greatly simplifies the
865   * allocation, reading and writing of these attributes as we don't have to guess
866   * the number of blocks needed to store the attribute data.
867   */
#define XFS_ATTR3_RMT_MAGIC	0x5841524d	/* XARM */
869  
870  struct xfs_attr3_rmt_hdr {
871  	__be32	rm_magic;
872  	__be32	rm_offset;
873  	__be32	rm_bytes;
874  	__be32	rm_crc;
875  	uuid_t	rm_uuid;
876  	__be64	rm_owner;
877  	__be64	rm_blkno;
878  	__be64	rm_lsn;
879  };
880  
/* Byte offset of the rm_crc field within struct xfs_attr3_rmt_hdr. */
#define XFS_ATTR3_RMT_CRC_OFF	offsetof(struct xfs_attr3_rmt_hdr, rm_crc)
882  
/*
 * Defined elsewhere.  NOTE(review): presumably the number of attribute
 * value bytes that fit in one remote attribute block of @mp; confirm
 * against the definition.
 */
unsigned int xfs_attr3_rmt_buf_space(struct xfs_mount *mp);
884  
885  /* Number of bytes in a directory block. */
xfs_dir2_dirblock_bytes(struct xfs_sb * sbp)886  static inline unsigned int xfs_dir2_dirblock_bytes(struct xfs_sb *sbp)
887  {
888  	return 1 << (sbp->sb_blocklog + sbp->sb_dirblklog);
889  }
890  
891  xfs_failaddr_t xfs_da3_blkinfo_verify(struct xfs_buf *bp,
892  				      struct xfs_da3_blkinfo *hdr3);
893  
894  /*
895   * Parent pointer attribute format definition
896   *
897   * The xattr name contains the dirent name.
898   * The xattr value encodes the parent inode number and generation to ease
899   * opening parents by handle.
900   * The xattr hashval is xfs_dir2_namehash() ^ p_ino
901   */
902  struct xfs_parent_rec {
903  	__be64	p_ino;
904  	__be32	p_gen;
905  } __packed;
906  
907  #endif /* __XFS_DA_FORMAT_H__ */
908