xref: /openbmc/linux/fs/xfs/libxfs/xfs_da_format.h (revision addee42a)
1 /*
2  * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
3  * Copyright (c) 2013 Red Hat, Inc.
4  * All Rights Reserved.
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License as
8  * published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it would be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write the Free Software Foundation,
17  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
18  */
19 #ifndef __XFS_DA_FORMAT_H__
20 #define __XFS_DA_FORMAT_H__
21 
22 /*
23  * This structure is common to both leaf nodes and non-leaf nodes in the Btree.
24  *
25  * It is used to manage a doubly linked list of all blocks at the same
26  * level in the Btree, and to identify which type of block this is.
27  */
/* Magic numbers stored in xfs_da_blkinfo.magic for the non-CRC formats. */
#define XFS_DA_NODE_MAGIC		0xfebe	/* non-leaf blocks */
#define XFS_ATTR_LEAF_MAGIC		0xfbee	/* attribute leaf blocks */
#define XFS_DIR2_LEAF1_MAGIC		0xd2f1	/* v2 dir leaf: single blocks */
#define XFS_DIR2_LEAFN_MAGIC		0xd2ff	/* v2 dir leaf: multi blocks */
32 
33 typedef struct xfs_da_blkinfo {
34 	__be32		forw;			/* previous block in list */
35 	__be32		back;			/* following block in list */
36 	__be16		magic;			/* validity check on block */
37 	__be16		pad;			/* unused */
38 } xfs_da_blkinfo_t;
39 
40 /*
41  * CRC enabled directory structure types
42  *
43  * The headers change size for the additional verification information, but
44  * otherwise the tree layouts and contents are unchanged. Hence the da btree
45  * code can use the struct xfs_da_blkinfo for manipulating the tree links and
46  * magic numbers without modification for both v2 and v3 nodes.
47  */
/* Magic numbers for the CRC enabled (v3) variants of the da btree blocks. */
#define XFS_DA3_NODE_MAGIC	0x3ebe	/* magic number: non-leaf blocks */
#define XFS_ATTR3_LEAF_MAGIC	0x3bee	/* magic number: attribute leaf blks */
#define	XFS_DIR3_LEAF1_MAGIC	0x3df1	/* magic number: v3 dirlf single blks */
#define	XFS_DIR3_LEAFN_MAGIC	0x3dff	/* magic number: v3 dirlf multi blks */
52 
53 struct xfs_da3_blkinfo {
54 	/*
55 	 * the node link manipulation code relies on the fact that the first
56 	 * element of this structure is the struct xfs_da_blkinfo so it can
57 	 * ignore the differences in the rest of the structures.
58 	 */
59 	struct xfs_da_blkinfo	hdr;
60 	__be32			crc;	/* CRC of block */
61 	__be64			blkno;	/* first block of the buffer */
62 	__be64			lsn;	/* sequence number of last write */
63 	uuid_t			uuid;	/* filesystem we belong to */
64 	__be64			owner;	/* inode that owns the block */
65 };
66 
/*
 * This is the structure of the root and intermediate nodes in the Btree.
 * The leaf nodes are defined further down in this file.
 *
 * Entries are not packed.
 *
 * Since we have duplicate keys, use a binary search but always follow
 * all matches in the block, not just the first match found.
 */
#define XFS_DA_NODE_MAXDEPTH		5	/* max depth of Btree */
77 
78 typedef struct xfs_da_node_hdr {
79 	struct xfs_da_blkinfo	info;	/* block type, links, etc. */
80 	__be16			__count; /* count of active entries */
81 	__be16			__level; /* level above leaves (leaf == 0) */
82 } xfs_da_node_hdr_t;
83 
84 struct xfs_da3_node_hdr {
85 	struct xfs_da3_blkinfo	info;	/* block type, links, etc. */
86 	__be16			__count; /* count of active entries */
87 	__be16			__level; /* level above leaves (leaf == 0) */
88 	__be32			__pad32;
89 };
90 
91 #define XFS_DA3_NODE_CRC_OFF	(offsetof(struct xfs_da3_node_hdr, info.crc))
92 
93 typedef struct xfs_da_node_entry {
94 	__be32	hashval;	/* hash value for this descendant */
95 	__be32	before;		/* Btree block before this key */
96 } xfs_da_node_entry_t;
97 
98 typedef struct xfs_da_intnode {
99 	struct xfs_da_node_hdr	hdr;
100 	struct xfs_da_node_entry __btree[];
101 } xfs_da_intnode_t;
102 
103 struct xfs_da3_intnode {
104 	struct xfs_da3_node_hdr	hdr;
105 	struct xfs_da_node_entry __btree[];
106 };
107 
108 /*
109  * In-core version of the node header to abstract the differences in the v2 and
110  * v3 disk format of the headers. Callers need to convert to/from disk format as
111  * appropriate.
112  */
struct xfs_da3_icnode_hdr {
	/* sibling links and block magic */
	uint32_t	forw;
	uint32_t	back;
	uint16_t	magic;

	/* node header counters */
	uint16_t	count;
	uint16_t	level;
};
120 
121 /*
122  * Directory version 2.
123  *
124  * There are 4 possible formats:
125  *  - shortform - embedded into the inode
126  *  - single block - data with embedded leaf at the end
127  *  - multiple data blocks, single leaf+freeindex block
128  *  - data blocks, node and leaf blocks (btree), freeindex blocks
129  *
130  * Note: many node blocks structures and constants are shared with the attr
131  * code and defined in xfs_da_btree.h.
132  */
133 
/* Each magic number spells a four character ASCII tag. */
#define XFS_DIR2_BLOCK_MAGIC		0x58443242	/* "XD2B": single block dirs */
#define XFS_DIR2_DATA_MAGIC		0x58443244	/* "XD2D": multiblock dirs */
#define XFS_DIR2_FREE_MAGIC		0x58443246	/* "XD2F": free index blocks */
137 
138 /*
139  * Directory Version 3 With CRCs.
140  *
141  * The tree formats are the same as for version 2 directories.  The difference
142  * is in the block header and dirent formats. In many cases the v3 structures
143  * use v2 definitions as they are no different and this makes code sharing much
144  * easier.
145  *
 * Also, the xfs_dir3_*() functions handle both v2 and v3 formats - if the
 * format is v2 then they switch to the existing v2 code, or if the format is
 * v3 they implement the v3 functionality. This means the existing dir2 is a
 * mix of xfs_dir2/xfs_dir3 calls and functions. The xfs_dir3 functions are
 * called where there is a difference in the formats, otherwise the code is
 * unchanged.
151  *
 * Where it is possible, the code decides what to do based on the magic numbers
 * in the blocks rather than feature bits in the superblock. This means the code
 * is as independent of the external XFS code as possible and doesn't require
 * passing struct xfs_mount pointers into places where it isn't really
 * necessary.
157  *
158  * Version 3 includes:
159  *
160  *	- a larger block header for CRC and identification purposes and so the
161  *	offsets of all the structures inside the blocks are different.
162  *
163  *	- new magic numbers to be able to detect the v2/v3 types on the fly.
164  */
165 
/* Each magic number spells a four character ASCII tag. */
#define XFS_DIR3_BLOCK_MAGIC		0x58444233	/* "XDB3": single block dirs */
#define XFS_DIR3_DATA_MAGIC		0x58444433	/* "XDD3": multiblock dirs */
#define XFS_DIR3_FREE_MAGIC		0x58444633	/* "XDF3": free index blocks */
169 
170 /*
171  * Dirents in version 3 directories have a file type field. Additions to this
172  * list are an on-disk format change, requiring feature bits. Valid values
173  * are as follows:
174  */
#define XFS_DIR3_FT_UNKNOWN	0
#define XFS_DIR3_FT_REG_FILE	1
#define XFS_DIR3_FT_DIR		2
#define XFS_DIR3_FT_CHRDEV	3
#define XFS_DIR3_FT_BLKDEV	4
#define XFS_DIR3_FT_FIFO	5
#define XFS_DIR3_FT_SOCK	6
#define XFS_DIR3_FT_SYMLINK	7
#define XFS_DIR3_FT_WHT		8

/* One more than the largest file type value defined above. */
#define XFS_DIR3_FT_MAX		9
186 
187 /*
188  * Byte offset in data block and shortform entry.
189  */
190 typedef uint16_t	xfs_dir2_data_off_t;
191 #define	NULLDATAOFF	0xffffU
192 typedef uint		xfs_dir2_data_aoff_t;	/* argument form */
193 
194 /*
195  * Offset in data space of a data entry.
196  */
typedef uint32_t xfs_dir2_dataptr_t;

/* Largest representable value, and the value used as the null pointer. */
#define XFS_DIR2_MAX_DATAPTR		((xfs_dir2_dataptr_t)0xffffffff)
#define XFS_DIR2_NULL_DATAPTR		((xfs_dir2_dataptr_t)0)
200 
201 /*
202  * Byte offset in a directory.
203  */
204 typedef	xfs_off_t	xfs_dir2_off_t;
205 
206 /*
207  * Directory block number (logical dirblk in file)
208  */
typedef uint32_t xfs_dir2_db_t;		/* 32 bit directory block number */
210 
/* Byte sizes of the short and long inode number encodings. */
#define XFS_INO32_SIZE		4
#define XFS_INO64_SIZE		8
/* Extra bytes a long inode number takes compared to a short one. */
#define XFS_INO64_DIFF		(XFS_INO64_SIZE - XFS_INO32_SIZE)
214 
/* Largest inode number that fits in 32 bits. */
#define XFS_DIR2_MAX_SHORT_INUM		((xfs_ino_t)0xffffffffULL)
216 
217 /*
218  * Directory layout when stored internal to an inode.
219  *
 * Small directories are packed as tightly as possible so as to fit into the
 * literal area of the inode.  These "shortform" directories consist of a
 * single xfs_dir2_sf_hdr header followed by zero or more xfs_dir2_sf_entry
 * structures.  Due to the different inode number storage size and the
 * variable length name field in the xfs_dir2_sf_entry all these structures
 * are variable length, and the accessors in this file should be used to
 * iterate over them.
227  */
/* Header at the start of a shortform directory. */
struct xfs_dir2_sf_hdr {
	uint8_t		count;		/* count of entries */
	uint8_t		i8count;	/* count of 8-byte inode #s */
	uint8_t		parent[8];	/* parent dir inode number */
} __packed;

typedef struct xfs_dir2_sf_hdr xfs_dir2_sf_hdr_t;
233 
234 typedef struct xfs_dir2_sf_entry {
235 	__u8			namelen;	/* actual name length */
236 	__u8			offset[2];	/* saved offset */
237 	__u8			name[];		/* name, variable size */
238 	/*
239 	 * A single byte containing the file type field follows the inode
240 	 * number for version 3 directory entries.
241 	 *
242 	 * A 64-bit or 32-bit inode number follows here, at a variable offset
243 	 * after the name.
244 	 */
245 } xfs_dir2_sf_entry_t;
246 
247 static inline int xfs_dir2_sf_hdr_size(int i8count)
248 {
249 	return sizeof(struct xfs_dir2_sf_hdr) -
250 		(i8count == 0) * XFS_INO64_DIFF;
251 }
252 
253 static inline xfs_dir2_data_aoff_t
254 xfs_dir2_sf_get_offset(xfs_dir2_sf_entry_t *sfep)
255 {
256 	return get_unaligned_be16(sfep->offset);
257 }
258 
259 static inline void
260 xfs_dir2_sf_put_offset(xfs_dir2_sf_entry_t *sfep, xfs_dir2_data_aoff_t off)
261 {
262 	put_unaligned_be16(off, sfep->offset);
263 }
264 
265 static inline struct xfs_dir2_sf_entry *
266 xfs_dir2_sf_firstentry(struct xfs_dir2_sf_hdr *hdr)
267 {
268 	return (struct xfs_dir2_sf_entry *)
269 		((char *)hdr + xfs_dir2_sf_hdr_size(hdr->i8count));
270 }
271 
272 /*
273  * Data block structures.
274  *
275  * A pure data block looks like the following drawing on disk:
276  *
277  *    +-------------------------------------------------+
278  *    | xfs_dir2_data_hdr_t                             |
279  *    +-------------------------------------------------+
280  *    | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t |
281  *    | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t |
282  *    | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t |
283  *    | ...                                             |
284  *    +-------------------------------------------------+
285  *    | unused space                                    |
286  *    +-------------------------------------------------+
287  *
288  * As all the entries are variable size structures the accessors below should
289  * be used to iterate over them.
290  *
291  * In addition to the pure data blocks for the data and node formats,
292  * most structures are also used for the combined data/freespace "block"
293  * format below.
294  */
295 
#define XFS_DIR2_DATA_ALIGN_LOG		3	/* i.e., 8 bytes */
#define XFS_DIR2_DATA_ALIGN		(1 << XFS_DIR2_DATA_ALIGN_LOG)
#define XFS_DIR2_DATA_FREE_TAG		0xffff
#define XFS_DIR2_DATA_FD_COUNT		3
300 
301 /*
302  * Directory address space divided into sections,
303  * spaces separated by 32GB.
304  */
/* Size of one section of the directory address space. */
#define XFS_DIR2_SPACE_SIZE		(1ULL << (32 + XFS_DIR2_DATA_ALIGN_LOG))
/* The data section is section 0. */
#define XFS_DIR2_DATA_SPACE		0
#define XFS_DIR2_DATA_OFFSET		(XFS_DIR2_DATA_SPACE * XFS_DIR2_SPACE_SIZE)
308 
309 /*
310  * Describe a free area in the data block.
311  *
312  * The freespace will be formatted as a xfs_dir2_data_unused_t.
313  */
314 typedef struct xfs_dir2_data_free {
315 	__be16			offset;		/* start of freespace */
316 	__be16			length;		/* length of freespace */
317 } xfs_dir2_data_free_t;
318 
319 /*
320  * Header for the data blocks.
321  *
322  * The code knows that XFS_DIR2_DATA_FD_COUNT is 3.
323  */
324 typedef struct xfs_dir2_data_hdr {
325 	__be32			magic;		/* XFS_DIR2_DATA_MAGIC or */
326 						/* XFS_DIR2_BLOCK_MAGIC */
327 	xfs_dir2_data_free_t	bestfree[XFS_DIR2_DATA_FD_COUNT];
328 } xfs_dir2_data_hdr_t;
329 
330 /*
331  * define a structure for all the verification fields we are adding to the
332  * directory block structures. This will be used in several structures.
333  * The magic number must be the first entry to align with all the dir2
334  * structures so we determine how to decode them just by the magic number.
335  */
336 struct xfs_dir3_blk_hdr {
337 	__be32			magic;	/* magic number */
338 	__be32			crc;	/* CRC of block */
339 	__be64			blkno;	/* first block of the buffer */
340 	__be64			lsn;	/* sequence number of last write */
341 	uuid_t			uuid;	/* filesystem we belong to */
342 	__be64			owner;	/* inode that owns the block */
343 };
344 
345 struct xfs_dir3_data_hdr {
346 	struct xfs_dir3_blk_hdr	hdr;
347 	xfs_dir2_data_free_t	best_free[XFS_DIR2_DATA_FD_COUNT];
348 	__be32			pad;	/* 64 bit alignment */
349 };
350 
351 #define XFS_DIR3_DATA_CRC_OFF  offsetof(struct xfs_dir3_data_hdr, hdr.crc)
352 
353 /*
354  * Active entry in a data block.
355  *
356  * Aligned to 8 bytes.  After the variable length name field there is a
357  * 2 byte tag field, which can be accessed using xfs_dir3_data_entry_tag_p.
358  *
359  * For dir3 structures, there is file type field between the name and the tag.
360  * This can only be manipulated by helper functions. It is packed hard against
361  * the end of the name so any padding for rounding is between the file type and
362  * the tag.
363  */
364 typedef struct xfs_dir2_data_entry {
365 	__be64			inumber;	/* inode number */
366 	__u8			namelen;	/* name length */
367 	__u8			name[];		/* name bytes, no null */
368      /* __u8			filetype; */	/* type of inode we point to */
369      /*	__be16                  tag; */		/* starting offset of us */
370 } xfs_dir2_data_entry_t;
371 
372 /*
373  * Unused entry in a data block.
374  *
375  * Aligned to 8 bytes.  Tag appears as the last 2 bytes and must be accessed
376  * using xfs_dir2_data_unused_tag_p.
377  */
378 typedef struct xfs_dir2_data_unused {
379 	__be16			freetag;	/* XFS_DIR2_DATA_FREE_TAG */
380 	__be16			length;		/* total free length */
381 						/* variable offset */
382 	__be16			tag;		/* starting offset of us */
383 } xfs_dir2_data_unused_t;
384 
385 /*
386  * Pointer to a freespace's tag word.
387  */
388 static inline __be16 *
389 xfs_dir2_data_unused_tag_p(struct xfs_dir2_data_unused *dup)
390 {
391 	return (__be16 *)((char *)dup +
392 			be16_to_cpu(dup->length) - sizeof(__be16));
393 }
394 
395 /*
396  * Leaf block structures.
397  *
398  * A pure leaf block looks like the following drawing on disk:
399  *
400  *    +---------------------------+
401  *    | xfs_dir2_leaf_hdr_t       |
402  *    +---------------------------+
403  *    | xfs_dir2_leaf_entry_t     |
404  *    | xfs_dir2_leaf_entry_t     |
405  *    | xfs_dir2_leaf_entry_t     |
406  *    | xfs_dir2_leaf_entry_t     |
407  *    | ...                       |
408  *    +---------------------------+
409  *    | xfs_dir2_data_off_t       |
410  *    | xfs_dir2_data_off_t       |
411  *    | xfs_dir2_data_off_t       |
412  *    | ...                       |
413  *    +---------------------------+
414  *    | xfs_dir2_leaf_tail_t      |
415  *    +---------------------------+
416  *
417  * The xfs_dir2_data_off_t members (bests) and tail are at the end of the block
418  * for single-leaf (magic = XFS_DIR2_LEAF1_MAGIC) blocks only, but not present
419  * for directories with separate leaf nodes and free space blocks
420  * (magic = XFS_DIR2_LEAFN_MAGIC).
421  *
422  * As all the entries are variable size structures the accessors below should
423  * be used to iterate over them.
424  */
425 
426 /*
427  * Offset of the leaf/node space.  First block in this space
428  * is the btree root.
429  */
/* The leaf/node section is section 1 of the directory address space. */
#define XFS_DIR2_LEAF_SPACE		1
#define XFS_DIR2_LEAF_OFFSET		(XFS_DIR2_LEAF_SPACE * XFS_DIR2_SPACE_SIZE)
432 
433 /*
434  * Leaf block header.
435  */
436 typedef struct xfs_dir2_leaf_hdr {
437 	xfs_da_blkinfo_t	info;		/* header for da routines */
438 	__be16			count;		/* count of entries */
439 	__be16			stale;		/* count of stale entries */
440 } xfs_dir2_leaf_hdr_t;
441 
442 struct xfs_dir3_leaf_hdr {
443 	struct xfs_da3_blkinfo	info;		/* header for da routines */
444 	__be16			count;		/* count of entries */
445 	__be16			stale;		/* count of stale entries */
446 	__be32			pad;		/* 64 bit alignment */
447 };
448 
/* CPU-endian copy of the fields found in the directory leaf headers. */
struct xfs_dir3_icleaf_hdr {
	/* sibling links and block magic */
	uint32_t	forw;
	uint32_t	back;
	uint16_t	magic;

	/* entry counters */
	uint16_t	count;
	uint16_t	stale;
};
456 
457 /*
458  * Leaf block entry.
459  */
460 typedef struct xfs_dir2_leaf_entry {
461 	__be32			hashval;	/* hash value of name */
462 	__be32			address;	/* address of data entry */
463 } xfs_dir2_leaf_entry_t;
464 
465 /*
466  * Leaf block tail.
467  */
468 typedef struct xfs_dir2_leaf_tail {
469 	__be32			bestcount;
470 } xfs_dir2_leaf_tail_t;
471 
472 /*
473  * Leaf block.
474  */
475 typedef struct xfs_dir2_leaf {
476 	xfs_dir2_leaf_hdr_t	hdr;			/* leaf header */
477 	xfs_dir2_leaf_entry_t	__ents[];		/* entries */
478 } xfs_dir2_leaf_t;
479 
480 struct xfs_dir3_leaf {
481 	struct xfs_dir3_leaf_hdr	hdr;		/* leaf header */
482 	struct xfs_dir2_leaf_entry	__ents[];	/* entries */
483 };
484 
485 #define XFS_DIR3_LEAF_CRC_OFF  offsetof(struct xfs_dir3_leaf_hdr, info.crc)
486 
487 /*
488  * Get address of the bests array in the single-leaf block.
489  */
490 static inline __be16 *
491 xfs_dir2_leaf_bests_p(struct xfs_dir2_leaf_tail *ltp)
492 {
493 	return (__be16 *)ltp - be32_to_cpu(ltp->bestcount);
494 }
495 
/*
 * Free space block definitions for the node format.
 */
499 
500 /*
501  * Offset of the freespace index.
502  */
/* The freespace index is section 2 of the directory address space. */
#define XFS_DIR2_FREE_SPACE		2
#define XFS_DIR2_FREE_OFFSET		(XFS_DIR2_FREE_SPACE * XFS_DIR2_SPACE_SIZE)
505 
506 typedef	struct xfs_dir2_free_hdr {
507 	__be32			magic;		/* XFS_DIR2_FREE_MAGIC */
508 	__be32			firstdb;	/* db of first entry */
509 	__be32			nvalid;		/* count of valid entries */
510 	__be32			nused;		/* count of used entries */
511 } xfs_dir2_free_hdr_t;
512 
513 typedef struct xfs_dir2_free {
514 	xfs_dir2_free_hdr_t	hdr;		/* block header */
515 	__be16			bests[];	/* best free counts */
516 						/* unused entries are -1 */
517 } xfs_dir2_free_t;
518 
519 struct xfs_dir3_free_hdr {
520 	struct xfs_dir3_blk_hdr	hdr;
521 	__be32			firstdb;	/* db of first entry */
522 	__be32			nvalid;		/* count of valid entries */
523 	__be32			nused;		/* count of used entries */
524 	__be32			pad;		/* 64 bit alignment */
525 };
526 
527 struct xfs_dir3_free {
528 	struct xfs_dir3_free_hdr hdr;
529 	__be16			bests[];	/* best free counts */
530 						/* unused entries are -1 */
531 };
532 
533 #define XFS_DIR3_FREE_CRC_OFF  offsetof(struct xfs_dir3_free, hdr.hdr.crc)
534 
535 /*
536  * In core version of the free block header, abstracted away from on-disk format
537  * differences. Use this in the code, and convert to/from the disk version using
538  * xfs_dir3_free_hdr_from_disk/xfs_dir3_free_hdr_to_disk.
539  */
struct xfs_dir3_icfree_hdr {
	uint32_t	magic;		/* block magic number */
	uint32_t	firstdb;	/* db of first entry */
	uint32_t	nvalid;		/* count of valid entries */
	uint32_t	nused;		/* count of used entries */
};
547 
548 /*
549  * Single block format.
550  *
551  * The single block format looks like the following drawing on disk:
552  *
553  *    +-------------------------------------------------+
554  *    | xfs_dir2_data_hdr_t                             |
555  *    +-------------------------------------------------+
556  *    | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t |
557  *    | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t |
 *    | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t |
559  *    | ...                                             |
560  *    +-------------------------------------------------+
561  *    | unused space                                    |
562  *    +-------------------------------------------------+
563  *    | ...                                             |
564  *    | xfs_dir2_leaf_entry_t                           |
565  *    | xfs_dir2_leaf_entry_t                           |
566  *    +-------------------------------------------------+
567  *    | xfs_dir2_block_tail_t                           |
568  *    +-------------------------------------------------+
569  *
570  * As all the entries are variable size structures the accessors below should
571  * be used to iterate over them.
572  */
573 
574 typedef struct xfs_dir2_block_tail {
575 	__be32		count;			/* count of leaf entries */
576 	__be32		stale;			/* count of stale lf entries */
577 } xfs_dir2_block_tail_t;
578 
579 /*
580  * Pointer to the leaf entries embedded in a data block (1-block format)
581  */
582 static inline struct xfs_dir2_leaf_entry *
583 xfs_dir2_block_leaf_p(struct xfs_dir2_block_tail *btp)
584 {
585 	return ((struct xfs_dir2_leaf_entry *)btp) - be32_to_cpu(btp->count);
586 }
587 
588 
589 /*
590  * Attribute storage layout
591  *
592  * Attribute lists are structured around Btrees where all the data
593  * elements are in the leaf nodes.  Attribute names are hashed into an int,
594  * then that int is used as the index into the Btree.  Since the hashval
595  * of an attribute name may not be unique, we may have duplicate keys.  The
596  * internal links in the Btree are logical block offsets into the file.
597  *
598  * Struct leaf_entry's are packed from the top.  Name/values grow from the
599  * bottom but are not packed.  The freemap contains run-length-encoded entries
600  * for the free bytes after the leaf_entry's, but only the N largest such,
601  * smaller runs are dropped.  When the freemap doesn't show enough space
602  * for an allocation, we compact the name/value area and try again.  If we
603  * still don't have enough space, then we have to split the block.  The
604  * name/value structs (both local and remote versions) must be 32bit aligned.
605  *
606  * Since we have duplicate hash keys, for each key that matches, compare
607  * the actual name string.  The root and intermediate node search always
608  * takes the first-in-the-block key match found, so we should only have
609  * to work "forw"ard.  If none matches, continue with the "forw"ard leaf
610  * nodes until the hash key changes or the attribute name is found.
611  *
612  * We store the fact that an attribute is a ROOT/USER/SECURE attribute in
613  * the leaf_entry.  The namespaces are independent only because we also look
614  * at the namespace bit when we are looking for a matching attribute name.
615  *
616  * We also store an "incomplete" bit in the leaf_entry.  It shows that an
617  * attribute is in the middle of being created and should not be shown to
618  * the user if we crash during the time that the bit is set.  We clear the
619  * bit when we have finished setting up the attribute.  We do this because
620  * we cannot create some large attributes inside a single transaction, and we
621  * need some indication that we weren't finished if we crash in the middle.
622  */
/* how many freespace slots an attribute leaf header carries */
#define XFS_ATTR_LEAF_MAPSIZE		3
624 
625 /*
626  * Entries are packed toward the top as tight as possible.
627  */
628 typedef struct xfs_attr_shortform {
629 	struct xfs_attr_sf_hdr {	/* constant-structure header block */
630 		__be16	totsize;	/* total bytes in shortform list */
631 		__u8	count;	/* count of active entries */
632 		__u8	padding;
633 	} hdr;
634 	struct xfs_attr_sf_entry {
635 		uint8_t namelen;	/* actual length of name (no NULL) */
636 		uint8_t valuelen;	/* actual length of value (no NULL) */
637 		uint8_t flags;	/* flags bits (see xfs_attr_leaf.h) */
638 		uint8_t nameval[1];	/* name & value bytes concatenated */
639 	} list[1];			/* variable sized array */
640 } xfs_attr_shortform_t;
641 
642 typedef struct xfs_attr_leaf_map {	/* RLE map of free bytes */
643 	__be16	base;			  /* base of free region */
644 	__be16	size;			  /* length of free region */
645 } xfs_attr_leaf_map_t;
646 
647 typedef struct xfs_attr_leaf_hdr {	/* constant-structure header block */
648 	xfs_da_blkinfo_t info;		/* block type, links, etc. */
649 	__be16	count;			/* count of active leaf_entry's */
650 	__be16	usedbytes;		/* num bytes of names/values stored */
651 	__be16	firstused;		/* first used byte in name area */
652 	__u8	holes;			/* != 0 if blk needs compaction */
653 	__u8	pad1;
654 	xfs_attr_leaf_map_t freemap[XFS_ATTR_LEAF_MAPSIZE];
655 					/* N largest free regions */
656 } xfs_attr_leaf_hdr_t;
657 
658 typedef struct xfs_attr_leaf_entry {	/* sorted on key, not name */
659 	__be32	hashval;		/* hash value of name */
660 	__be16	nameidx;		/* index into buffer of name/value */
661 	__u8	flags;			/* LOCAL/ROOT/SECURE/INCOMPLETE flag */
662 	__u8	pad2;			/* unused pad byte */
663 } xfs_attr_leaf_entry_t;
664 
665 typedef struct xfs_attr_leaf_name_local {
666 	__be16	valuelen;		/* number of bytes in value */
667 	__u8	namelen;		/* length of name bytes */
668 	__u8	nameval[1];		/* name/value bytes */
669 } xfs_attr_leaf_name_local_t;
670 
671 typedef struct xfs_attr_leaf_name_remote {
672 	__be32	valueblk;		/* block number of value bytes */
673 	__be32	valuelen;		/* number of bytes in value */
674 	__u8	namelen;		/* length of name bytes */
675 	__u8	name[1];		/* name bytes */
676 } xfs_attr_leaf_name_remote_t;
677 
678 typedef struct xfs_attr_leafblock {
679 	xfs_attr_leaf_hdr_t	hdr;	/* constant-structure header block */
680 	xfs_attr_leaf_entry_t	entries[1];	/* sorted on key, not name */
681 	/*
682 	 * The rest of the block contains the following structures after the
683 	 * leaf entries, growing from the bottom up. The variables are never
684 	 * referenced and definining them can actually make gcc optimize away
685 	 * accesses to the 'entries' array above index 0 so don't do that.
686 	 *
687 	 * xfs_attr_leaf_name_local_t namelist;
688 	 * xfs_attr_leaf_name_remote_t valuelist;
689 	 */
690 } xfs_attr_leafblock_t;
691 
692 /*
693  * CRC enabled leaf structures. Called "version 3" structures to match the
694  * version number of the directory and dablk structures for this feature, and
695  * attr2 is already taken by the variable inode attribute fork size feature.
696  */
697 struct xfs_attr3_leaf_hdr {
698 	struct xfs_da3_blkinfo	info;
699 	__be16			count;
700 	__be16			usedbytes;
701 	__be16			firstused;
702 	__u8			holes;
703 	__u8			pad1;
704 	struct xfs_attr_leaf_map freemap[XFS_ATTR_LEAF_MAPSIZE];
705 	__be32			pad2;		/* 64 bit alignment */
706 };
707 
708 #define XFS_ATTR3_LEAF_CRC_OFF	(offsetof(struct xfs_attr3_leaf_hdr, info.crc))
709 
710 struct xfs_attr3_leafblock {
711 	struct xfs_attr3_leaf_hdr	hdr;
712 	struct xfs_attr_leaf_entry	entries[1];
713 
714 	/*
715 	 * The rest of the block contains the following structures after the
716 	 * leaf entries, growing from the bottom up. The variables are never
717 	 * referenced, the locations accessed purely from helper functions.
718 	 *
719 	 * struct xfs_attr_leaf_name_local
720 	 * struct xfs_attr_leaf_name_remote
721 	 */
722 };
723 
724 /*
725  * incore, neutral version of the attribute leaf header
726  */
727 struct xfs_attr3_icleaf_hdr {
728 	uint32_t	forw;
729 	uint32_t	back;
730 	uint16_t	magic;
731 	uint16_t	count;
732 	uint16_t	usedbytes;
733 	/*
734 	 * firstused is 32-bit here instead of 16-bit like the on-disk variant
735 	 * to support maximum fsb size of 64k without overflow issues throughout
736 	 * the attr code. Instead, the overflow condition is handled on
737 	 * conversion to/from disk.
738 	 */
739 	uint32_t	firstused;
740 	__u8		holes;
741 	struct {
742 		uint16_t	base;
743 		uint16_t	size;
744 	} freemap[XFS_ATTR_LEAF_MAPSIZE];
745 };
746 
747 /*
748  * Special value to represent fs block size in the leaf header firstused field.
749  * Only used when block size overflows the 2-bytes available on disk.
750  */
#define XFS_ATTR3_LEAF_NULLOFF		0	/* firstused stand-in for block size */
752 
753 /*
754  * Flags used in the leaf_entry[i].flags field.
755  * NOTE: the INCOMPLETE bit must not collide with the flags bits specified
756  * on the system call, they are "or"ed together for various operations.
757  */
/* Bit positions ... */
#define XFS_ATTR_LOCAL_BIT		0	/* attr is stored locally */
#define XFS_ATTR_ROOT_BIT		1	/* limit access to trusted attrs */
#define XFS_ATTR_SECURE_BIT		2	/* limit access to secure attrs */
#define XFS_ATTR_INCOMPLETE_BIT		7	/* attr in middle of create/delete */

/* ... and the flag masks built from them. */
#define XFS_ATTR_LOCAL			(1 << XFS_ATTR_LOCAL_BIT)
#define XFS_ATTR_ROOT			(1 << XFS_ATTR_ROOT_BIT)
#define XFS_ATTR_SECURE			(1 << XFS_ATTR_SECURE_BIT)
#define XFS_ATTR_INCOMPLETE		(1 << XFS_ATTR_INCOMPLETE_BIT)
766 
767 /*
768  * Conversion macros for converting namespace bits from argument flags
769  * to ondisk flags.
770  */
/* Namespace bits as they appear in argument flags and on disk. */
#define XFS_ATTR_NSP_ARGS_MASK		(ATTR_ROOT | ATTR_SECURE)
#define XFS_ATTR_NSP_ONDISK_MASK	(XFS_ATTR_ROOT | XFS_ATTR_SECURE)

/* Keep only the namespace bits of a flags word. */
#define XFS_ATTR_NSP_ONDISK(flags)	((flags) & XFS_ATTR_NSP_ONDISK_MASK)
#define XFS_ATTR_NSP_ARGS(flags)	((flags) & XFS_ATTR_NSP_ARGS_MASK)

/* Translate the namespace bits from one representation to the other. */
#define XFS_ATTR_NSP_ARGS_TO_ONDISK(x)				\
	(((x) & ATTR_ROOT ? XFS_ATTR_ROOT : 0) |		\
	 ((x) & ATTR_SECURE ? XFS_ATTR_SECURE : 0))
#define XFS_ATTR_NSP_ONDISK_TO_ARGS(x)				\
	(((x) & XFS_ATTR_ROOT ? ATTR_ROOT : 0) |		\
	 ((x) & XFS_ATTR_SECURE ? ATTR_SECURE : 0))
779 
780 /*
781  * Alignment for namelist and valuelist entries (since they are mixed
782  * there can be only one alignment value)
783  */
#define XFS_ATTR_LEAF_NAME_ALIGN \
	((uint)sizeof(xfs_dablk_t))
785 
786 static inline int
787 xfs_attr3_leaf_hdr_size(struct xfs_attr_leafblock *leafp)
788 {
789 	if (leafp->hdr.info.magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC))
790 		return sizeof(struct xfs_attr3_leaf_hdr);
791 	return sizeof(struct xfs_attr_leaf_hdr);
792 }
793 
794 static inline struct xfs_attr_leaf_entry *
795 xfs_attr3_leaf_entryp(xfs_attr_leafblock_t *leafp)
796 {
797 	if (leafp->hdr.info.magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC))
798 		return &((struct xfs_attr3_leafblock *)leafp)->entries[0];
799 	return &leafp->entries[0];
800 }
801 
802 /*
803  * Cast typed pointers for "local" and "remote" name/value structs.
804  */
805 static inline char *
806 xfs_attr3_leaf_name(xfs_attr_leafblock_t *leafp, int idx)
807 {
808 	struct xfs_attr_leaf_entry *entries = xfs_attr3_leaf_entryp(leafp);
809 
810 	return &((char *)leafp)[be16_to_cpu(entries[idx].nameidx)];
811 }
812 
813 static inline xfs_attr_leaf_name_remote_t *
814 xfs_attr3_leaf_name_remote(xfs_attr_leafblock_t *leafp, int idx)
815 {
816 	return (xfs_attr_leaf_name_remote_t *)xfs_attr3_leaf_name(leafp, idx);
817 }
818 
819 static inline xfs_attr_leaf_name_local_t *
820 xfs_attr3_leaf_name_local(xfs_attr_leafblock_t *leafp, int idx)
821 {
822 	return (xfs_attr_leaf_name_local_t *)xfs_attr3_leaf_name(leafp, idx);
823 }
824 
825 /*
826  * Calculate total bytes used (including trailing pad for alignment) for
827  * a "local" name/value structure, a "remote" name/value structure, and
828  * a pointer which might be either.
829  */
830 static inline int xfs_attr_leaf_entsize_remote(int nlen)
831 {
832 	return ((uint)sizeof(xfs_attr_leaf_name_remote_t) - 1 + (nlen) + \
833 		XFS_ATTR_LEAF_NAME_ALIGN - 1) & ~(XFS_ATTR_LEAF_NAME_ALIGN - 1);
834 }
835 
836 static inline int xfs_attr_leaf_entsize_local(int nlen, int vlen)
837 {
838 	return ((uint)sizeof(xfs_attr_leaf_name_local_t) - 1 + (nlen) + (vlen) +
839 		XFS_ATTR_LEAF_NAME_ALIGN - 1) & ~(XFS_ATTR_LEAF_NAME_ALIGN - 1);
840 }
841 
/* Returns one half plus one quarter of @bsize, each term rounded down. */
static inline int xfs_attr_leaf_entsize_local_max(int bsize)
{
	int	half = bsize >> 1;
	int	quarter = bsize >> 2;

	return half + quarter;
}
846 
847 
848 
849 /*
850  * Remote attribute block format definition
851  *
852  * There is one of these headers per filesystem block in a remote attribute.
853  * This is done to ensure there is a 1:1 mapping between the attribute value
854  * length and the number of blocks needed to store the attribute. This makes the
855  * verification of a buffer a little more complex, but greatly simplifies the
856  * allocation, reading and writing of these attributes as we don't have to guess
857  * the number of blocks needed to store the attribute data.
858  */
859 #define XFS_ATTR3_RMT_MAGIC	0x5841524d	/* XARM */
860 
861 struct xfs_attr3_rmt_hdr {
862 	__be32	rm_magic;
863 	__be32	rm_offset;
864 	__be32	rm_bytes;
865 	__be32	rm_crc;
866 	uuid_t	rm_uuid;
867 	__be64	rm_owner;
868 	__be64	rm_blkno;
869 	__be64	rm_lsn;
870 };
871 
872 #define XFS_ATTR3_RMT_CRC_OFF	offsetof(struct xfs_attr3_rmt_hdr, rm_crc)
873 
/*
 * Bytes of a @bufsize byte buffer left for attribute data: the header size
 * is subtracted only when xfs_sb_version_hascrc() is true for @mp.
 */
#define XFS_ATTR3_RMT_BUF_SPACE(mp, bufsize)				\
	((bufsize) -							\
	 (xfs_sb_version_hascrc(&(mp)->m_sb) ?				\
			sizeof(struct xfs_attr3_rmt_hdr) : 0))
877 
878 /* Number of bytes in a directory block. */
879 static inline unsigned int xfs_dir2_dirblock_bytes(struct xfs_sb *sbp)
880 {
881 	return 1 << (sbp->sb_blocklog + sbp->sb_dirblklog);
882 }
883 
884 #endif /* __XFS_DA_FORMAT_H__ */
885