xref: /linux-6.15/include/uapi/linux/btrfs_tree.h (revision 94a48aef)
1 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
2 #ifndef _BTRFS_CTREE_H_
3 #define _BTRFS_CTREE_H_
4 
5 #include <linux/btrfs.h>
6 #include <linux/types.h>
7 #ifdef __KERNEL__
8 #include <linux/stddef.h>
9 #else
10 #include <stddef.h>
11 #endif
12 
/*
 * Filesystem magic: the ASCII bytes of "_BHRfS_M", least significant byte
 * first, with no terminating NUL.
 */
#define BTRFS_MAGIC		0x4D5F53665248425FULL

#define BTRFS_MAX_LEVEL		8

/*
 * Much longer names could actually be stored, but let's not confuse the rest
 * of Linux.
 */
#define BTRFS_NAME_LEN		255

/*
 * The theoretical limit is larger; it is held down to a sane value, which
 * should greatly limit the possibility of collisions on inode ref items.
 */
#define BTRFS_LINK_MAX		65535U
29 
30 /*
31  * This header contains the structure definitions and constants used
32  * by file system objects that can be retrieved using
33  * the BTRFS_IOC_SEARCH_TREE ioctl.  That means basically anything that
34  * is needed to describe a leaf node's key or item contents.
35  */
36 
/* Root tree: holds pointers to all of the tree roots. */
#define BTRFS_ROOT_TREE_OBJECTID	1ULL

/* Extent tree: records which extents are in use, and reference counts. */
#define BTRFS_EXTENT_TREE_OBJECTID	2ULL

/*
 * Chunk tree: stores the translations from logical to physical block
 * numbering.  The super block points to the chunk tree.
 */
#define BTRFS_CHUNK_TREE_OBJECTID	3ULL

/*
 * Device tree: stores information about which areas of a given device are
 * in use, one per device.  The tree of tree roots points to the device tree.
 */
#define BTRFS_DEV_TREE_OBJECTID		4ULL

/* FS tree: one per subvolume, storing files and directories. */
#define BTRFS_FS_TREE_OBJECTID		5ULL

/* Objectid of the directory inside the root tree. */
#define BTRFS_ROOT_TREE_DIR_OBJECTID	6ULL

/* Checksum tree: holds the checksums of all the data extents. */
#define BTRFS_CSUM_TREE_OBJECTID	7ULL

/* Quota tree: holds quota configuration and tracking. */
#define BTRFS_QUOTA_TREE_OBJECTID	8ULL

/* UUID tree: stores the items that use the BTRFS_UUID_KEY* types. */
#define BTRFS_UUID_TREE_OBJECTID	9ULL

/* Free space tree: tracks free space in block groups. */
#define BTRFS_FREE_SPACE_TREE_OBJECTID	10ULL

/* Block group tree: holds the block group items for extent tree v2. */
#define BTRFS_BLOCK_GROUP_TREE_OBJECTID	11ULL

/* Objectid of the device stats in the device tree. */
#define BTRFS_DEV_STATS_OBJECTID	0ULL
78 
/*
 * The objectids below are unsigned long long constants written as negated
 * literals, so with a 64-bit unsigned long long -4ULL is 0xFFFFFFFFFFFFFFFC,
 * -5ULL is 0xFFFFFFFFFFFFFFFB, and so on.  Each replacement list is
 * parenthesized so that it remains a single operand wherever the macro is
 * expanded.
 */

/* for storing balance parameters in the root tree */
#define BTRFS_BALANCE_OBJECTID		(-4ULL)

/* orphan objectid for tracking unlinked/truncated files */
#define BTRFS_ORPHAN_OBJECTID		(-5ULL)

/* does write ahead logging to speed up fsyncs */
#define BTRFS_TREE_LOG_OBJECTID		(-6ULL)
#define BTRFS_TREE_LOG_FIXUP_OBJECTID	(-7ULL)

/* for space balancing */
#define BTRFS_TREE_RELOC_OBJECTID	(-8ULL)
#define BTRFS_DATA_RELOC_TREE_OBJECTID	(-9ULL)

/*
 * Extent checksums all have this objectid.  This allows them to share the
 * logging tree for fsyncs.
 */
#define BTRFS_EXTENT_CSUM_OBJECTID	(-10ULL)

/* For storing free space cache */
#define BTRFS_FREE_SPACE_OBJECTID	(-11ULL)

/*
 * The inode number assigned to the special inode for storing the free ino
 * cache.
 */
#define BTRFS_FREE_INO_OBJECTID		(-12ULL)

/* dummy objectid represents multiple objectids */
#define BTRFS_MULTIPLE_OBJECTIDS	(-255ULL)

/*
 * All files have objectids in the range
 * BTRFS_FIRST_FREE_OBJECTID..BTRFS_LAST_FREE_OBJECTID.
 */
#define BTRFS_FIRST_FREE_OBJECTID	256ULL
#define BTRFS_LAST_FREE_OBJECTID	(-256ULL)
#define BTRFS_FIRST_CHUNK_TREE_OBJECTID	256ULL
118 
119 
/*
 * Device items are stored in the chunk tree, under keys of the form
 * [ 1 BTRFS_DEV_ITEM_KEY device_id ]
 */
#define BTRFS_DEV_ITEMS_OBJECTID	1ULL

#define BTRFS_BTREE_INODE_OBJECTID	1

#define BTRFS_EMPTY_SUBVOL_DIR_OBJECTID	2

#define BTRFS_DEV_REPLACE_DEVID		0ULL
131 
/*
 * Inode items carry the data typically returned from stat, and store other
 * information about object characteristics.  Every file and directory in the
 * FS has one.
 */
#define BTRFS_INODE_ITEM_KEY		1
#define BTRFS_INODE_REF_KEY		12
#define BTRFS_INODE_EXTREF_KEY		13
#define BTRFS_XATTR_ITEM_KEY		24

/*
 * fs-verity items are kept on disk under two different key types.
 *
 * Descriptor items:
 * [ inode objectid, BTRFS_VERITY_DESC_ITEM_KEY, offset ]
 *
 * Offset 0 holds a btrfs_verity_descriptor_item, which tracks the size of
 * the descriptor item and some extra data for encryption.  Offsets 1 and up
 * hold the generic fs-verity descriptor; it is opaque to btrfs, which just
 * reads and writes it as a blob for the higher level verity code.  The most
 * common descriptor size is 256 bytes.
 *
 * Merkle tree items:
 * [ inode objectid, BTRFS_VERITY_MERKLE_ITEM_KEY, offset ]
 *
 * These start at offset 0 as well and correspond to the merkle tree bytes:
 * when fsverity asks for page 0 of the merkle tree, one page starting at
 * offset 0 of this key type is pulled up.  They are opaque to btrfs too,
 * which blindly stores whatever fsverity sends down.
 */
#define BTRFS_VERITY_DESC_ITEM_KEY	36
#define BTRFS_VERITY_MERKLE_ITEM_KEY	37

#define BTRFS_ORPHAN_ITEM_KEY		48
/* reserve 2-15 close to the inode for later flexibility */
166 
/*
 * Dir items are the name -> inode pointers in a directory, one for every
 * name in that directory.  BTRFS_DIR_LOG_ITEM_KEY is no longer used; it stays
 * defined for documentation purposes and to help avoid having its numerical
 * value reused in the future.
 */
#define BTRFS_DIR_LOG_ITEM_KEY		60
#define BTRFS_DIR_LOG_INDEX_KEY		72
#define BTRFS_DIR_ITEM_KEY		84
#define BTRFS_DIR_INDEX_KEY		96

/* Extent data is for file data. */
#define BTRFS_EXTENT_DATA_KEY		108

/*
 * Extent csums are stored in a separate tree and hold the csums for an
 * entire extent on disk.
 */
#define BTRFS_EXTENT_CSUM_KEY		128

/*
 * Root items point to tree roots.  They are typically in the root tree used
 * by the super block to find all the other trees.
 */
#define BTRFS_ROOT_ITEM_KEY		132

/*
 * Root backrefs tie subvols and snapshots to the directory entries that
 * reference them.
 */
#define BTRFS_ROOT_BACKREF_KEY		144

/*
 * Root refs make a fast index for listing all of the snapshots and
 * subvolumes referenced by a given root.  They point directly to the
 * directory item, in the root, that references the subvol.
 */
#define BTRFS_ROOT_REF_KEY		156

/*
 * Extent items are in the extent map tree.  They record which blocks are
 * used and how many references there are to each block.
 */
#define BTRFS_EXTENT_ITEM_KEY		168

/*
 * The same as BTRFS_EXTENT_ITEM_KEY, except that for metadata the length is
 * already known, so the level is saved in key->offset instead of the length.
 */
#define BTRFS_METADATA_ITEM_KEY		169

#define BTRFS_TREE_BLOCK_REF_KEY	176

#define BTRFS_EXTENT_DATA_REF_KEY	178

#define BTRFS_EXTENT_REF_V0_KEY		180

#define BTRFS_SHARED_BLOCK_REF_KEY	182

#define BTRFS_SHARED_DATA_REF_KEY	184

/*
 * Block groups give hints into the extent allocation trees: which blocks
 * are free, and so on.
 */
#define BTRFS_BLOCK_GROUP_ITEM_KEY	192

/*
 * Every block group is represented in the free space tree by a free space
 * info item, which stores some accounting information.  It is keyed on
 * (block_group_start, FREE_SPACE_INFO, block_group_length).
 */
#define BTRFS_FREE_SPACE_INFO_KEY	198

/*
 * A free space extent tracks an extent of space that is free in a block
 * group.  It is keyed on (start, FREE_SPACE_EXTENT, length).
 */
#define BTRFS_FREE_SPACE_EXTENT_KEY	199

/*
 * When a block group becomes very fragmented, it is converted to use bitmaps
 * instead of extents.  A free space bitmap is keyed on
 * (start, FREE_SPACE_BITMAP, length); the corresponding item is a bitmap
 * with (length / sectorsize) bits.
 */
#define BTRFS_FREE_SPACE_BITMAP_KEY	200

#define BTRFS_DEV_EXTENT_KEY		204
#define BTRFS_DEV_ITEM_KEY		216
#define BTRFS_CHUNK_ITEM_KEY		228
259 
/*
 * Records the overall state of the qgroups.  Only one instance of this key
 * is present: (0, BTRFS_QGROUP_STATUS_KEY, 0).
 */
#define BTRFS_QGROUP_STATUS_KEY		240

/*
 * Records the currently used space of the qgroup.  One key per qgroup:
 * (0, BTRFS_QGROUP_INFO_KEY, qgroupid).
 */
#define BTRFS_QGROUP_INFO_KEY		242

/*
 * Contains the user configured limits for the qgroup.  One key per qgroup:
 * (0, BTRFS_QGROUP_LIMIT_KEY, qgroupid).
 */
#define BTRFS_QGROUP_LIMIT_KEY		244

/*
 * Records the child-parent relationship of qgroups.  Two keys are present
 * for each relation:
 * (childid, BTRFS_QGROUP_RELATION_KEY, parentid)
 * (parentid, BTRFS_QGROUP_RELATION_KEY, childid)
 */
#define BTRFS_QGROUP_RELATION_KEY	246
283 
/* Obsolete name, see BTRFS_TEMPORARY_ITEM_KEY. */
#define BTRFS_BALANCE_ITEM_KEY		248

/*
 * Key type for tree items that are stored persistently but do not need to
 * exist for an extended period of time.  The items can exist in any tree.
 *
 * [subtype, BTRFS_TEMPORARY_ITEM_KEY, data]
 *
 * Existing items:
 *
 * - balance status item
 *   (BTRFS_BALANCE_OBJECTID, BTRFS_TEMPORARY_ITEM_KEY, 0)
 */
#define BTRFS_TEMPORARY_ITEM_KEY	248

/* Obsolete name, see BTRFS_PERSISTENT_ITEM_KEY. */
#define BTRFS_DEV_STATS_KEY		249

/*
 * Key type for tree items that are stored persistently and usually exist
 * for a long period, e.g. the filesystem lifetime.  The item kinds can be
 * status information, stats or preference values.  The item can exist in
 * any tree.
 *
 * [subtype, BTRFS_PERSISTENT_ITEM_KEY, data]
 *
 * Existing items:
 *
 * - device statistics, store IO stats in the device tree, one key for all
 *   stats
 *   (BTRFS_DEV_STATS_OBJECTID, BTRFS_DEV_STATS_KEY, 0)
 */
#define BTRFS_PERSISTENT_ITEM_KEY	249

/*
 * Persistently stores the device replace state in the device tree.
 * The key is built like this: (0, BTRFS_DEV_REPLACE_KEY, 0).
 */
#define BTRFS_DEV_REPLACE_KEY		250
327 
328 /*
329  * Stores items that allow to quickly map UUIDs to something else.
330  * These items are part of the filesystem UUID tree.
331  * The key is built like this:
332  * (UUID_upper_64_bits, BTRFS_UUID_KEY*, UUID_lower_64_bits).
333  */
334 #if BTRFS_UUID_SIZE != 16
335 #error "UUID items require BTRFS_UUID_SIZE == 16!"
336 #endif
337 #define BTRFS_UUID_KEY_SUBVOL	251	/* for UUIDs assigned to subvols */
338 #define BTRFS_UUID_KEY_RECEIVED_SUBVOL	252	/* for UUIDs assigned to
339 						 * received subvols */
340 
/*
 * String items are for debugging.  They just store a short string of data
 * in the FS.
 */
#define BTRFS_STRING_ITEM_KEY		253

/* Maximum metadata block size (nodesize) */
#define BTRFS_MAX_METADATA_BLOCKSIZE	65536

/* The various csum fields are 32 bytes long. */
#define BTRFS_CSUM_SIZE			32
352 
/* Checksum types; every enumerator carries an explicit value. */
enum btrfs_csum_type {
	BTRFS_CSUM_TYPE_CRC32  = 0,
	BTRFS_CSUM_TYPE_XXHASH = 1,
	BTRFS_CSUM_TYPE_SHA256 = 2,
	BTRFS_CSUM_TYPE_BLAKE2 = 3,
};
360 
/*
 * Flag definitions for the directory entry item type.
 *
 * Used by:
 * struct btrfs_dir_item.type
 *
 * Values 0..7 must match the common file type values in fs_types.h.
 */
#define BTRFS_FT_UNKNOWN		0
#define BTRFS_FT_REG_FILE		1
#define BTRFS_FT_DIR			2
#define BTRFS_FT_CHRDEV			3
#define BTRFS_FT_BLKDEV			4
#define BTRFS_FT_FIFO			5
#define BTRFS_FT_SOCK			6
#define BTRFS_FT_SYMLINK		7
#define BTRFS_FT_XATTR			8
#define BTRFS_FT_MAX			9
/* Directory contains encrypted data */
#define BTRFS_FT_ENCRYPTED		0x80
381 
382 static inline __u8 btrfs_dir_flags_to_ftype(__u8 flags)
383 {
384 	return flags & ~BTRFS_FT_ENCRYPTED;
385 }
386 
/*
 * Inode flags
 */
#define BTRFS_INODE_NODATASUM		(1U << 0)
#define BTRFS_INODE_NODATACOW		(1U << 1)
#define BTRFS_INODE_READONLY		(1U << 2)
#define BTRFS_INODE_NOCOMPRESS		(1U << 3)
#define BTRFS_INODE_PREALLOC		(1U << 4)
#define BTRFS_INODE_SYNC		(1U << 5)
#define BTRFS_INODE_IMMUTABLE		(1U << 6)
#define BTRFS_INODE_APPEND		(1U << 7)
#define BTRFS_INODE_NODUMP		(1U << 8)
#define BTRFS_INODE_NOATIME		(1U << 9)
#define BTRFS_INODE_DIRSYNC		(1U << 10)
#define BTRFS_INODE_COMPRESS		(1U << 11)

#define BTRFS_INODE_ROOT_ITEM_INIT	(1U << 31)

/* Union of every inode flag defined above (bits 0..11 and bit 31). */
#define BTRFS_INODE_FLAG_MASK						\
	(BTRFS_INODE_NODATASUM  | BTRFS_INODE_NODATACOW |		\
	 BTRFS_INODE_READONLY   | BTRFS_INODE_NOCOMPRESS |		\
	 BTRFS_INODE_PREALLOC   | BTRFS_INODE_SYNC |			\
	 BTRFS_INODE_IMMUTABLE  | BTRFS_INODE_APPEND |			\
	 BTRFS_INODE_NODUMP     | BTRFS_INODE_NOATIME |			\
	 BTRFS_INODE_DIRSYNC    | BTRFS_INODE_COMPRESS |		\
	 BTRFS_INODE_ROOT_ITEM_INIT)

/* Separate flag set, numbered from bit 0 again, with its own mask. */
#define BTRFS_INODE_RO_VERITY		(1U << 0)

#define BTRFS_INODE_RO_FLAG_MASK	(BTRFS_INODE_RO_VERITY)
423 
424 /*
425  * The key defines the order in the tree, and so it also defines (optimal)
426  * block layout.
427  *
428  * objectid corresponds to the inode number.
429  *
430  * type tells us things about the object, and is a kind of stream selector.
431  * so for a given inode, keys with type of 1 might refer to the inode data,
432  * type of 2 may point to file data in the btree and type == 3 may point to
433  * extents.
434  *
435  * offset is the starting byte offset for this key in the stream.
436  *
437  * btrfs_disk_key is in disk byte order.  struct btrfs_key is always
438  * in cpu native order.  Otherwise they are identical and their sizes
439  * should be the same (ie both packed)
440  */
441 struct btrfs_disk_key {
442 	__le64 objectid;
443 	__u8 type;
444 	__le64 offset;
445 } __attribute__ ((__packed__));
446 
447 struct btrfs_key {
448 	__u64 objectid;
449 	__u8 type;
450 	__u64 offset;
451 } __attribute__ ((__packed__));
452 
453 /*
454  * Every tree block (leaf or node) starts with this header.
455  */
456 struct btrfs_header {
457 	/* These first four must match the super block */
458 	__u8 csum[BTRFS_CSUM_SIZE];
459 	/* FS specific uuid */
460 	__u8 fsid[BTRFS_FSID_SIZE];
461 	/* Which block this node is supposed to live in */
462 	__le64 bytenr;
463 	__le64 flags;
464 
465 	/* Allowed to be different from the super from here on down */
466 	__u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
467 	__le64 generation;
468 	__le64 owner;
469 	__le32 nritems;
470 	__u8 level;
471 } __attribute__ ((__packed__));
472 
/*
 * A very generous portion of the super block, leaving room to translate
 * 14 chunks with 3 stripes each.
 */
#define BTRFS_SYSTEM_CHUNK_ARRAY_SIZE	2048
478 
479 /*
480  * Just in case we somehow lose the roots and are not able to mount, we store
481  * an array of the roots from previous transactions in the super.
482  */
483 #define BTRFS_NUM_BACKUP_ROOTS 4
484 struct btrfs_root_backup {
485 	__le64 tree_root;
486 	__le64 tree_root_gen;
487 
488 	__le64 chunk_root;
489 	__le64 chunk_root_gen;
490 
491 	__le64 extent_root;
492 	__le64 extent_root_gen;
493 
494 	__le64 fs_root;
495 	__le64 fs_root_gen;
496 
497 	__le64 dev_root;
498 	__le64 dev_root_gen;
499 
500 	__le64 csum_root;
501 	__le64 csum_root_gen;
502 
503 	__le64 total_bytes;
504 	__le64 bytes_used;
505 	__le64 num_devices;
506 	/* future */
507 	__le64 unused_64[4];
508 
509 	__u8 tree_root_level;
510 	__u8 chunk_root_level;
511 	__u8 extent_root_level;
512 	__u8 fs_root_level;
513 	__u8 dev_root_level;
514 	__u8 csum_root_level;
515 	/* future and to align */
516 	__u8 unused_8[10];
517 } __attribute__ ((__packed__));
518 
519 /*
520  * A leaf is full of items. offset and size tell us where to find the item in
521  * the leaf (relative to the start of the data area)
522  */
523 struct btrfs_item {
524 	struct btrfs_disk_key key;
525 	__le32 offset;
526 	__le32 size;
527 } __attribute__ ((__packed__));
528 
529 /*
530  * Leaves have an item area and a data area:
531  * [item0, item1....itemN] [free space] [dataN...data1, data0]
532  *
533  * The data is separate from the items to get the keys closer together during
534  * searches.
535  */
536 struct btrfs_leaf {
537 	struct btrfs_header header;
538 	struct btrfs_item items[];
539 } __attribute__ ((__packed__));
540 
541 /*
542  * All non-leaf blocks are nodes, they hold only keys and pointers to other
543  * blocks.
544  */
545 struct btrfs_key_ptr {
546 	struct btrfs_disk_key key;
547 	__le64 blockptr;
548 	__le64 generation;
549 } __attribute__ ((__packed__));
550 
551 struct btrfs_node {
552 	struct btrfs_header header;
553 	struct btrfs_key_ptr ptrs[];
554 } __attribute__ ((__packed__));
555 
556 struct btrfs_dev_item {
557 	/* the internal btrfs device id */
558 	__le64 devid;
559 
560 	/* size of the device */
561 	__le64 total_bytes;
562 
563 	/* bytes used */
564 	__le64 bytes_used;
565 
566 	/* optimal io alignment for this device */
567 	__le32 io_align;
568 
569 	/* optimal io width for this device */
570 	__le32 io_width;
571 
572 	/* minimal io size for this device */
573 	__le32 sector_size;
574 
575 	/* type and info about this device */
576 	__le64 type;
577 
578 	/* expected generation for this device */
579 	__le64 generation;
580 
581 	/*
582 	 * starting byte of this partition on the device,
583 	 * to allow for stripe alignment in the future
584 	 */
585 	__le64 start_offset;
586 
587 	/* grouping information for allocation decisions */
588 	__le32 dev_group;
589 
590 	/* seek speed 0-100 where 100 is fastest */
591 	__u8 seek_speed;
592 
593 	/* bandwidth 0-100 where 100 is fastest */
594 	__u8 bandwidth;
595 
596 	/* btrfs generated uuid for this device */
597 	__u8 uuid[BTRFS_UUID_SIZE];
598 
599 	/* uuid of FS who owns this device */
600 	__u8 fsid[BTRFS_UUID_SIZE];
601 } __attribute__ ((__packed__));
602 
603 struct btrfs_stripe {
604 	__le64 devid;
605 	__le64 offset;
606 	__u8 dev_uuid[BTRFS_UUID_SIZE];
607 } __attribute__ ((__packed__));
608 
609 struct btrfs_chunk {
610 	/* size of this chunk in bytes */
611 	__le64 length;
612 
613 	/* objectid of the root referencing this chunk */
614 	__le64 owner;
615 
616 	__le64 stripe_len;
617 	__le64 type;
618 
619 	/* optimal io alignment for this chunk */
620 	__le32 io_align;
621 
622 	/* optimal io width for this chunk */
623 	__le32 io_width;
624 
625 	/* minimal io size for this chunk */
626 	__le32 sector_size;
627 
628 	/* 2^16 stripes is quite a lot, a second limit is the size of a single
629 	 * item in the btree
630 	 */
631 	__le16 num_stripes;
632 
633 	/* sub stripes only matter for raid10 */
634 	__le16 sub_stripes;
635 	struct btrfs_stripe stripe;
636 	/* additional stripes go here */
637 } __attribute__ ((__packed__));
638 
639 /*
640  * The super block basically lists the main trees of the FS.
641  */
642 struct btrfs_super_block {
643 	/* The first 4 fields must match struct btrfs_header */
644 	__u8 csum[BTRFS_CSUM_SIZE];
645 	/* FS specific UUID, visible to user */
646 	__u8 fsid[BTRFS_FSID_SIZE];
647 	/* This block number */
648 	__le64 bytenr;
649 	__le64 flags;
650 
651 	/* Allowed to be different from the btrfs_header from here own down */
652 	__le64 magic;
653 	__le64 generation;
654 	__le64 root;
655 	__le64 chunk_root;
656 	__le64 log_root;
657 
658 	/*
659 	 * This member has never been utilized since the very beginning, thus
660 	 * it's always 0 regardless of kernel version.  We always use
661 	 * generation + 1 to read log tree root.  So here we mark it deprecated.
662 	 */
663 	__le64 __unused_log_root_transid;
664 	__le64 total_bytes;
665 	__le64 bytes_used;
666 	__le64 root_dir_objectid;
667 	__le64 num_devices;
668 	__le32 sectorsize;
669 	__le32 nodesize;
670 	__le32 __unused_leafsize;
671 	__le32 stripesize;
672 	__le32 sys_chunk_array_size;
673 	__le64 chunk_root_generation;
674 	__le64 compat_flags;
675 	__le64 compat_ro_flags;
676 	__le64 incompat_flags;
677 	__le16 csum_type;
678 	__u8 root_level;
679 	__u8 chunk_root_level;
680 	__u8 log_root_level;
681 	struct btrfs_dev_item dev_item;
682 
683 	char label[BTRFS_LABEL_SIZE];
684 
685 	__le64 cache_generation;
686 	__le64 uuid_tree_generation;
687 
688 	/* The UUID written into btree blocks */
689 	__u8 metadata_uuid[BTRFS_FSID_SIZE];
690 
691 	/* Future expansion */
692 	__u8 reserved8[8];
693 	__le64 reserved[27];
694 	__u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE];
695 	struct btrfs_root_backup super_roots[BTRFS_NUM_BACKUP_ROOTS];
696 
697 	/* Padded to 4096 bytes */
698 	__u8 padding[565];
699 } __attribute__ ((__packed__));
700 
#define BTRFS_FREE_SPACE_EXTENT		1
#define BTRFS_FREE_SPACE_BITMAP		2
703 
704 struct btrfs_free_space_entry {
705 	__le64 offset;
706 	__le64 bytes;
707 	__u8 type;
708 } __attribute__ ((__packed__));
709 
710 struct btrfs_free_space_header {
711 	struct btrfs_disk_key location;
712 	__le64 generation;
713 	__le64 num_entries;
714 	__le64 num_bitmaps;
715 } __attribute__ ((__packed__));
716 
/* Tree block header flags */
#define BTRFS_HEADER_FLAG_WRITTEN		(1ULL << 0)
#define BTRFS_HEADER_FLAG_RELOC			(1ULL << 1)

/* Super block flags */

/* Errors detected */
#define BTRFS_SUPER_FLAG_ERROR			(1ULL << 2)

#define BTRFS_SUPER_FLAG_SEEDING		(1ULL << 32)
#define BTRFS_SUPER_FLAG_METADUMP		(1ULL << 33)
#define BTRFS_SUPER_FLAG_METADUMP_V2		(1ULL << 34)
#define BTRFS_SUPER_FLAG_CHANGING_FSID		(1ULL << 35)
#define BTRFS_SUPER_FLAG_CHANGING_FSID_V2	(1ULL << 36)
729 
730 
731 /*
732  * items in the extent btree are used to record the objectid of the
733  * owner of the block and the number of references
734  */
735 
736 struct btrfs_extent_item {
737 	__le64 refs;
738 	__le64 generation;
739 	__le64 flags;
740 } __attribute__ ((__packed__));
741 
742 struct btrfs_extent_item_v0 {
743 	__le32 refs;
744 } __attribute__ ((__packed__));
745 
746 
#define BTRFS_EXTENT_FLAG_DATA		(1ULL << 0)
#define BTRFS_EXTENT_FLAG_TREE_BLOCK	(1ULL << 1)

/* following flags only apply to tree blocks */

/* use full backrefs for extent pointers in the block */
#define BTRFS_BLOCK_FLAG_FULL_BACKREF	(1ULL << 8)

#define BTRFS_BACKREF_REV_MAX		256
#define BTRFS_BACKREF_REV_SHIFT		56
/*
 * (BTRFS_BACKREF_REV_MAX - 1) shifted up by BTRFS_BACKREF_REV_SHIFT, i.e.
 * bits 56..63 of a 64-bit value.  The cast is spelled __u64, the 64-bit type
 * used by the rest of this header, rather than the kernel-internal u64, so
 * that the macro also expands to valid code when the header is included
 * from userspace.
 */
#define BTRFS_BACKREF_REV_MASK		(((__u64)BTRFS_BACKREF_REV_MAX - 1) << \
					 BTRFS_BACKREF_REV_SHIFT)

#define BTRFS_OLD_BACKREF_REV		0
#define BTRFS_MIXED_BACKREF_REV		1

/*
 * This flag is only used internally by scrub and may be changed at any time.
 * It is only declared here to avoid collisions.
 */
#define BTRFS_EXTENT_FLAG_SUPER		(1ULL << 48)
768 
769 struct btrfs_tree_block_info {
770 	struct btrfs_disk_key key;
771 	__u8 level;
772 } __attribute__ ((__packed__));
773 
774 struct btrfs_extent_data_ref {
775 	__le64 root;
776 	__le64 objectid;
777 	__le64 offset;
778 	__le32 count;
779 } __attribute__ ((__packed__));
780 
781 struct btrfs_shared_data_ref {
782 	__le32 count;
783 } __attribute__ ((__packed__));
784 
785 struct btrfs_extent_inline_ref {
786 	__u8 type;
787 	__le64 offset;
788 } __attribute__ ((__packed__));
789 
790 /* dev extents record free space on individual devices.  The owner
791  * field points back to the chunk allocation mapping tree that allocated
792  * the extent.  The chunk tree uuid field is a way to double check the owner
793  */
794 struct btrfs_dev_extent {
795 	__le64 chunk_tree;
796 	__le64 chunk_objectid;
797 	__le64 chunk_offset;
798 	__le64 length;
799 	__u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
800 } __attribute__ ((__packed__));
801 
802 struct btrfs_inode_ref {
803 	__le64 index;
804 	__le16 name_len;
805 	/* name goes here */
806 } __attribute__ ((__packed__));
807 
808 struct btrfs_inode_extref {
809 	__le64 parent_objectid;
810 	__le64 index;
811 	__le16 name_len;
812 	__u8   name[];
813 	/* name goes here */
814 } __attribute__ ((__packed__));
815 
816 struct btrfs_timespec {
817 	__le64 sec;
818 	__le32 nsec;
819 } __attribute__ ((__packed__));
820 
821 struct btrfs_inode_item {
822 	/* nfs style generation number */
823 	__le64 generation;
824 	/* transid that last touched this inode */
825 	__le64 transid;
826 	__le64 size;
827 	__le64 nbytes;
828 	__le64 block_group;
829 	__le32 nlink;
830 	__le32 uid;
831 	__le32 gid;
832 	__le32 mode;
833 	__le64 rdev;
834 	__le64 flags;
835 
836 	/* modification sequence number for NFS */
837 	__le64 sequence;
838 
839 	/*
840 	 * a little future expansion, for more than this we can
841 	 * just grow the inode item and version it
842 	 */
843 	__le64 reserved[4];
844 	struct btrfs_timespec atime;
845 	struct btrfs_timespec ctime;
846 	struct btrfs_timespec mtime;
847 	struct btrfs_timespec otime;
848 } __attribute__ ((__packed__));
849 
850 struct btrfs_dir_log_item {
851 	__le64 end;
852 } __attribute__ ((__packed__));
853 
854 struct btrfs_dir_item {
855 	struct btrfs_disk_key location;
856 	__le64 transid;
857 	__le16 data_len;
858 	__le16 name_len;
859 	__u8 type;
860 } __attribute__ ((__packed__));
861 
#define BTRFS_ROOT_SUBVOL_RDONLY	(1ULL << 0)

/*
 * Internal, in-memory flag: the subvolume has been marked for deletion but
 * is still visible as a directory.
 */
#define BTRFS_ROOT_SUBVOL_DEAD		(1ULL << 48)
869 
870 struct btrfs_root_item {
871 	struct btrfs_inode_item inode;
872 	__le64 generation;
873 	__le64 root_dirid;
874 	__le64 bytenr;
875 	__le64 byte_limit;
876 	__le64 bytes_used;
877 	__le64 last_snapshot;
878 	__le64 flags;
879 	__le32 refs;
880 	struct btrfs_disk_key drop_progress;
881 	__u8 drop_level;
882 	__u8 level;
883 
884 	/*
885 	 * The following fields appear after subvol_uuids+subvol_times
886 	 * were introduced.
887 	 */
888 
889 	/*
890 	 * This generation number is used to test if the new fields are valid
891 	 * and up to date while reading the root item. Every time the root item
892 	 * is written out, the "generation" field is copied into this field. If
893 	 * anyone ever mounted the fs with an older kernel, we will have
894 	 * mismatching generation values here and thus must invalidate the
895 	 * new fields. See btrfs_update_root and btrfs_find_last_root for
896 	 * details.
897 	 * the offset of generation_v2 is also used as the start for the memset
898 	 * when invalidating the fields.
899 	 */
900 	__le64 generation_v2;
901 	__u8 uuid[BTRFS_UUID_SIZE];
902 	__u8 parent_uuid[BTRFS_UUID_SIZE];
903 	__u8 received_uuid[BTRFS_UUID_SIZE];
904 	__le64 ctransid; /* updated when an inode changes */
905 	__le64 otransid; /* trans when created */
906 	__le64 stransid; /* trans when sent. non-zero for received subvol */
907 	__le64 rtransid; /* trans when received. non-zero for received subvol */
908 	struct btrfs_timespec ctime;
909 	struct btrfs_timespec otime;
910 	struct btrfs_timespec stime;
911 	struct btrfs_timespec rtime;
912 	__le64 reserved[8]; /* for future */
913 } __attribute__ ((__packed__));
914 
915 /*
916  * Btrfs root item used to be smaller than current size.  The old format ends
917  * at where member generation_v2 is.
918  */
919 static inline __u32 btrfs_legacy_root_item_size(void)
920 {
921 	return offsetof(struct btrfs_root_item, generation_v2);
922 }
923 
924 /*
925  * this is used for both forward and backward root refs
926  */
927 struct btrfs_root_ref {
928 	__le64 dirid;
929 	__le64 sequence;
930 	__le16 name_len;
931 } __attribute__ ((__packed__));
932 
933 struct btrfs_disk_balance_args {
934 	/*
935 	 * profiles to operate on, single is denoted by
936 	 * BTRFS_AVAIL_ALLOC_BIT_SINGLE
937 	 */
938 	__le64 profiles;
939 
940 	/*
941 	 * usage filter
942 	 * BTRFS_BALANCE_ARGS_USAGE with a single value means '0..N'
943 	 * BTRFS_BALANCE_ARGS_USAGE_RANGE - range syntax, min..max
944 	 */
945 	union {
946 		__le64 usage;
947 		struct {
948 			__le32 usage_min;
949 			__le32 usage_max;
950 		};
951 	};
952 
953 	/* devid filter */
954 	__le64 devid;
955 
956 	/* devid subset filter [pstart..pend) */
957 	__le64 pstart;
958 	__le64 pend;
959 
960 	/* btrfs virtual address space subset filter [vstart..vend) */
961 	__le64 vstart;
962 	__le64 vend;
963 
964 	/*
965 	 * profile to convert to, single is denoted by
966 	 * BTRFS_AVAIL_ALLOC_BIT_SINGLE
967 	 */
968 	__le64 target;
969 
970 	/* BTRFS_BALANCE_ARGS_* */
971 	__le64 flags;
972 
973 	/*
974 	 * BTRFS_BALANCE_ARGS_LIMIT with value 'limit'
975 	 * BTRFS_BALANCE_ARGS_LIMIT_RANGE - the extend version can use minimum
976 	 * and maximum
977 	 */
978 	union {
979 		__le64 limit;
980 		struct {
981 			__le32 limit_min;
982 			__le32 limit_max;
983 		};
984 	};
985 
986 	/*
987 	 * Process chunks that cross stripes_min..stripes_max devices,
988 	 * BTRFS_BALANCE_ARGS_STRIPES_RANGE
989 	 */
990 	__le32 stripes_min;
991 	__le32 stripes_max;
992 
993 	__le64 unused[6];
994 } __attribute__ ((__packed__));
995 
996 /*
997  * store balance parameters to disk so that balance can be properly
998  * resumed after crash or unmount
999  */
1000 struct btrfs_balance_item {
1001 	/* BTRFS_BALANCE_* */
1002 	__le64 flags;
1003 
1004 	struct btrfs_disk_balance_args data;
1005 	struct btrfs_disk_balance_args meta;
1006 	struct btrfs_disk_balance_args sys;
1007 
1008 	__le64 unused[4];
1009 } __attribute__ ((__packed__));
1010 
/* File extent types; BTRFS_NR_FILE_EXTENT_TYPES is the number of types. */
enum {
	BTRFS_FILE_EXTENT_INLINE	= 0,
	BTRFS_FILE_EXTENT_REG		= 1,
	BTRFS_FILE_EXTENT_PREALLOC	= 2,
	BTRFS_NR_FILE_EXTENT_TYPES	= 3,
};
1017 
1018 struct btrfs_file_extent_item {
1019 	/*
1020 	 * transaction id that created this extent
1021 	 */
1022 	__le64 generation;
1023 	/*
1024 	 * max number of bytes to hold this extent in ram
1025 	 * when we split a compressed extent we can't know how big
1026 	 * each of the resulting pieces will be.  So, this is
1027 	 * an upper limit on the size of the extent in ram instead of
1028 	 * an exact limit.
1029 	 */
1030 	__le64 ram_bytes;
1031 
1032 	/*
1033 	 * 32 bits for the various ways we might encode the data,
1034 	 * including compression and encryption.  If any of these
1035 	 * are set to something a given disk format doesn't understand
1036 	 * it is treated like an incompat flag for reading and writing,
1037 	 * but not for stat.
1038 	 */
1039 	__u8 compression;
1040 	__u8 encryption;
1041 	__le16 other_encoding; /* spare for later use */
1042 
1043 	/* are we inline data or a real extent? */
1044 	__u8 type;
1045 
1046 	/*
1047 	 * disk space consumed by the extent, checksum blocks are included
1048 	 * in these numbers
1049 	 *
1050 	 * At this offset in the structure, the inline extent data start.
1051 	 */
1052 	__le64 disk_bytenr;
1053 	__le64 disk_num_bytes;
1054 	/*
1055 	 * the logical offset in file blocks (no csums)
1056 	 * this extent record is for.  This allows a file extent to point
1057 	 * into the middle of an existing extent on disk, sharing it
1058 	 * between two snapshots (useful if some bytes in the middle of the
1059 	 * extent have changed
1060 	 */
1061 	__le64 offset;
1062 	/*
1063 	 * the logical number of file blocks (no csums included).  This
1064 	 * always reflects the size uncompressed and without encoding.
1065 	 */
1066 	__le64 num_bytes;
1067 
1068 } __attribute__ ((__packed__));
1069 
/*
 * Checksum item body.  Only a single byte is declared here.
 * NOTE(review): presumably a placeholder for the start of variable-length
 * checksum data stored in the item - confirm against the users.
 */
1070 struct btrfs_csum_item {
1071 	__u8 csum;
1072 } __attribute__ ((__packed__));
1073 
/*
 * Device statistics item body: an array of BTRFS_DEV_STAT_VALUES_MAX
 * little-endian 64-bit values, packed.
 * NOTE(review): BTRFS_DEV_STAT_VALUES_MAX is not defined in this part of the
 * file; presumably it comes from <linux/btrfs.h> - confirm.
 */
1074 struct btrfs_dev_stats_item {
1075 	/*
1076 	 * grow this item struct at the end for future enhancements and keep
1077 	 * the existing values unchanged
1078 	 */
1079 	__le64 values[BTRFS_DEV_STAT_VALUES_MAX];
1080 } __attribute__ ((__packed__));
1081 
/*
 * NOTE(review): presumably the values stored in
 * btrfs_dev_replace_item.cont_reading_from_srcdev_mode - confirm.
 */
1082 #define BTRFS_DEV_REPLACE_ITEM_CONT_READING_FROM_SRCDEV_MODE_ALWAYS	0
1083 #define BTRFS_DEV_REPLACE_ITEM_CONT_READING_FROM_SRCDEV_MODE_AVOID	1
1084 
/*
 * Device replace item body: nine little-endian 64-bit fields, packed
 * (72 bytes in total).
 */
1085 struct btrfs_dev_replace_item {
1086 	/*
1087 	 * grow this item struct at the end for future enhancements and keep
1088 	 * the existing values unchanged
1089 	 */
1090 	__le64 src_devid;
1091 	__le64 cursor_left;
1092 	__le64 cursor_right;
	/*
	 * NOTE(review): presumably one of
	 * BTRFS_DEV_REPLACE_ITEM_CONT_READING_FROM_SRCDEV_MODE_* - confirm.
	 */
1093 	__le64 cont_reading_from_srcdev_mode;
1094 
1095 	__le64 replace_state;
1096 	__le64 time_started;
1097 	__le64 time_stopped;
1098 	__le64 num_write_errors;
1099 	__le64 num_uncorrectable_read_errors;
1100 } __attribute__ ((__packed__));
1101 
1102 /*
 * Different types of block groups (and chunks).
 *
 * Bits 0-2 are the type bits collected in BTRFS_BLOCK_GROUP_TYPE_MASK,
 * bits 3-10 are the profile bits collected in BTRFS_BLOCK_GROUP_PROFILE_MASK.
 */
1103 #define BTRFS_BLOCK_GROUP_DATA		(1ULL << 0)
1104 #define BTRFS_BLOCK_GROUP_SYSTEM	(1ULL << 1)
1105 #define BTRFS_BLOCK_GROUP_METADATA	(1ULL << 2)
1106 #define BTRFS_BLOCK_GROUP_RAID0		(1ULL << 3)
1107 #define BTRFS_BLOCK_GROUP_RAID1		(1ULL << 4)
1108 #define BTRFS_BLOCK_GROUP_DUP		(1ULL << 5)
1109 #define BTRFS_BLOCK_GROUP_RAID10	(1ULL << 6)
1110 #define BTRFS_BLOCK_GROUP_RAID5         (1ULL << 7)
1111 #define BTRFS_BLOCK_GROUP_RAID6         (1ULL << 8)
1112 #define BTRFS_BLOCK_GROUP_RAID1C3       (1ULL << 9)
1113 #define BTRFS_BLOCK_GROUP_RAID1C4       (1ULL << 10)
/*
 * Union of BTRFS_AVAIL_ALLOC_BIT_SINGLE and BTRFS_SPACE_INFO_GLOBAL_RSV.
 * Both are defined further down; a macro body is only expanded where the
 * macro is used, so the forward reference is fine.
 */
1114 #define BTRFS_BLOCK_GROUP_RESERVED	(BTRFS_AVAIL_ALLOC_BIT_SINGLE | \
1115 					 BTRFS_SPACE_INFO_GLOBAL_RSV)
1116 
/* all block group type bits */
1117 #define BTRFS_BLOCK_GROUP_TYPE_MASK	(BTRFS_BLOCK_GROUP_DATA |    \
1118 					 BTRFS_BLOCK_GROUP_SYSTEM |  \
1119 					 BTRFS_BLOCK_GROUP_METADATA)
1120 
/* all profile bits; SINGLE has no bit of its own in this mask */
1121 #define BTRFS_BLOCK_GROUP_PROFILE_MASK	(BTRFS_BLOCK_GROUP_RAID0 |   \
1122 					 BTRFS_BLOCK_GROUP_RAID1 |   \
1123 					 BTRFS_BLOCK_GROUP_RAID1C3 | \
1124 					 BTRFS_BLOCK_GROUP_RAID1C4 | \
1125 					 BTRFS_BLOCK_GROUP_RAID5 |   \
1126 					 BTRFS_BLOCK_GROUP_RAID6 |   \
1127 					 BTRFS_BLOCK_GROUP_DUP |     \
1128 					 BTRFS_BLOCK_GROUP_RAID10)
/* the two RAID5/RAID6 profile bits */
1129 #define BTRFS_BLOCK_GROUP_RAID56_MASK	(BTRFS_BLOCK_GROUP_RAID5 |   \
1130 					 BTRFS_BLOCK_GROUP_RAID6)
1131 
/* the three RAID1 profile bits (RAID1, RAID1C3, RAID1C4) */
1132 #define BTRFS_BLOCK_GROUP_RAID1_MASK	(BTRFS_BLOCK_GROUP_RAID1 |   \
1133 					 BTRFS_BLOCK_GROUP_RAID1C3 | \
1134 					 BTRFS_BLOCK_GROUP_RAID1C4)
1135 
1136 /*
1137  * We need a bit for restriper to be able to tell when chunks of type
1138  * SINGLE are available.  This "extended" profile format is used in
1139  * fs_info->avail_*_alloc_bits (in-memory) and balance item fields
1140  * (on-disk).  The corresponding on-disk bit in chunk.type is reserved
1141  * to avoid remappings between the two formats in the future.
1142  */
1143 #define BTRFS_AVAIL_ALLOC_BIT_SINGLE	(1ULL << 48)
1144 
1145 /*
1146  * A fake block group type that is used to communicate global block reserve
1147  * size to userspace via the SPACE_INFO ioctl.
1148  */
1149 #define BTRFS_SPACE_INFO_GLOBAL_RSV	(1ULL << 49)
1150 
/* the profile bits plus the SINGLE bit of the "extended" profile format */
1151 #define BTRFS_EXTENDED_PROFILE_MASK	(BTRFS_BLOCK_GROUP_PROFILE_MASK | \
1152 					 BTRFS_AVAIL_ALLOC_BIT_SINGLE)
1153 
/*
 * Convert chunk flags to the "extended" profile format: flags that carry no
 * BTRFS_BLOCK_GROUP_PROFILE_MASK bit get BTRFS_AVAIL_ALLOC_BIT_SINGLE added,
 * all other flags are returned unchanged.
 */
1154 static inline __u64 chunk_to_extended(__u64 flags)
1155 {
1156 	if (flags & BTRFS_BLOCK_GROUP_PROFILE_MASK)
1157 		return flags;
1158 
1159 	return flags | BTRFS_AVAIL_ALLOC_BIT_SINGLE;
1160 }
/*
 * Convert "extended" profile flags back to chunk flags by clearing
 * BTRFS_AVAIL_ALLOC_BIT_SINGLE; every other bit is passed through.
 */
1161 static inline __u64 extended_to_chunk(__u64 flags)
1162 {
1163 	flags &= ~BTRFS_AVAIL_ALLOC_BIT_SINGLE;
	return flags;
1164 }
1165 
/*
 * Block group item body: three little-endian 64-bit fields, packed
 * (24 bytes in total).
 */
1166 struct btrfs_block_group_item {
1167 	__le64 used;		/* NOTE(review): presumably space in use - confirm units */
1168 	__le64 chunk_objectid;
1169 	__le64 flags;		/* presumably BTRFS_BLOCK_GROUP_* bits - confirm */
1170 } __attribute__ ((__packed__));
1171 
/*
 * Free space info item body: two little-endian 32-bit fields, packed
 * (8 bytes in total).
 */
1172 struct btrfs_free_space_info {
1173 	__le32 extent_count;
1174 	__le32 flags;		/* presumably BTRFS_FREE_SPACE_* bits - confirm */
1175 } __attribute__ ((__packed__));
1176 
/*
 * NOTE(review): presumably a bit for btrfs_free_space_info.flags.  The
 * constant is 64 bits wide while that field is 32 bits; bit 0 fits in both.
 */
1177 #define BTRFS_FREE_SPACE_USING_BITMAPS (1ULL << 0)
1178 
/* Bit position of the level inside a 64-bit qgroup id. */
1179 #define BTRFS_QGROUP_LEVEL_SHIFT		48
/* Return the level of @qgroupid: the 16 bits from BTRFS_QGROUP_LEVEL_SHIFT up. */
1180 static inline __u16 btrfs_qgroup_level(__u64 qgroupid)
1181 {
1182 	const __u64 level = qgroupid >> BTRFS_QGROUP_LEVEL_SHIFT;

	return (__u16)level;
1183 }
1184 
1185 /*
1186  * is subvolume quota turned on?
1187  */
1188 #define BTRFS_QGROUP_STATUS_FLAG_ON		(1ULL << 0)
1189 /*
1190  * RESCAN is set during the initialization phase
1191  */
1192 #define BTRFS_QGROUP_STATUS_FLAG_RESCAN		(1ULL << 1)
1193 /*
1194  * Some qgroup entries are known to be out of date,
1195  * either because the configuration has changed in a way that
1196  * makes a rescan necessary, or because the fs has been mounted
1197  * with a non-qgroup-aware version.
1198  * Turning quota off and on again makes it inconsistent, too.
1199  */
1200 #define BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT	(1ULL << 2)
1201 
/* all of the status flag bits defined above */
1202 #define BTRFS_QGROUP_STATUS_FLAGS_MASK	(BTRFS_QGROUP_STATUS_FLAG_ON |		\
1203 					 BTRFS_QGROUP_STATUS_FLAG_RESCAN |	\
1204 					 BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT)
1205 
/* NOTE(review): presumably the value of btrfs_qgroup_status_item.version - confirm */
1206 #define BTRFS_QGROUP_STATUS_VERSION        1
1207 
/*
 * Qgroup status item body: four little-endian 64-bit fields, packed
 * (32 bytes in total).
 */
1208 struct btrfs_qgroup_status_item {
1209 	__le64 version;
1210 	/*
1211 	 * the generation is updated during every commit. As older
1212 	 * versions of btrfs are not aware of qgroups, it will be
1213 	 * possible to detect inconsistencies by checking the
1214 	 * generation at mount time
1215 	 */
1216 	__le64 generation;
1217 
1218 	/* BTRFS_QGROUP_STATUS_FLAG_* bits */
1219 	__le64 flags;
1220 
1221 	/*
1222 	 * only used during scanning to record the progress
1223 	 * of the scan. It contains a logical address
1224 	 */
1225 	__le64 rescan;
1226 } __attribute__ ((__packed__));
1227 
/*
 * Qgroup info item body: five little-endian 64-bit fields, packed
 * (40 bytes in total).
 * NOTE(review): rfer/excl presumably stand for referenced/exclusive and the
 * *_cmpr fields for their compressed counterparts - confirm against the
 * qgroup code.
 */
1228 struct btrfs_qgroup_info_item {
1229 	__le64 generation;
1230 	__le64 rfer;
1231 	__le64 rfer_cmpr;
1232 	__le64 excl;
1233 	__le64 excl_cmpr;
1234 } __attribute__ ((__packed__));
1235 
/*
 * Qgroup limit item body: five little-endian 64-bit fields, packed
 * (40 bytes in total).
 * NOTE(review): max_* and rsv_* presumably hold limits and reservations for
 * the referenced and exclusive amounts - confirm against the qgroup code.
 */
1236 struct btrfs_qgroup_limit_item {
1237 	/*
1238 	 * only updated when any of the other values change
1239 	 */
1240 	__le64 flags;
1241 	__le64 max_rfer;
1242 	__le64 max_excl;
1243 	__le64 rsv_rfer;
1244 	__le64 rsv_excl;
1245 } __attribute__ ((__packed__));
1246 
/*
 * Verity descriptor item body, packed: 8 + 16 + 1 = 25 bytes.
 */
1247 struct btrfs_verity_descriptor_item {
1248 	/* Size of the verity descriptor in bytes */
1249 	__le64 size;
1250 	/*
1251 	 * When we implement support for fscrypt, we will need to encrypt the
1252 	 * Merkle tree for encrypted verity files. These 128 bits are for the
1253 	 * eventual storage of an fscrypt initialization vector.
1254 	 */
1255 	__le64 reserved[2];
1256 	__u8 encryption;	/* NOTE(review): value meanings are not visible here */
1257 } __attribute__ ((__packed__));
1258 
1259 #endif /* _BTRFS_CTREE_H_ */
1260