xref: /openbmc/linux/fs/btrfs/tree-checker.c (revision 151f4e2b)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) Qu Wenruo 2017.  All rights reserved.
4  */
5 
6 /*
7  * The module is used to catch unexpected/corrupted tree block data.
8  * Such behavior can be caused either by a fuzzed image or bugs.
9  *
10  * The objective is to do leaf/node validation checks when a tree block is
11  * read from disk, and check *every* possible member, so other code won't
12  * need to check them again.
13  *
14  * Due to the potential for unwanted damage, every checker needs to be
15  * carefully reviewed so that it does not prevent mounting of valid images.
16  */
17 
18 #include <linux/types.h>
19 #include <linux/stddef.h>
20 #include <linux/error-injection.h>
21 #include "ctree.h"
22 #include "tree-checker.h"
23 #include "disk-io.h"
24 #include "compression.h"
25 #include "volumes.h"
26 
27 /*
28  * Error message should follow the following format:
29  * corrupt <type>: <identifier>, <reason>[, <bad_value>]
30  *
31  * @type:	leaf or node
32  * @identifier:	the necessary info to locate the leaf/node.
33  * 		It's recommended to decode key.objectid/offset if it's
34  * 		meaningful.
35  * @reason:	describe the error
36  * @bad_value:	optional, it's recommended to output bad value and its
37  *		expected value (range).
38  *
39  * Since comma is used to separate the components, only space is allowed
40  * inside each component.
41  */
42 
43 /*
44  * Append generic "corrupt leaf/node root=%llu block=%llu slot=%d: " to @fmt.
45  * Allows callers to customize the output.
46  */
47 __printf(3, 4)
48 __cold
49 static void generic_err(const struct extent_buffer *eb, int slot,
50 			const char *fmt, ...)
51 {
52 	const struct btrfs_fs_info *fs_info = eb->fs_info;
53 	struct va_format vaf;
54 	va_list args;
55 
56 	va_start(args, fmt);
57 
58 	vaf.fmt = fmt;
59 	vaf.va = &args;
60 
61 	btrfs_crit(fs_info,
62 		"corrupt %s: root=%llu block=%llu slot=%d, %pV",
63 		btrfs_header_level(eb) == 0 ? "leaf" : "node",
64 		btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot, &vaf);
65 	va_end(args);
66 }
67 
68 /*
69  * Customized reporter for extent data item, since its key objectid and
70  * offset has its own meaning.
71  */
72 __printf(3, 4)
73 __cold
74 static void file_extent_err(const struct extent_buffer *eb, int slot,
75 			    const char *fmt, ...)
76 {
77 	const struct btrfs_fs_info *fs_info = eb->fs_info;
78 	struct btrfs_key key;
79 	struct va_format vaf;
80 	va_list args;
81 
82 	btrfs_item_key_to_cpu(eb, &key, slot);
83 	va_start(args, fmt);
84 
85 	vaf.fmt = fmt;
86 	vaf.va = &args;
87 
88 	btrfs_crit(fs_info,
89 	"corrupt %s: root=%llu block=%llu slot=%d ino=%llu file_offset=%llu, %pV",
90 		btrfs_header_level(eb) == 0 ? "leaf" : "node",
91 		btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot,
92 		key.objectid, key.offset, &vaf);
93 	va_end(args);
94 }
95 
/*
 * Check that btrfs_file_extent_##name of @fi is a multiple of @alignment.
 *
 * A misaligned value is reported through file_extent_err().
 * Evaluates to 0 if the member is aligned, 1 otherwise.
 */
#define CHECK_FE_ALIGNED(leaf, slot, fi, name, alignment)		      \
({									      \
	const int __fe_misaligned =					      \
		!IS_ALIGNED(btrfs_file_extent_##name((leaf), (fi)),	      \
			    (alignment));				      \
									      \
	if (__fe_misaligned)						      \
		file_extent_err((leaf), (slot),				      \
	"invalid %s for file extent, have %llu, should be aligned to %u",     \
			(#name), btrfs_file_extent_##name((leaf), (fi)),      \
			(alignment));					      \
	__fe_misaligned;						      \
})
109 
110 static int check_extent_data_item(struct extent_buffer *leaf,
111 				  struct btrfs_key *key, int slot)
112 {
113 	struct btrfs_fs_info *fs_info = leaf->fs_info;
114 	struct btrfs_file_extent_item *fi;
115 	u32 sectorsize = fs_info->sectorsize;
116 	u32 item_size = btrfs_item_size_nr(leaf, slot);
117 
118 	if (!IS_ALIGNED(key->offset, sectorsize)) {
119 		file_extent_err(leaf, slot,
120 "unaligned file_offset for file extent, have %llu should be aligned to %u",
121 			key->offset, sectorsize);
122 		return -EUCLEAN;
123 	}
124 
125 	fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
126 
127 	if (btrfs_file_extent_type(leaf, fi) > BTRFS_FILE_EXTENT_TYPES) {
128 		file_extent_err(leaf, slot,
129 		"invalid type for file extent, have %u expect range [0, %u]",
130 			btrfs_file_extent_type(leaf, fi),
131 			BTRFS_FILE_EXTENT_TYPES);
132 		return -EUCLEAN;
133 	}
134 
135 	/*
136 	 * Support for new compression/encryption must introduce incompat flag,
137 	 * and must be caught in open_ctree().
138 	 */
139 	if (btrfs_file_extent_compression(leaf, fi) > BTRFS_COMPRESS_TYPES) {
140 		file_extent_err(leaf, slot,
141 	"invalid compression for file extent, have %u expect range [0, %u]",
142 			btrfs_file_extent_compression(leaf, fi),
143 			BTRFS_COMPRESS_TYPES);
144 		return -EUCLEAN;
145 	}
146 	if (btrfs_file_extent_encryption(leaf, fi)) {
147 		file_extent_err(leaf, slot,
148 			"invalid encryption for file extent, have %u expect 0",
149 			btrfs_file_extent_encryption(leaf, fi));
150 		return -EUCLEAN;
151 	}
152 	if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_INLINE) {
153 		/* Inline extent must have 0 as key offset */
154 		if (key->offset) {
155 			file_extent_err(leaf, slot,
156 		"invalid file_offset for inline file extent, have %llu expect 0",
157 				key->offset);
158 			return -EUCLEAN;
159 		}
160 
161 		/* Compressed inline extent has no on-disk size, skip it */
162 		if (btrfs_file_extent_compression(leaf, fi) !=
163 		    BTRFS_COMPRESS_NONE)
164 			return 0;
165 
166 		/* Uncompressed inline extent size must match item size */
167 		if (item_size != BTRFS_FILE_EXTENT_INLINE_DATA_START +
168 		    btrfs_file_extent_ram_bytes(leaf, fi)) {
169 			file_extent_err(leaf, slot,
170 	"invalid ram_bytes for uncompressed inline extent, have %u expect %llu",
171 				item_size, BTRFS_FILE_EXTENT_INLINE_DATA_START +
172 				btrfs_file_extent_ram_bytes(leaf, fi));
173 			return -EUCLEAN;
174 		}
175 		return 0;
176 	}
177 
178 	/* Regular or preallocated extent has fixed item size */
179 	if (item_size != sizeof(*fi)) {
180 		file_extent_err(leaf, slot,
181 	"invalid item size for reg/prealloc file extent, have %u expect %zu",
182 			item_size, sizeof(*fi));
183 		return -EUCLEAN;
184 	}
185 	if (CHECK_FE_ALIGNED(leaf, slot, fi, ram_bytes, sectorsize) ||
186 	    CHECK_FE_ALIGNED(leaf, slot, fi, disk_bytenr, sectorsize) ||
187 	    CHECK_FE_ALIGNED(leaf, slot, fi, disk_num_bytes, sectorsize) ||
188 	    CHECK_FE_ALIGNED(leaf, slot, fi, offset, sectorsize) ||
189 	    CHECK_FE_ALIGNED(leaf, slot, fi, num_bytes, sectorsize))
190 		return -EUCLEAN;
191 	return 0;
192 }
193 
194 static int check_csum_item(struct extent_buffer *leaf, struct btrfs_key *key,
195 			   int slot)
196 {
197 	struct btrfs_fs_info *fs_info = leaf->fs_info;
198 	u32 sectorsize = fs_info->sectorsize;
199 	u32 csumsize = btrfs_super_csum_size(fs_info->super_copy);
200 
201 	if (key->objectid != BTRFS_EXTENT_CSUM_OBJECTID) {
202 		generic_err(leaf, slot,
203 		"invalid key objectid for csum item, have %llu expect %llu",
204 			key->objectid, BTRFS_EXTENT_CSUM_OBJECTID);
205 		return -EUCLEAN;
206 	}
207 	if (!IS_ALIGNED(key->offset, sectorsize)) {
208 		generic_err(leaf, slot,
209 	"unaligned key offset for csum item, have %llu should be aligned to %u",
210 			key->offset, sectorsize);
211 		return -EUCLEAN;
212 	}
213 	if (!IS_ALIGNED(btrfs_item_size_nr(leaf, slot), csumsize)) {
214 		generic_err(leaf, slot,
215 	"unaligned item size for csum item, have %u should be aligned to %u",
216 			btrfs_item_size_nr(leaf, slot), csumsize);
217 		return -EUCLEAN;
218 	}
219 	return 0;
220 }
221 
222 /*
223  * Customized reported for dir_item, only important new info is key->objectid,
224  * which represents inode number
225  */
226 __printf(3, 4)
227 __cold
228 static void dir_item_err(const struct extent_buffer *eb, int slot,
229 			 const char *fmt, ...)
230 {
231 	const struct btrfs_fs_info *fs_info = eb->fs_info;
232 	struct btrfs_key key;
233 	struct va_format vaf;
234 	va_list args;
235 
236 	btrfs_item_key_to_cpu(eb, &key, slot);
237 	va_start(args, fmt);
238 
239 	vaf.fmt = fmt;
240 	vaf.va = &args;
241 
242 	btrfs_crit(fs_info,
243 	"corrupt %s: root=%llu block=%llu slot=%d ino=%llu, %pV",
244 		btrfs_header_level(eb) == 0 ? "leaf" : "node",
245 		btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot,
246 		key.objectid, &vaf);
247 	va_end(args);
248 }
249 
250 static int check_dir_item(struct extent_buffer *leaf,
251 			  struct btrfs_key *key, int slot)
252 {
253 	struct btrfs_fs_info *fs_info = leaf->fs_info;
254 	struct btrfs_dir_item *di;
255 	u32 item_size = btrfs_item_size_nr(leaf, slot);
256 	u32 cur = 0;
257 
258 	di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
259 	while (cur < item_size) {
260 		u32 name_len;
261 		u32 data_len;
262 		u32 max_name_len;
263 		u32 total_size;
264 		u32 name_hash;
265 		u8 dir_type;
266 
267 		/* header itself should not cross item boundary */
268 		if (cur + sizeof(*di) > item_size) {
269 			dir_item_err(leaf, slot,
270 		"dir item header crosses item boundary, have %zu boundary %u",
271 				cur + sizeof(*di), item_size);
272 			return -EUCLEAN;
273 		}
274 
275 		/* dir type check */
276 		dir_type = btrfs_dir_type(leaf, di);
277 		if (dir_type >= BTRFS_FT_MAX) {
278 			dir_item_err(leaf, slot,
279 			"invalid dir item type, have %u expect [0, %u)",
280 				dir_type, BTRFS_FT_MAX);
281 			return -EUCLEAN;
282 		}
283 
284 		if (key->type == BTRFS_XATTR_ITEM_KEY &&
285 		    dir_type != BTRFS_FT_XATTR) {
286 			dir_item_err(leaf, slot,
287 		"invalid dir item type for XATTR key, have %u expect %u",
288 				dir_type, BTRFS_FT_XATTR);
289 			return -EUCLEAN;
290 		}
291 		if (dir_type == BTRFS_FT_XATTR &&
292 		    key->type != BTRFS_XATTR_ITEM_KEY) {
293 			dir_item_err(leaf, slot,
294 			"xattr dir type found for non-XATTR key");
295 			return -EUCLEAN;
296 		}
297 		if (dir_type == BTRFS_FT_XATTR)
298 			max_name_len = XATTR_NAME_MAX;
299 		else
300 			max_name_len = BTRFS_NAME_LEN;
301 
302 		/* Name/data length check */
303 		name_len = btrfs_dir_name_len(leaf, di);
304 		data_len = btrfs_dir_data_len(leaf, di);
305 		if (name_len > max_name_len) {
306 			dir_item_err(leaf, slot,
307 			"dir item name len too long, have %u max %u",
308 				name_len, max_name_len);
309 			return -EUCLEAN;
310 		}
311 		if (name_len + data_len > BTRFS_MAX_XATTR_SIZE(fs_info)) {
312 			dir_item_err(leaf, slot,
313 			"dir item name and data len too long, have %u max %u",
314 				name_len + data_len,
315 				BTRFS_MAX_XATTR_SIZE(fs_info));
316 			return -EUCLEAN;
317 		}
318 
319 		if (data_len && dir_type != BTRFS_FT_XATTR) {
320 			dir_item_err(leaf, slot,
321 			"dir item with invalid data len, have %u expect 0",
322 				data_len);
323 			return -EUCLEAN;
324 		}
325 
326 		total_size = sizeof(*di) + name_len + data_len;
327 
328 		/* header and name/data should not cross item boundary */
329 		if (cur + total_size > item_size) {
330 			dir_item_err(leaf, slot,
331 		"dir item data crosses item boundary, have %u boundary %u",
332 				cur + total_size, item_size);
333 			return -EUCLEAN;
334 		}
335 
336 		/*
337 		 * Special check for XATTR/DIR_ITEM, as key->offset is name
338 		 * hash, should match its name
339 		 */
340 		if (key->type == BTRFS_DIR_ITEM_KEY ||
341 		    key->type == BTRFS_XATTR_ITEM_KEY) {
342 			char namebuf[max(BTRFS_NAME_LEN, XATTR_NAME_MAX)];
343 
344 			read_extent_buffer(leaf, namebuf,
345 					(unsigned long)(di + 1), name_len);
346 			name_hash = btrfs_name_hash(namebuf, name_len);
347 			if (key->offset != name_hash) {
348 				dir_item_err(leaf, slot,
349 		"name hash mismatch with key, have 0x%016x expect 0x%016llx",
350 					name_hash, key->offset);
351 				return -EUCLEAN;
352 			}
353 		}
354 		cur += total_size;
355 		di = (struct btrfs_dir_item *)((void *)di + total_size);
356 	}
357 	return 0;
358 }
359 
360 __printf(3, 4)
361 __cold
362 static void block_group_err(const struct extent_buffer *eb, int slot,
363 			    const char *fmt, ...)
364 {
365 	const struct btrfs_fs_info *fs_info = eb->fs_info;
366 	struct btrfs_key key;
367 	struct va_format vaf;
368 	va_list args;
369 
370 	btrfs_item_key_to_cpu(eb, &key, slot);
371 	va_start(args, fmt);
372 
373 	vaf.fmt = fmt;
374 	vaf.va = &args;
375 
376 	btrfs_crit(fs_info,
377 	"corrupt %s: root=%llu block=%llu slot=%d bg_start=%llu bg_len=%llu, %pV",
378 		btrfs_header_level(eb) == 0 ? "leaf" : "node",
379 		btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot,
380 		key.objectid, key.offset, &vaf);
381 	va_end(args);
382 }
383 
384 static int check_block_group_item(struct extent_buffer *leaf,
385 				  struct btrfs_key *key, int slot)
386 {
387 	struct btrfs_block_group_item bgi;
388 	u32 item_size = btrfs_item_size_nr(leaf, slot);
389 	u64 flags;
390 	u64 type;
391 
392 	/*
393 	 * Here we don't really care about alignment since extent allocator can
394 	 * handle it.  We care more about the size.
395 	 */
396 	if (key->offset == 0) {
397 		block_group_err(leaf, slot,
398 				"invalid block group size 0");
399 		return -EUCLEAN;
400 	}
401 
402 	if (item_size != sizeof(bgi)) {
403 		block_group_err(leaf, slot,
404 			"invalid item size, have %u expect %zu",
405 				item_size, sizeof(bgi));
406 		return -EUCLEAN;
407 	}
408 
409 	read_extent_buffer(leaf, &bgi, btrfs_item_ptr_offset(leaf, slot),
410 			   sizeof(bgi));
411 	if (btrfs_block_group_chunk_objectid(&bgi) !=
412 	    BTRFS_FIRST_CHUNK_TREE_OBJECTID) {
413 		block_group_err(leaf, slot,
414 		"invalid block group chunk objectid, have %llu expect %llu",
415 				btrfs_block_group_chunk_objectid(&bgi),
416 				BTRFS_FIRST_CHUNK_TREE_OBJECTID);
417 		return -EUCLEAN;
418 	}
419 
420 	if (btrfs_block_group_used(&bgi) > key->offset) {
421 		block_group_err(leaf, slot,
422 			"invalid block group used, have %llu expect [0, %llu)",
423 				btrfs_block_group_used(&bgi), key->offset);
424 		return -EUCLEAN;
425 	}
426 
427 	flags = btrfs_block_group_flags(&bgi);
428 	if (hweight64(flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) > 1) {
429 		block_group_err(leaf, slot,
430 "invalid profile flags, have 0x%llx (%lu bits set) expect no more than 1 bit set",
431 			flags & BTRFS_BLOCK_GROUP_PROFILE_MASK,
432 			hweight64(flags & BTRFS_BLOCK_GROUP_PROFILE_MASK));
433 		return -EUCLEAN;
434 	}
435 
436 	type = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
437 	if (type != BTRFS_BLOCK_GROUP_DATA &&
438 	    type != BTRFS_BLOCK_GROUP_METADATA &&
439 	    type != BTRFS_BLOCK_GROUP_SYSTEM &&
440 	    type != (BTRFS_BLOCK_GROUP_METADATA |
441 			   BTRFS_BLOCK_GROUP_DATA)) {
442 		block_group_err(leaf, slot,
443 "invalid type, have 0x%llx (%lu bits set) expect either 0x%llx, 0x%llx, 0x%llx or 0x%llx",
444 			type, hweight64(type),
445 			BTRFS_BLOCK_GROUP_DATA, BTRFS_BLOCK_GROUP_METADATA,
446 			BTRFS_BLOCK_GROUP_SYSTEM,
447 			BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA);
448 		return -EUCLEAN;
449 	}
450 	return 0;
451 }
452 
453 __printf(4, 5)
454 __cold
455 static void chunk_err(const struct extent_buffer *leaf,
456 		      const struct btrfs_chunk *chunk, u64 logical,
457 		      const char *fmt, ...)
458 {
459 	const struct btrfs_fs_info *fs_info = leaf->fs_info;
460 	bool is_sb;
461 	struct va_format vaf;
462 	va_list args;
463 	int i;
464 	int slot = -1;
465 
466 	/* Only superblock eb is able to have such small offset */
467 	is_sb = (leaf->start == BTRFS_SUPER_INFO_OFFSET);
468 
469 	if (!is_sb) {
470 		/*
471 		 * Get the slot number by iterating through all slots, this
472 		 * would provide better readability.
473 		 */
474 		for (i = 0; i < btrfs_header_nritems(leaf); i++) {
475 			if (btrfs_item_ptr_offset(leaf, i) ==
476 					(unsigned long)chunk) {
477 				slot = i;
478 				break;
479 			}
480 		}
481 	}
482 	va_start(args, fmt);
483 	vaf.fmt = fmt;
484 	vaf.va = &args;
485 
486 	if (is_sb)
487 		btrfs_crit(fs_info,
488 		"corrupt superblock syschunk array: chunk_start=%llu, %pV",
489 			   logical, &vaf);
490 	else
491 		btrfs_crit(fs_info,
492 	"corrupt leaf: root=%llu block=%llu slot=%d chunk_start=%llu, %pV",
493 			   BTRFS_CHUNK_TREE_OBJECTID, leaf->start, slot,
494 			   logical, &vaf);
495 	va_end(args);
496 }
497 
498 /*
499  * The common chunk check which could also work on super block sys chunk array.
500  *
501  * Return -EUCLEAN if anything is corrupted.
502  * Return 0 if everything is OK.
503  */
504 int btrfs_check_chunk_valid(struct extent_buffer *leaf,
505 			    struct btrfs_chunk *chunk, u64 logical)
506 {
507 	struct btrfs_fs_info *fs_info = leaf->fs_info;
508 	u64 length;
509 	u64 stripe_len;
510 	u16 num_stripes;
511 	u16 sub_stripes;
512 	u64 type;
513 	u64 features;
514 	bool mixed = false;
515 
516 	length = btrfs_chunk_length(leaf, chunk);
517 	stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
518 	num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
519 	sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
520 	type = btrfs_chunk_type(leaf, chunk);
521 
522 	if (!num_stripes) {
523 		chunk_err(leaf, chunk, logical,
524 			  "invalid chunk num_stripes, have %u", num_stripes);
525 		return -EUCLEAN;
526 	}
527 	if (!IS_ALIGNED(logical, fs_info->sectorsize)) {
528 		chunk_err(leaf, chunk, logical,
529 		"invalid chunk logical, have %llu should aligned to %u",
530 			  logical, fs_info->sectorsize);
531 		return -EUCLEAN;
532 	}
533 	if (btrfs_chunk_sector_size(leaf, chunk) != fs_info->sectorsize) {
534 		chunk_err(leaf, chunk, logical,
535 			  "invalid chunk sectorsize, have %u expect %u",
536 			  btrfs_chunk_sector_size(leaf, chunk),
537 			  fs_info->sectorsize);
538 		return -EUCLEAN;
539 	}
540 	if (!length || !IS_ALIGNED(length, fs_info->sectorsize)) {
541 		chunk_err(leaf, chunk, logical,
542 			  "invalid chunk length, have %llu", length);
543 		return -EUCLEAN;
544 	}
545 	if (!is_power_of_2(stripe_len) || stripe_len != BTRFS_STRIPE_LEN) {
546 		chunk_err(leaf, chunk, logical,
547 			  "invalid chunk stripe length: %llu",
548 			  stripe_len);
549 		return -EUCLEAN;
550 	}
551 	if (~(BTRFS_BLOCK_GROUP_TYPE_MASK | BTRFS_BLOCK_GROUP_PROFILE_MASK) &
552 	    type) {
553 		chunk_err(leaf, chunk, logical,
554 			  "unrecognized chunk type: 0x%llx",
555 			  ~(BTRFS_BLOCK_GROUP_TYPE_MASK |
556 			    BTRFS_BLOCK_GROUP_PROFILE_MASK) &
557 			  btrfs_chunk_type(leaf, chunk));
558 		return -EUCLEAN;
559 	}
560 
561 	if (!is_power_of_2(type & BTRFS_BLOCK_GROUP_PROFILE_MASK) &&
562 	    (type & BTRFS_BLOCK_GROUP_PROFILE_MASK) != 0) {
563 		chunk_err(leaf, chunk, logical,
564 		"invalid chunk profile flag: 0x%llx, expect 0 or 1 bit set",
565 			  type & BTRFS_BLOCK_GROUP_PROFILE_MASK);
566 		return -EUCLEAN;
567 	}
568 	if ((type & BTRFS_BLOCK_GROUP_TYPE_MASK) == 0) {
569 		chunk_err(leaf, chunk, logical,
570 	"missing chunk type flag, have 0x%llx one bit must be set in 0x%llx",
571 			  type, BTRFS_BLOCK_GROUP_TYPE_MASK);
572 		return -EUCLEAN;
573 	}
574 
575 	if ((type & BTRFS_BLOCK_GROUP_SYSTEM) &&
576 	    (type & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA))) {
577 		chunk_err(leaf, chunk, logical,
578 			  "system chunk with data or metadata type: 0x%llx",
579 			  type);
580 		return -EUCLEAN;
581 	}
582 
583 	features = btrfs_super_incompat_flags(fs_info->super_copy);
584 	if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
585 		mixed = true;
586 
587 	if (!mixed) {
588 		if ((type & BTRFS_BLOCK_GROUP_METADATA) &&
589 		    (type & BTRFS_BLOCK_GROUP_DATA)) {
590 			chunk_err(leaf, chunk, logical,
591 			"mixed chunk type in non-mixed mode: 0x%llx", type);
592 			return -EUCLEAN;
593 		}
594 	}
595 
596 	if ((type & BTRFS_BLOCK_GROUP_RAID10 && sub_stripes != 2) ||
597 	    (type & BTRFS_BLOCK_GROUP_RAID1 && num_stripes != 2) ||
598 	    (type & BTRFS_BLOCK_GROUP_RAID5 && num_stripes < 2) ||
599 	    (type & BTRFS_BLOCK_GROUP_RAID6 && num_stripes < 3) ||
600 	    (type & BTRFS_BLOCK_GROUP_DUP && num_stripes != 2) ||
601 	    ((type & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0 && num_stripes != 1)) {
602 		chunk_err(leaf, chunk, logical,
603 			"invalid num_stripes:sub_stripes %u:%u for profile %llu",
604 			num_stripes, sub_stripes,
605 			type & BTRFS_BLOCK_GROUP_PROFILE_MASK);
606 		return -EUCLEAN;
607 	}
608 
609 	return 0;
610 }
611 
612 __printf(3, 4)
613 __cold
614 static void dev_item_err(const struct extent_buffer *eb, int slot,
615 			 const char *fmt, ...)
616 {
617 	struct btrfs_key key;
618 	struct va_format vaf;
619 	va_list args;
620 
621 	btrfs_item_key_to_cpu(eb, &key, slot);
622 	va_start(args, fmt);
623 
624 	vaf.fmt = fmt;
625 	vaf.va = &args;
626 
627 	btrfs_crit(eb->fs_info,
628 	"corrupt %s: root=%llu block=%llu slot=%d devid=%llu %pV",
629 		btrfs_header_level(eb) == 0 ? "leaf" : "node",
630 		btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot,
631 		key.objectid, &vaf);
632 	va_end(args);
633 }
634 
635 static int check_dev_item(struct extent_buffer *leaf,
636 			  struct btrfs_key *key, int slot)
637 {
638 	struct btrfs_fs_info *fs_info = leaf->fs_info;
639 	struct btrfs_dev_item *ditem;
640 	u64 max_devid = max(BTRFS_MAX_DEVS(fs_info), BTRFS_MAX_DEVS_SYS_CHUNK);
641 
642 	if (key->objectid != BTRFS_DEV_ITEMS_OBJECTID) {
643 		dev_item_err(leaf, slot,
644 			     "invalid objectid: has=%llu expect=%llu",
645 			     key->objectid, BTRFS_DEV_ITEMS_OBJECTID);
646 		return -EUCLEAN;
647 	}
648 	if (key->offset > max_devid) {
649 		dev_item_err(leaf, slot,
650 			     "invalid devid: has=%llu expect=[0, %llu]",
651 			     key->offset, max_devid);
652 		return -EUCLEAN;
653 	}
654 	ditem = btrfs_item_ptr(leaf, slot, struct btrfs_dev_item);
655 	if (btrfs_device_id(leaf, ditem) != key->offset) {
656 		dev_item_err(leaf, slot,
657 			     "devid mismatch: key has=%llu item has=%llu",
658 			     key->offset, btrfs_device_id(leaf, ditem));
659 		return -EUCLEAN;
660 	}
661 
662 	/*
663 	 * For device total_bytes, we don't have reliable way to check it, as
664 	 * it can be 0 for device removal. Device size check can only be done
665 	 * by dev extents check.
666 	 */
667 	if (btrfs_device_bytes_used(leaf, ditem) >
668 	    btrfs_device_total_bytes(leaf, ditem)) {
669 		dev_item_err(leaf, slot,
670 			     "invalid bytes used: have %llu expect [0, %llu]",
671 			     btrfs_device_bytes_used(leaf, ditem),
672 			     btrfs_device_total_bytes(leaf, ditem));
673 		return -EUCLEAN;
674 	}
675 	/*
676 	 * Remaining members like io_align/type/gen/dev_group aren't really
677 	 * utilized.  Skip them to make later usage of them easier.
678 	 */
679 	return 0;
680 }
681 
/*
 * Inode item error output has the same format as dir_item_err(), so simply
 * forward to it.
 *
 * @fs_info is accepted but not used by the expansion.  At least one
 * argument must follow @fmt, since __VA_ARGS__ is pasted after a comma.
 */
#define inode_item_err(fs_info, eb, slot, fmt, ...)			\
	dir_item_err(eb, slot, fmt, __VA_ARGS__)
685 
686 static int check_inode_item(struct extent_buffer *leaf,
687 			    struct btrfs_key *key, int slot)
688 {
689 	struct btrfs_fs_info *fs_info = leaf->fs_info;
690 	struct btrfs_inode_item *iitem;
691 	u64 super_gen = btrfs_super_generation(fs_info->super_copy);
692 	u32 valid_mask = (S_IFMT | S_ISUID | S_ISGID | S_ISVTX | 0777);
693 	u32 mode;
694 
695 	if ((key->objectid < BTRFS_FIRST_FREE_OBJECTID ||
696 	     key->objectid > BTRFS_LAST_FREE_OBJECTID) &&
697 	    key->objectid != BTRFS_ROOT_TREE_DIR_OBJECTID &&
698 	    key->objectid != BTRFS_FREE_INO_OBJECTID) {
699 		generic_err(leaf, slot,
700 	"invalid key objectid: has %llu expect %llu or [%llu, %llu] or %llu",
701 			    key->objectid, BTRFS_ROOT_TREE_DIR_OBJECTID,
702 			    BTRFS_FIRST_FREE_OBJECTID,
703 			    BTRFS_LAST_FREE_OBJECTID,
704 			    BTRFS_FREE_INO_OBJECTID);
705 		return -EUCLEAN;
706 	}
707 	if (key->offset != 0) {
708 		inode_item_err(fs_info, leaf, slot,
709 			"invalid key offset: has %llu expect 0",
710 			key->offset);
711 		return -EUCLEAN;
712 	}
713 	iitem = btrfs_item_ptr(leaf, slot, struct btrfs_inode_item);
714 
715 	/* Here we use super block generation + 1 to handle log tree */
716 	if (btrfs_inode_generation(leaf, iitem) > super_gen + 1) {
717 		inode_item_err(fs_info, leaf, slot,
718 			"invalid inode generation: has %llu expect (0, %llu]",
719 			       btrfs_inode_generation(leaf, iitem),
720 			       super_gen + 1);
721 		return -EUCLEAN;
722 	}
723 	/* Note for ROOT_TREE_DIR_ITEM, mkfs could set its transid 0 */
724 	if (btrfs_inode_transid(leaf, iitem) > super_gen + 1) {
725 		inode_item_err(fs_info, leaf, slot,
726 			"invalid inode generation: has %llu expect [0, %llu]",
727 			       btrfs_inode_transid(leaf, iitem), super_gen + 1);
728 		return -EUCLEAN;
729 	}
730 
731 	/*
732 	 * For size and nbytes it's better not to be too strict, as for dir
733 	 * item its size/nbytes can easily get wrong, but doesn't affect
734 	 * anything in the fs. So here we skip the check.
735 	 */
736 	mode = btrfs_inode_mode(leaf, iitem);
737 	if (mode & ~valid_mask) {
738 		inode_item_err(fs_info, leaf, slot,
739 			       "unknown mode bit detected: 0x%x",
740 			       mode & ~valid_mask);
741 		return -EUCLEAN;
742 	}
743 
744 	/*
745 	 * S_IFMT is not bit mapped so we can't completely rely on is_power_of_2,
746 	 * but is_power_of_2() can save us from checking FIFO/CHR/DIR/REG.
747 	 * Only needs to check BLK, LNK and SOCKS
748 	 */
749 	if (!is_power_of_2(mode & S_IFMT)) {
750 		if (!S_ISLNK(mode) && !S_ISBLK(mode) && !S_ISSOCK(mode)) {
751 			inode_item_err(fs_info, leaf, slot,
752 			"invalid mode: has 0%o expect valid S_IF* bit(s)",
753 				       mode & S_IFMT);
754 			return -EUCLEAN;
755 		}
756 	}
757 	if (S_ISDIR(mode) && btrfs_inode_nlink(leaf, iitem) > 1) {
758 		inode_item_err(fs_info, leaf, slot,
759 		       "invalid nlink: has %u expect no more than 1 for dir",
760 			btrfs_inode_nlink(leaf, iitem));
761 		return -EUCLEAN;
762 	}
763 	if (btrfs_inode_flags(leaf, iitem) & ~BTRFS_INODE_FLAG_MASK) {
764 		inode_item_err(fs_info, leaf, slot,
765 			       "unknown flags detected: 0x%llx",
766 			       btrfs_inode_flags(leaf, iitem) &
767 			       ~BTRFS_INODE_FLAG_MASK);
768 		return -EUCLEAN;
769 	}
770 	return 0;
771 }
772 
773 /*
774  * Common point to switch the item-specific validation.
775  */
776 static int check_leaf_item(struct extent_buffer *leaf,
777 			   struct btrfs_key *key, int slot)
778 {
779 	int ret = 0;
780 	struct btrfs_chunk *chunk;
781 
782 	switch (key->type) {
783 	case BTRFS_EXTENT_DATA_KEY:
784 		ret = check_extent_data_item(leaf, key, slot);
785 		break;
786 	case BTRFS_EXTENT_CSUM_KEY:
787 		ret = check_csum_item(leaf, key, slot);
788 		break;
789 	case BTRFS_DIR_ITEM_KEY:
790 	case BTRFS_DIR_INDEX_KEY:
791 	case BTRFS_XATTR_ITEM_KEY:
792 		ret = check_dir_item(leaf, key, slot);
793 		break;
794 	case BTRFS_BLOCK_GROUP_ITEM_KEY:
795 		ret = check_block_group_item(leaf, key, slot);
796 		break;
797 	case BTRFS_CHUNK_ITEM_KEY:
798 		chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
799 		ret = btrfs_check_chunk_valid(leaf, chunk, key->offset);
800 		break;
801 	case BTRFS_DEV_ITEM_KEY:
802 		ret = check_dev_item(leaf, key, slot);
803 		break;
804 	case BTRFS_INODE_ITEM_KEY:
805 		ret = check_inode_item(leaf, key, slot);
806 		break;
807 	}
808 	return ret;
809 }
810 
811 static int check_leaf(struct extent_buffer *leaf, bool check_item_data)
812 {
813 	struct btrfs_fs_info *fs_info = leaf->fs_info;
814 	/* No valid key type is 0, so all key should be larger than this key */
815 	struct btrfs_key prev_key = {0, 0, 0};
816 	struct btrfs_key key;
817 	u32 nritems = btrfs_header_nritems(leaf);
818 	int slot;
819 
820 	if (btrfs_header_level(leaf) != 0) {
821 		generic_err(leaf, 0,
822 			"invalid level for leaf, have %d expect 0",
823 			btrfs_header_level(leaf));
824 		return -EUCLEAN;
825 	}
826 
827 	/*
828 	 * Extent buffers from a relocation tree have a owner field that
829 	 * corresponds to the subvolume tree they are based on. So just from an
830 	 * extent buffer alone we can not find out what is the id of the
831 	 * corresponding subvolume tree, so we can not figure out if the extent
832 	 * buffer corresponds to the root of the relocation tree or not. So
833 	 * skip this check for relocation trees.
834 	 */
835 	if (nritems == 0 && !btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_RELOC)) {
836 		u64 owner = btrfs_header_owner(leaf);
837 
838 		/* These trees must never be empty */
839 		if (owner == BTRFS_ROOT_TREE_OBJECTID ||
840 		    owner == BTRFS_CHUNK_TREE_OBJECTID ||
841 		    owner == BTRFS_EXTENT_TREE_OBJECTID ||
842 		    owner == BTRFS_DEV_TREE_OBJECTID ||
843 		    owner == BTRFS_FS_TREE_OBJECTID ||
844 		    owner == BTRFS_DATA_RELOC_TREE_OBJECTID) {
845 			generic_err(leaf, 0,
846 			"invalid root, root %llu must never be empty",
847 				    owner);
848 			return -EUCLEAN;
849 		}
850 		return 0;
851 	}
852 
853 	if (nritems == 0)
854 		return 0;
855 
856 	/*
857 	 * Check the following things to make sure this is a good leaf, and
858 	 * leaf users won't need to bother with similar sanity checks:
859 	 *
860 	 * 1) key ordering
861 	 * 2) item offset and size
862 	 *    No overlap, no hole, all inside the leaf.
863 	 * 3) item content
864 	 *    If possible, do comprehensive sanity check.
865 	 *    NOTE: All checks must only rely on the item data itself.
866 	 */
867 	for (slot = 0; slot < nritems; slot++) {
868 		u32 item_end_expected;
869 		int ret;
870 
871 		btrfs_item_key_to_cpu(leaf, &key, slot);
872 
873 		/* Make sure the keys are in the right order */
874 		if (btrfs_comp_cpu_keys(&prev_key, &key) >= 0) {
875 			generic_err(leaf, slot,
876 	"bad key order, prev (%llu %u %llu) current (%llu %u %llu)",
877 				prev_key.objectid, prev_key.type,
878 				prev_key.offset, key.objectid, key.type,
879 				key.offset);
880 			return -EUCLEAN;
881 		}
882 
883 		/*
884 		 * Make sure the offset and ends are right, remember that the
885 		 * item data starts at the end of the leaf and grows towards the
886 		 * front.
887 		 */
888 		if (slot == 0)
889 			item_end_expected = BTRFS_LEAF_DATA_SIZE(fs_info);
890 		else
891 			item_end_expected = btrfs_item_offset_nr(leaf,
892 								 slot - 1);
893 		if (btrfs_item_end_nr(leaf, slot) != item_end_expected) {
894 			generic_err(leaf, slot,
895 				"unexpected item end, have %u expect %u",
896 				btrfs_item_end_nr(leaf, slot),
897 				item_end_expected);
898 			return -EUCLEAN;
899 		}
900 
901 		/*
902 		 * Check to make sure that we don't point outside of the leaf,
903 		 * just in case all the items are consistent to each other, but
904 		 * all point outside of the leaf.
905 		 */
906 		if (btrfs_item_end_nr(leaf, slot) >
907 		    BTRFS_LEAF_DATA_SIZE(fs_info)) {
908 			generic_err(leaf, slot,
909 			"slot end outside of leaf, have %u expect range [0, %u]",
910 				btrfs_item_end_nr(leaf, slot),
911 				BTRFS_LEAF_DATA_SIZE(fs_info));
912 			return -EUCLEAN;
913 		}
914 
915 		/* Also check if the item pointer overlaps with btrfs item. */
916 		if (btrfs_item_nr_offset(slot) + sizeof(struct btrfs_item) >
917 		    btrfs_item_ptr_offset(leaf, slot)) {
918 			generic_err(leaf, slot,
919 		"slot overlaps with its data, item end %lu data start %lu",
920 				btrfs_item_nr_offset(slot) +
921 				sizeof(struct btrfs_item),
922 				btrfs_item_ptr_offset(leaf, slot));
923 			return -EUCLEAN;
924 		}
925 
926 		if (check_item_data) {
927 			/*
928 			 * Check if the item size and content meet other
929 			 * criteria
930 			 */
931 			ret = check_leaf_item(leaf, &key, slot);
932 			if (ret < 0)
933 				return ret;
934 		}
935 
936 		prev_key.objectid = key.objectid;
937 		prev_key.type = key.type;
938 		prev_key.offset = key.offset;
939 	}
940 
941 	return 0;
942 }
943 
944 int btrfs_check_leaf_full(struct extent_buffer *leaf)
945 {
946 	return check_leaf(leaf, true);
947 }
948 ALLOW_ERROR_INJECTION(btrfs_check_leaf_full, ERRNO);
949 
950 int btrfs_check_leaf_relaxed(struct extent_buffer *leaf)
951 {
952 	return check_leaf(leaf, false);
953 }
954 
955 int btrfs_check_node(struct extent_buffer *node)
956 {
957 	struct btrfs_fs_info *fs_info = node->fs_info;
958 	unsigned long nr = btrfs_header_nritems(node);
959 	struct btrfs_key key, next_key;
960 	int slot;
961 	int level = btrfs_header_level(node);
962 	u64 bytenr;
963 	int ret = 0;
964 
965 	if (level <= 0 || level >= BTRFS_MAX_LEVEL) {
966 		generic_err(node, 0,
967 			"invalid level for node, have %d expect [1, %d]",
968 			level, BTRFS_MAX_LEVEL - 1);
969 		return -EUCLEAN;
970 	}
971 	if (nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(fs_info)) {
972 		btrfs_crit(fs_info,
973 "corrupt node: root=%llu block=%llu, nritems too %s, have %lu expect range [1,%u]",
974 			   btrfs_header_owner(node), node->start,
975 			   nr == 0 ? "small" : "large", nr,
976 			   BTRFS_NODEPTRS_PER_BLOCK(fs_info));
977 		return -EUCLEAN;
978 	}
979 
980 	for (slot = 0; slot < nr - 1; slot++) {
981 		bytenr = btrfs_node_blockptr(node, slot);
982 		btrfs_node_key_to_cpu(node, &key, slot);
983 		btrfs_node_key_to_cpu(node, &next_key, slot + 1);
984 
985 		if (!bytenr) {
986 			generic_err(node, slot,
987 				"invalid NULL node pointer");
988 			ret = -EUCLEAN;
989 			goto out;
990 		}
991 		if (!IS_ALIGNED(bytenr, fs_info->sectorsize)) {
992 			generic_err(node, slot,
993 			"unaligned pointer, have %llu should be aligned to %u",
994 				bytenr, fs_info->sectorsize);
995 			ret = -EUCLEAN;
996 			goto out;
997 		}
998 
999 		if (btrfs_comp_cpu_keys(&key, &next_key) >= 0) {
1000 			generic_err(node, slot,
1001 	"bad key order, current (%llu %u %llu) next (%llu %u %llu)",
1002 				key.objectid, key.type, key.offset,
1003 				next_key.objectid, next_key.type,
1004 				next_key.offset);
1005 			ret = -EUCLEAN;
1006 			goto out;
1007 		}
1008 	}
1009 out:
1010 	return ret;
1011 }
1012 ALLOW_ERROR_INJECTION(btrfs_check_node, ERRNO);
1013