// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) Qu Wenruo 2017. All rights reserved.
 */

/*
 * The module is used to catch unexpected/corrupted tree block data.
 * Such behavior can be caused either by a fuzzed image or bugs.
 *
 * The objective is to do leaf/node validation checks when tree block is read
 * from disk, and check *every* possible member, so other code won't
 * need to check them again.
 *
 * Due to the potential and unwanted damage, every checker needs to be
 * carefully reviewed, otherwise it could reject valid images at mount time.
 */

#include <linux/types.h>
#include <linux/stddef.h>
#include <linux/error-injection.h>
#include "messages.h"
#include "ctree.h"
#include "tree-checker.h"
#include "disk-io.h"
#include "compression.h"
#include "volumes.h"
#include "misc.h"
#include "fs.h"
#include "accessors.h"
#include "file-item.h"
#include "inode-item.h"
#include "extent-tree.h"

/*
 * Error message should follow the following format:
 * corrupt <type>: <identifier>, <reason>[, <bad_value>]
 *
 * @type:	leaf or node
 * @identifier:	the necessary info to locate the leaf/node.
 *		It's recommended to decode key.objectid/offset if it's
 *		meaningful.
 * @reason:	describe the error
 * @bad_value:	optional, it's recommended to output bad value and its
 *		expected value (range).
 *
 * Since comma is used to separate the components, only space is allowed
 * inside each component.
 */

/*
 * Append generic "corrupt leaf/node root=%llu block=%llu slot=%d: " to @fmt.
 * Allows callers to customize the output.
 */
__printf(3, 4)
__cold
static void generic_err(const struct extent_buffer *eb, int slot,
			const char *fmt, ...)
{
	const struct btrfs_fs_info *fs_info = eb->fs_info;
	struct va_format vaf;
	va_list args;

	va_start(args, fmt);

	vaf.fmt = fmt;
	vaf.va = &args;

	btrfs_crit(fs_info,
		"corrupt %s: root=%llu block=%llu slot=%d, %pV",
		btrfs_header_level(eb) == 0 ? "leaf" : "node",
		btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot, &vaf);
	va_end(args);
}

/*
 * Customized reporter for extent data item, since its key objectid and
 * offset has its own meaning.
 */
__printf(3, 4)
__cold
static void file_extent_err(const struct extent_buffer *eb, int slot,
			    const char *fmt, ...)
{
	const struct btrfs_fs_info *fs_info = eb->fs_info;
	struct btrfs_key key;
	struct va_format vaf;
	va_list args;

	btrfs_item_key_to_cpu(eb, &key, slot);
	va_start(args, fmt);

	vaf.fmt = fmt;
	vaf.va = &args;

	btrfs_crit(fs_info,
	"corrupt %s: root=%llu block=%llu slot=%d ino=%llu file_offset=%llu, %pV",
		btrfs_header_level(eb) == 0 ? "leaf" : "node",
		btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot,
		key.objectid, key.offset, &vaf);
	va_end(args);
}

/*
 * Return 0 if the btrfs_file_extent_##name is aligned to @alignment
 * Else return 1
 */
#define CHECK_FE_ALIGNED(leaf, slot, fi, name, alignment)		      \
({									      \
	if (unlikely(!IS_ALIGNED(btrfs_file_extent_##name((leaf), (fi)),      \
				 (alignment))))				      \
		file_extent_err((leaf), (slot),				      \
	"invalid %s for file extent, have %llu, should be aligned to %u",     \
			(#name), btrfs_file_extent_##name((leaf), (fi)),      \
			(alignment));					      \
	(!IS_ALIGNED(btrfs_file_extent_##name((leaf), (fi)), (alignment)));   \
})

/*
 * Return the exclusive end offset of the file extent at @key/@extent.
 *
 * Inline extents are not sector aligned on disk, so their end is rounded up
 * to the sectorsize; regular/prealloc extents end at file offset + num_bytes.
 */
static u64 file_extent_end(struct extent_buffer *leaf,
			   struct btrfs_key *key,
			   struct btrfs_file_extent_item *extent)
{
	u64 end;
	u64 len;

	if (btrfs_file_extent_type(leaf, extent) == BTRFS_FILE_EXTENT_INLINE) {
		len = btrfs_file_extent_ram_bytes(leaf, extent);
		end = ALIGN(key->offset + len, leaf->fs_info->sectorsize);
	} else {
		len = btrfs_file_extent_num_bytes(leaf, extent);
		end = key->offset + len;
	}
	return end;
}

/*
 * Customized report for dir_item, the only new important information is
 * key->objectid, which represents inode number
 */
__printf(3, 4)
__cold
static void dir_item_err(const struct extent_buffer *eb, int slot,
			 const char *fmt, ...)
{
	const struct btrfs_fs_info *fs_info = eb->fs_info;
	struct btrfs_key key;
	struct va_format vaf;
	va_list args;

	btrfs_item_key_to_cpu(eb, &key, slot);
	va_start(args, fmt);

	vaf.fmt = fmt;
	vaf.va = &args;

	btrfs_crit(fs_info,
		"corrupt %s: root=%llu block=%llu slot=%d ino=%llu, %pV",
		btrfs_header_level(eb) == 0 ? "leaf" : "node",
		btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot,
		key.objectid, &vaf);
	va_end(args);
}

/*
 * This function checks prev_key->objectid, to ensure current key and prev_key
 * share the same objectid as inode number.
 *
 * This is to detect missing INODE_ITEM in subvolume trees.
 *
 * Return true if everything is OK or we don't need to check.
 * Return false if anything is wrong.
 */
static bool check_prev_ino(struct extent_buffer *leaf,
			   struct btrfs_key *key, int slot,
			   struct btrfs_key *prev_key)
{
	/* No prev key, skip check */
	if (slot == 0)
		return true;

	/* Only these key->types need to be checked */
	ASSERT(key->type == BTRFS_XATTR_ITEM_KEY ||
	       key->type == BTRFS_INODE_REF_KEY ||
	       key->type == BTRFS_DIR_INDEX_KEY ||
	       key->type == BTRFS_DIR_ITEM_KEY ||
	       key->type == BTRFS_EXTENT_DATA_KEY);

	/*
	 * Only subvolume trees along with their reloc trees need this check.
	 * Things like log tree doesn't follow this ino requirement.
	 */
	if (!is_fstree(btrfs_header_owner(leaf)))
		return true;

	if (key->objectid == prev_key->objectid)
		return true;

	/* Error found */
	dir_item_err(leaf, slot,
		"invalid previous key objectid, have %llu expect %llu",
		prev_key->objectid, key->objectid);
	return false;
}

/* Validate an EXTENT_DATA item: alignment, type, sizes and range overlap. */
static int check_extent_data_item(struct extent_buffer *leaf,
				  struct btrfs_key *key, int slot,
				  struct btrfs_key *prev_key)
{
	struct btrfs_fs_info *fs_info = leaf->fs_info;
	struct btrfs_file_extent_item *fi;
	u32 sectorsize = fs_info->sectorsize;
	u32 item_size = btrfs_item_size(leaf, slot);
	u64 extent_end;

	if (unlikely(!IS_ALIGNED(key->offset, sectorsize))) {
		file_extent_err(leaf, slot,
	"unaligned file_offset for file extent, have %llu should be aligned to %u",
			key->offset, sectorsize);
		return -EUCLEAN;
	}

	/*
	 * Previous key must have the same key->objectid (ino).
	 * It can be XATTR_ITEM, INODE_ITEM or just another EXTENT_DATA.
	 * But if objectids mismatch, it means we have a missing
	 * INODE_ITEM.
	 */
	if (unlikely(!check_prev_ino(leaf, key, slot, prev_key)))
		return -EUCLEAN;

	fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);

	/*
	 * Make sure the item contains at least inline header, so the file
	 * extent type is not some garbage.
	 */
	if (unlikely(item_size < BTRFS_FILE_EXTENT_INLINE_DATA_START)) {
		file_extent_err(leaf, slot,
				"invalid item size, have %u expect [%zu, %u)",
				item_size, BTRFS_FILE_EXTENT_INLINE_DATA_START,
				SZ_4K);
		return -EUCLEAN;
	}
	if (unlikely(btrfs_file_extent_type(leaf, fi) >=
		     BTRFS_NR_FILE_EXTENT_TYPES)) {
		file_extent_err(leaf, slot,
		"invalid type for file extent, have %u expect range [0, %u]",
			btrfs_file_extent_type(leaf, fi),
			BTRFS_NR_FILE_EXTENT_TYPES - 1);
		return -EUCLEAN;
	}

	/*
	 * Support for new compression/encryption must introduce incompat flag,
	 * and must be caught in open_ctree().
	 */
	if (unlikely(btrfs_file_extent_compression(leaf, fi) >=
		     BTRFS_NR_COMPRESS_TYPES)) {
		file_extent_err(leaf, slot,
	"invalid compression for file extent, have %u expect range [0, %u]",
			btrfs_file_extent_compression(leaf, fi),
			BTRFS_NR_COMPRESS_TYPES - 1);
		return -EUCLEAN;
	}
	if (unlikely(btrfs_file_extent_encryption(leaf, fi))) {
		file_extent_err(leaf, slot,
			"invalid encryption for file extent, have %u expect 0",
			btrfs_file_extent_encryption(leaf, fi));
		return -EUCLEAN;
	}
	if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_INLINE) {
		/* Inline extent must have 0 as key offset */
		if (unlikely(key->offset)) {
			file_extent_err(leaf, slot,
		"invalid file_offset for inline file extent, have %llu expect 0",
				key->offset);
			return -EUCLEAN;
		}

		/* Compressed inline extent has no on-disk size, skip it */
		if (btrfs_file_extent_compression(leaf, fi) !=
		    BTRFS_COMPRESS_NONE)
			return 0;

		/* Uncompressed inline extent size must match item size */
		if (unlikely(item_size != BTRFS_FILE_EXTENT_INLINE_DATA_START +
			     btrfs_file_extent_ram_bytes(leaf, fi))) {
			file_extent_err(leaf, slot,
	"invalid ram_bytes for uncompressed inline extent, have %u expect %llu",
				item_size, BTRFS_FILE_EXTENT_INLINE_DATA_START +
				btrfs_file_extent_ram_bytes(leaf, fi));
			return -EUCLEAN;
		}
		return 0;
	}

	/* Regular or preallocated extent has fixed item size */
	if (unlikely(item_size != sizeof(*fi))) {
		file_extent_err(leaf, slot,
	"invalid item size for reg/prealloc file extent, have %u expect %zu",
			item_size, sizeof(*fi));
		return -EUCLEAN;
	}
	if (unlikely(CHECK_FE_ALIGNED(leaf, slot, fi, ram_bytes, sectorsize) ||
		     CHECK_FE_ALIGNED(leaf, slot, fi, disk_bytenr, sectorsize) ||
		     CHECK_FE_ALIGNED(leaf, slot, fi, disk_num_bytes, sectorsize) ||
		     CHECK_FE_ALIGNED(leaf, slot, fi, offset, sectorsize) ||
		     CHECK_FE_ALIGNED(leaf, slot, fi, num_bytes, sectorsize)))
		return -EUCLEAN;

	/* Catch extent end overflow */
	if (unlikely(check_add_overflow(btrfs_file_extent_num_bytes(leaf, fi),
					key->offset, &extent_end))) {
		file_extent_err(leaf, slot,
	"extent end overflow, have file offset %llu extent num bytes %llu",
			key->offset,
			btrfs_file_extent_num_bytes(leaf, fi));
		return -EUCLEAN;
	}

	/*
	 * Check that no two consecutive file extent items, in the same leaf,
	 * present ranges that overlap each other.
	 */
	if (slot > 0 &&
	    prev_key->objectid == key->objectid &&
	    prev_key->type == BTRFS_EXTENT_DATA_KEY) {
		struct btrfs_file_extent_item *prev_fi;
		u64 prev_end;

		prev_fi = btrfs_item_ptr(leaf, slot - 1,
					 struct btrfs_file_extent_item);
		prev_end = file_extent_end(leaf, prev_key, prev_fi);
		if (unlikely(prev_end > key->offset)) {
			file_extent_err(leaf, slot - 1,
	"file extent end range (%llu) goes beyond start offset (%llu) of the next file extent",
				prev_end, key->offset);
			return -EUCLEAN;
		}
	}

	return 0;
}

/* Validate an EXTENT_CSUM item: objectid, alignment and range overlap. */
static int check_csum_item(struct extent_buffer *leaf, struct btrfs_key *key,
			   int slot, struct btrfs_key *prev_key)
{
	struct btrfs_fs_info *fs_info = leaf->fs_info;
	u32 sectorsize = fs_info->sectorsize;
	const u32 csumsize = fs_info->csum_size;

	if (unlikely(key->objectid != BTRFS_EXTENT_CSUM_OBJECTID)) {
		generic_err(leaf, slot,
		"invalid key objectid for csum item, have %llu expect %llu",
			key->objectid, BTRFS_EXTENT_CSUM_OBJECTID);
		return -EUCLEAN;
	}
	if (unlikely(!IS_ALIGNED(key->offset, sectorsize))) {
		generic_err(leaf, slot,
	"unaligned key offset for csum item, have %llu should be aligned to %u",
			key->offset, sectorsize);
		return -EUCLEAN;
	}
	if (unlikely(!IS_ALIGNED(btrfs_item_size(leaf, slot), csumsize))) {
		generic_err(leaf, slot,
	"unaligned item size for csum item, have %u should be aligned to %u",
			btrfs_item_size(leaf, slot), csumsize);
		return -EUCLEAN;
	}
	if (slot > 0 && prev_key->type == BTRFS_EXTENT_CSUM_KEY) {
		u64 prev_csum_end;
		u32 prev_item_size;

		/*
		 * Each csum covers one sector, so the previous item's covered
		 * byte range is (item_size / csumsize) * sectorsize starting
		 * at prev_key->offset; it must not reach into this item.
		 */
		prev_item_size = btrfs_item_size(leaf, slot - 1);
		prev_csum_end = (prev_item_size / csumsize) * sectorsize;
		prev_csum_end += prev_key->offset;
		if (unlikely(prev_csum_end > key->offset)) {
			generic_err(leaf, slot - 1,
	"csum end range (%llu) goes beyond the start range (%llu) of the next csum item",
				prev_csum_end, key->offset);
			return -EUCLEAN;
		}
	}
	return 0;
}

/* Inode item error output has the same format as dir_item_err() */
#define inode_item_err(eb, slot, fmt, ...)			\
	dir_item_err(eb, slot, fmt, __VA_ARGS__)

/*
 * Validate a key that is expected to point at an inode: either the item key
 * of an INODE_ITEM itself, or the location key inside a DIR_ITEM/DIR_INDEX.
 */
static int check_inode_key(struct extent_buffer *leaf, struct btrfs_key *key,
			   int slot)
{
	struct btrfs_key item_key;
	bool is_inode_item;

	btrfs_item_key_to_cpu(leaf, &item_key, slot);
	is_inode_item = (item_key.type == BTRFS_INODE_ITEM_KEY);

	/* For XATTR_ITEM, location key should be all 0 */
	if (item_key.type == BTRFS_XATTR_ITEM_KEY) {
		if (unlikely(key->objectid != 0 || key->type != 0 ||
			     key->offset != 0))
			return -EUCLEAN;
		return 0;
	}

	if (unlikely((key->objectid < BTRFS_FIRST_FREE_OBJECTID ||
		      key->objectid > BTRFS_LAST_FREE_OBJECTID) &&
		     key->objectid != BTRFS_ROOT_TREE_DIR_OBJECTID &&
		     key->objectid != BTRFS_FREE_INO_OBJECTID)) {
		if (is_inode_item) {
			generic_err(leaf, slot,
	"invalid key objectid: has %llu expect %llu or [%llu, %llu] or %llu",
				key->objectid, BTRFS_ROOT_TREE_DIR_OBJECTID,
				BTRFS_FIRST_FREE_OBJECTID,
				BTRFS_LAST_FREE_OBJECTID,
				BTRFS_FREE_INO_OBJECTID);
		} else {
			dir_item_err(leaf, slot,
	"invalid location key objectid: has %llu expect %llu or [%llu, %llu] or %llu",
				key->objectid, BTRFS_ROOT_TREE_DIR_OBJECTID,
				BTRFS_FIRST_FREE_OBJECTID,
				BTRFS_LAST_FREE_OBJECTID,
				BTRFS_FREE_INO_OBJECTID);
		}
		return -EUCLEAN;
	}
	if (unlikely(key->offset != 0)) {
		if (is_inode_item)
			inode_item_err(leaf, slot,
				       "invalid key offset: has %llu expect 0",
				       key->offset);
		else
			dir_item_err(leaf, slot,
				"invalid location key offset:has %llu expect 0",
				key->offset);
		return -EUCLEAN;
	}
	return 0;
}

/*
 * Validate a key that is expected to point at a tree root: either the item
 * key of a ROOT_ITEM, or the location key inside a DIR_ITEM (subvolume link).
 */
static int check_root_key(struct extent_buffer *leaf, struct btrfs_key *key,
			  int slot)
{
	struct btrfs_key item_key;
	bool is_root_item;

	btrfs_item_key_to_cpu(leaf, &item_key, slot);
	is_root_item = (item_key.type == BTRFS_ROOT_ITEM_KEY);

	/*
	 * Bad rootid for reloc trees.
	 *
	 * Reloc trees are only for subvolume trees, other trees only need
	 * to be COWed to be relocated.
	 */
	if (unlikely(is_root_item && key->objectid == BTRFS_TREE_RELOC_OBJECTID &&
		     !is_fstree(key->offset))) {
		generic_err(leaf, slot,
		"invalid reloc tree for root %lld, root id is not a subvolume tree",
			key->offset);
		return -EUCLEAN;
	}

	/* No such tree id */
	if (unlikely(key->objectid == 0)) {
		if (is_root_item)
			generic_err(leaf, slot, "invalid root id 0");
		else
			dir_item_err(leaf, slot,
				     "invalid location key root id 0");
		return -EUCLEAN;
	}

	/* DIR_ITEM/INDEX/INODE_REF is not allowed to point to non-fs trees */
	if (unlikely(!is_fstree(key->objectid) && !is_root_item)) {
		dir_item_err(leaf, slot,
		"invalid location key objectid, have %llu expect [%llu, %llu]",
			key->objectid, BTRFS_FIRST_FREE_OBJECTID,
			BTRFS_LAST_FREE_OBJECTID);
		return -EUCLEAN;
	}

	/*
	 * ROOT_ITEM with non-zero offset means this is a snapshot, created at
	 * @offset transid.
	 * Furthermore, for location key in DIR_ITEM, its offset is always -1.
	 *
	 * So here we only check offset for reloc tree whose key->offset must
	 * be a valid tree.
	 */
	if (unlikely(key->objectid == BTRFS_TREE_RELOC_OBJECTID &&
		     key->offset == 0)) {
		generic_err(leaf, slot, "invalid root id 0 for reloc tree");
		return -EUCLEAN;
	}
	return 0;
}

/*
 * Validate a DIR_ITEM/DIR_INDEX/XATTR_ITEM: walk every packed sub-item in the
 * item payload, checking header bounds, location key, dir type, name/data
 * lengths and (for DIR_ITEM/XATTR_ITEM) that key->offset matches the name hash.
 */
static int check_dir_item(struct extent_buffer *leaf,
			  struct btrfs_key *key, struct btrfs_key *prev_key,
			  int slot)
{
	struct btrfs_fs_info *fs_info = leaf->fs_info;
	struct btrfs_dir_item *di;
	u32 item_size = btrfs_item_size(leaf, slot);
	u32 cur = 0;

	if (unlikely(!check_prev_ino(leaf, key, slot, prev_key)))
		return -EUCLEAN;

	di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
	while (cur < item_size) {
		struct btrfs_key location_key;
		u32 name_len;
		u32 data_len;
		u32 max_name_len;
		u32 total_size;
		u32 name_hash;
		u8 dir_type;
		int ret;

		/* header itself should not cross item boundary */
		if (unlikely(cur + sizeof(*di) > item_size)) {
			dir_item_err(leaf, slot,
	"dir item header crosses item boundary, have %zu boundary %u",
				cur + sizeof(*di), item_size);
			return -EUCLEAN;
		}

		/* Location key check */
		btrfs_dir_item_key_to_cpu(leaf, di, &location_key);
		if (location_key.type == BTRFS_ROOT_ITEM_KEY) {
			ret = check_root_key(leaf, &location_key, slot);
			if (unlikely(ret < 0))
				return ret;
		} else if (location_key.type == BTRFS_INODE_ITEM_KEY ||
			   location_key.type == 0) {
			ret = check_inode_key(leaf, &location_key, slot);
			if (unlikely(ret < 0))
				return ret;
		} else {
			dir_item_err(leaf, slot,
			"invalid location key type, have %u, expect %u or %u",
				location_key.type, BTRFS_ROOT_ITEM_KEY,
				BTRFS_INODE_ITEM_KEY);
			return -EUCLEAN;
		}

		/* dir type check */
		dir_type = btrfs_dir_ftype(leaf, di);
		if (unlikely(dir_type >= BTRFS_FT_MAX)) {
			dir_item_err(leaf, slot,
			"invalid dir item type, have %u expect [0, %u)",
				dir_type, BTRFS_FT_MAX);
			return -EUCLEAN;
		}

		/* XATTR keys and BTRFS_FT_XATTR must imply each other */
		if (unlikely(key->type == BTRFS_XATTR_ITEM_KEY &&
			     dir_type != BTRFS_FT_XATTR)) {
			dir_item_err(leaf, slot,
		"invalid dir item type for XATTR key, have %u expect %u",
				dir_type, BTRFS_FT_XATTR);
			return -EUCLEAN;
		}
		if (unlikely(dir_type == BTRFS_FT_XATTR &&
			     key->type != BTRFS_XATTR_ITEM_KEY)) {
			dir_item_err(leaf, slot,
			"xattr dir type found for non-XATTR key");
			return -EUCLEAN;
		}
		if (dir_type == BTRFS_FT_XATTR)
			max_name_len = XATTR_NAME_MAX;
		else
			max_name_len = BTRFS_NAME_LEN;

		/* Name/data length check */
		name_len = btrfs_dir_name_len(leaf, di);
		data_len = btrfs_dir_data_len(leaf, di);
		if (unlikely(name_len > max_name_len)) {
			dir_item_err(leaf, slot,
			"dir item name len too long, have %u max %u",
				name_len, max_name_len);
			return -EUCLEAN;
		}
		if (unlikely(name_len + data_len > BTRFS_MAX_XATTR_SIZE(fs_info))) {
			dir_item_err(leaf, slot,
			"dir item name and data len too long, have %u max %u",
				name_len + data_len,
				BTRFS_MAX_XATTR_SIZE(fs_info));
			return -EUCLEAN;
		}

		/* Only xattrs carry payload data */
		if (unlikely(data_len && dir_type != BTRFS_FT_XATTR)) {
			dir_item_err(leaf, slot,
			"dir item with invalid data len, have %u expect 0",
				data_len);
			return -EUCLEAN;
		}

		total_size = sizeof(*di) + name_len + data_len;

		/* header and name/data should not cross item boundary */
		if (unlikely(cur + total_size > item_size)) {
			dir_item_err(leaf, slot,
		"dir item data crosses item boundary, have %u boundary %u",
				cur + total_size, item_size);
			return -EUCLEAN;
		}

		/*
		 * Special check for XATTR/DIR_ITEM, as key->offset is name
		 * hash, should match its name
		 */
		if (key->type == BTRFS_DIR_ITEM_KEY ||
		    key->type == BTRFS_XATTR_ITEM_KEY) {
			char namebuf[max(BTRFS_NAME_LEN, XATTR_NAME_MAX)];

			read_extent_buffer(leaf, namebuf,
					(unsigned long)(di + 1), name_len);
			name_hash = btrfs_name_hash(namebuf, name_len);
			if (unlikely(key->offset != name_hash)) {
				dir_item_err(leaf, slot,
		"name hash mismatch with key, have 0x%016x expect 0x%016llx",
					name_hash, key->offset);
				return -EUCLEAN;
			}
		}
		cur += total_size;
		di = (struct btrfs_dir_item *)((void *)di + total_size);
	}
	return 0;
}

/*
 * Customized reporter for block group items; key objectid/offset are the
 * block group's start and length.
 */
__printf(3, 4)
__cold
static void block_group_err(const struct extent_buffer *eb, int slot,
			    const char *fmt, ...)
{
	const struct btrfs_fs_info *fs_info = eb->fs_info;
	struct btrfs_key key;
	struct va_format vaf;
	va_list args;

	btrfs_item_key_to_cpu(eb, &key, slot);
	va_start(args, fmt);

	vaf.fmt = fmt;
	vaf.va = &args;

	btrfs_crit(fs_info,
	"corrupt %s: root=%llu block=%llu slot=%d bg_start=%llu bg_len=%llu, %pV",
		btrfs_header_level(eb) == 0 ? "leaf" : "node",
		btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot,
		key.objectid, key.offset, &vaf);
	va_end(args);
}

/* Validate a BLOCK_GROUP_ITEM: size, chunk objectid, used bytes and flags. */
static int check_block_group_item(struct extent_buffer *leaf,
				  struct btrfs_key *key, int slot)
{
	struct btrfs_fs_info *fs_info = leaf->fs_info;
	struct btrfs_block_group_item bgi;
	u32 item_size = btrfs_item_size(leaf, slot);
	u64 chunk_objectid;
	u64 flags;
	u64 type;

	/*
	 * Here we don't really care about alignment since extent allocator can
	 * handle it. We care more about the size.
	 */
	if (unlikely(key->offset == 0)) {
		block_group_err(leaf, slot,
				"invalid block group size 0");
		return -EUCLEAN;
	}

	if (unlikely(item_size != sizeof(bgi))) {
		block_group_err(leaf, slot,
			"invalid item size, have %u expect %zu",
			item_size, sizeof(bgi));
		return -EUCLEAN;
	}

	read_extent_buffer(leaf, &bgi, btrfs_item_ptr_offset(leaf, slot),
			   sizeof(bgi));
	chunk_objectid = btrfs_stack_block_group_chunk_objectid(&bgi);
	if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
		/*
		 * We don't init the nr_global_roots until we load the global
		 * roots, so this could be 0 at mount time.  If it's 0 we'll
		 * just assume we're fine, and later we'll check against our
		 * actual value.
		 */
		if (unlikely(fs_info->nr_global_roots &&
			     chunk_objectid >= fs_info->nr_global_roots)) {
			block_group_err(leaf, slot,
	"invalid block group global root id, have %llu, needs to be <= %llu",
					chunk_objectid,
					fs_info->nr_global_roots);
			return -EUCLEAN;
		}
	} else if (unlikely(chunk_objectid != BTRFS_FIRST_CHUNK_TREE_OBJECTID)) {
		block_group_err(leaf, slot,
		"invalid block group chunk objectid, have %llu expect %llu",
			btrfs_stack_block_group_chunk_objectid(&bgi),
			BTRFS_FIRST_CHUNK_TREE_OBJECTID);
		return -EUCLEAN;
	}

	if (unlikely(btrfs_stack_block_group_used(&bgi) > key->offset)) {
		block_group_err(leaf, slot,
			"invalid block group used, have %llu expect [0, %llu)",
			btrfs_stack_block_group_used(&bgi), key->offset);
		return -EUCLEAN;
	}

	flags = btrfs_stack_block_group_flags(&bgi);
	if (unlikely(hweight64(flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) > 1)) {
		block_group_err(leaf, slot,
"invalid profile flags, have 0x%llx (%lu bits set) expect no more than 1 bit set",
			flags & BTRFS_BLOCK_GROUP_PROFILE_MASK,
			hweight64(flags & BTRFS_BLOCK_GROUP_PROFILE_MASK));
		return -EUCLEAN;
	}

	type = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
	if (unlikely(type != BTRFS_BLOCK_GROUP_DATA &&
		     type != BTRFS_BLOCK_GROUP_METADATA &&
		     type != BTRFS_BLOCK_GROUP_SYSTEM &&
		     type != (BTRFS_BLOCK_GROUP_METADATA |
			      BTRFS_BLOCK_GROUP_DATA))) {
		block_group_err(leaf, slot,
"invalid type, have 0x%llx (%lu bits set) expect either 0x%llx, 0x%llx, 0x%llx or 0x%llx",
			type, hweight64(type),
			BTRFS_BLOCK_GROUP_DATA, BTRFS_BLOCK_GROUP_METADATA,
			BTRFS_BLOCK_GROUP_SYSTEM,
			BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA);
		return -EUCLEAN;
	}
	return 0;
}

/*
 * Customized reporter for chunks; handles both leaf-resident chunk items and
 * the superblock sys_chunk_array (which has no slot to report).
 */
__printf(4, 5)
__cold
static void chunk_err(const struct extent_buffer *leaf,
		      const struct btrfs_chunk *chunk, u64 logical,
		      const char *fmt, ...)
{
	const struct btrfs_fs_info *fs_info = leaf->fs_info;
	bool is_sb;
	struct va_format vaf;
	va_list args;
	int i;
	int slot = -1;

	/* Only superblock eb is able to have such small offset */
	is_sb = (leaf->start == BTRFS_SUPER_INFO_OFFSET);

	if (!is_sb) {
		/*
		 * Get the slot number by iterating through all slots, this
		 * would provide better readability.
		 */
		for (i = 0; i < btrfs_header_nritems(leaf); i++) {
			if (btrfs_item_ptr_offset(leaf, i) ==
			    (unsigned long)chunk) {
				slot = i;
				break;
			}
		}
	}
	va_start(args, fmt);
	vaf.fmt = fmt;
	vaf.va = &args;

	if (is_sb)
		btrfs_crit(fs_info,
		"corrupt superblock syschunk array: chunk_start=%llu, %pV",
			   logical, &vaf);
	else
		btrfs_crit(fs_info,
	"corrupt leaf: root=%llu block=%llu slot=%d chunk_start=%llu, %pV",
			   BTRFS_CHUNK_TREE_OBJECTID, leaf->start, slot,
			   logical, &vaf);
	va_end(args);
}

/*
 * The common chunk check which could also work on super block sys chunk array.
 *
 * Return -EUCLEAN if anything is corrupted.
 * Return 0 if everything is OK.
 */
int btrfs_check_chunk_valid(struct extent_buffer *leaf,
			    struct btrfs_chunk *chunk, u64 logical)
{
	struct btrfs_fs_info *fs_info = leaf->fs_info;
	u64 length;
	u64 chunk_end;
	u64 stripe_len;
	u16 num_stripes;
	u16 sub_stripes;
	u64 type;
	u64 features;
	bool mixed = false;
	int raid_index;
	int nparity;
	int ncopies;

	length = btrfs_chunk_length(leaf, chunk);
	stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
	num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
	sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
	type = btrfs_chunk_type(leaf, chunk);
	raid_index = btrfs_bg_flags_to_raid_index(type);
	ncopies = btrfs_raid_array[raid_index].ncopies;
	nparity = btrfs_raid_array[raid_index].nparity;

	if (unlikely(!num_stripes)) {
		chunk_err(leaf, chunk, logical,
			  "invalid chunk num_stripes, have %u", num_stripes);
		return -EUCLEAN;
	}
	if (unlikely(num_stripes < ncopies)) {
		chunk_err(leaf, chunk, logical,
			  "invalid chunk num_stripes < ncopies, have %u < %d",
			  num_stripes, ncopies);
		return -EUCLEAN;
	}
	if (unlikely(nparity && num_stripes == nparity)) {
		chunk_err(leaf, chunk, logical,
			  "invalid chunk num_stripes == nparity, have %u == %d",
			  num_stripes, nparity);
		return -EUCLEAN;
	}
	if (unlikely(!IS_ALIGNED(logical, fs_info->sectorsize))) {
		chunk_err(leaf, chunk, logical,
		"invalid chunk logical, have %llu should aligned to %u",
			  logical, fs_info->sectorsize);
		return -EUCLEAN;
	}
	if (unlikely(btrfs_chunk_sector_size(leaf, chunk) != fs_info->sectorsize)) {
		chunk_err(leaf, chunk, logical,
			  "invalid chunk sectorsize, have %u expect %u",
			  btrfs_chunk_sector_size(leaf, chunk),
			  fs_info->sectorsize);
		return -EUCLEAN;
	}
	if (unlikely(!length || !IS_ALIGNED(length, fs_info->sectorsize))) {
		chunk_err(leaf, chunk, logical,
			  "invalid chunk length, have %llu", length);
		return -EUCLEAN;
	}
	if (unlikely(check_add_overflow(logical, length, &chunk_end))) {
		chunk_err(leaf, chunk, logical,
"invalid chunk logical start and length, have logical start %llu length %llu",
			  logical, length);
		return -EUCLEAN;
	}
	if (unlikely(!is_power_of_2(stripe_len) || stripe_len != BTRFS_STRIPE_LEN)) {
		chunk_err(leaf, chunk, logical,
			  "invalid chunk stripe length: %llu",
			  stripe_len);
		return -EUCLEAN;
	}
	/*
	 * We artificially limit the chunk size, so that the number of stripes
	 * inside a chunk can be fit into a U32.  The current limit (256G) is
	 * way too large for real world usage anyway, and it's also much larger
	 * than our existing limit (10G).
	 *
	 * Thus it should be a good way to catch obvious bitflips.
	 */
	if (unlikely(length >= btrfs_stripe_nr_to_offset(U32_MAX))) {
		chunk_err(leaf, chunk, logical,
			  "chunk length too large: have %llu limit %llu",
			  length, btrfs_stripe_nr_to_offset(U32_MAX));
		return -EUCLEAN;
	}
	if (unlikely(type & ~(BTRFS_BLOCK_GROUP_TYPE_MASK |
			      BTRFS_BLOCK_GROUP_PROFILE_MASK))) {
		chunk_err(leaf, chunk, logical,
			  "unrecognized chunk type: 0x%llx",
			  ~(BTRFS_BLOCK_GROUP_TYPE_MASK |
			    BTRFS_BLOCK_GROUP_PROFILE_MASK) &
			  btrfs_chunk_type(leaf, chunk));
		return -EUCLEAN;
	}

	/* At most one RAID profile bit may be set */
	if (unlikely(!has_single_bit_set(type & BTRFS_BLOCK_GROUP_PROFILE_MASK) &&
		     (type & BTRFS_BLOCK_GROUP_PROFILE_MASK) != 0)) {
		chunk_err(leaf, chunk, logical,
		"invalid chunk profile flag: 0x%llx, expect 0 or 1 bit set",
			  type & BTRFS_BLOCK_GROUP_PROFILE_MASK);
		return -EUCLEAN;
	}
	if (unlikely((type & BTRFS_BLOCK_GROUP_TYPE_MASK) == 0)) {
		chunk_err(leaf, chunk, logical,
	"missing chunk type flag, have 0x%llx one bit must be set in 0x%llx",
			  type, BTRFS_BLOCK_GROUP_TYPE_MASK);
		return -EUCLEAN;
	}

	if (unlikely((type & BTRFS_BLOCK_GROUP_SYSTEM) &&
		     (type & (BTRFS_BLOCK_GROUP_METADATA |
			      BTRFS_BLOCK_GROUP_DATA)))) {
		chunk_err(leaf, chunk, logical,
			  "system chunk with data or metadata type: 0x%llx",
			  type);
		return -EUCLEAN;
	}

	features = btrfs_super_incompat_flags(fs_info->super_copy);
	if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
		mixed = true;

	if (!mixed) {
		if (unlikely((type & BTRFS_BLOCK_GROUP_METADATA) &&
			     (type & BTRFS_BLOCK_GROUP_DATA))) {
			chunk_err(leaf, chunk, logical,
			"mixed chunk type in non-mixed mode: 0x%llx", type);
			return -EUCLEAN;
		}
	}

	/* Stripe counts must match the constraints of the chosen profile */
	if (unlikely((type & BTRFS_BLOCK_GROUP_RAID10 &&
		      sub_stripes != btrfs_raid_array[BTRFS_RAID_RAID10].sub_stripes) ||
		     (type & BTRFS_BLOCK_GROUP_RAID1 &&
		      num_stripes != btrfs_raid_array[BTRFS_RAID_RAID1].devs_min) ||
		     (type & BTRFS_BLOCK_GROUP_RAID1C3 &&
		      num_stripes != btrfs_raid_array[BTRFS_RAID_RAID1C3].devs_min) ||
		     (type & BTRFS_BLOCK_GROUP_RAID1C4 &&
		      num_stripes != btrfs_raid_array[BTRFS_RAID_RAID1C4].devs_min) ||
		     (type & BTRFS_BLOCK_GROUP_RAID5 &&
		      num_stripes < btrfs_raid_array[BTRFS_RAID_RAID5].devs_min) ||
		     (type & BTRFS_BLOCK_GROUP_RAID6 &&
		      num_stripes < btrfs_raid_array[BTRFS_RAID_RAID6].devs_min) ||
		     (type & BTRFS_BLOCK_GROUP_DUP &&
		      num_stripes != btrfs_raid_array[BTRFS_RAID_DUP].dev_stripes) ||
		     ((type & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0 &&
		      num_stripes != btrfs_raid_array[BTRFS_RAID_SINGLE].dev_stripes))) {
		chunk_err(leaf, chunk, logical,
			"invalid num_stripes:sub_stripes %u:%u for profile %llu",
			num_stripes, sub_stripes,
			type & BTRFS_BLOCK_GROUP_PROFILE_MASK);
		return -EUCLEAN;
	}

	return 0;
}

/*
 * Enhanced version of chunk item checker.
 *
 * The common btrfs_check_chunk_valid() doesn't check item size since it needs
 * to work on super block sys_chunk_array which doesn't have full item ptr.
 */
static int check_leaf_chunk_item(struct extent_buffer *leaf,
				 struct btrfs_chunk *chunk,
				 struct btrfs_key *key, int slot)
{
	int num_stripes;

	if (unlikely(btrfs_item_size(leaf, slot) < sizeof(struct btrfs_chunk))) {
		chunk_err(leaf, chunk, key->offset,
			  "invalid chunk item size: have %u expect [%zu, %u)",
			  btrfs_item_size(leaf, slot),
			  sizeof(struct btrfs_chunk),
			  BTRFS_LEAF_DATA_SIZE(leaf->fs_info));
		return -EUCLEAN;
	}

	num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
	/* Let btrfs_check_chunk_valid() handle this error type */
	if (num_stripes == 0)
		goto out;

	if (unlikely(btrfs_chunk_item_size(num_stripes) !=
		     btrfs_item_size(leaf, slot))) {
		chunk_err(leaf, chunk, key->offset,
			  "invalid chunk item size: have %u expect %lu",
			  btrfs_item_size(leaf, slot),
			  btrfs_chunk_item_size(num_stripes));
		return -EUCLEAN;
	}
out:
	return btrfs_check_chunk_valid(leaf, chunk, key->offset);
}

/*
 * Customized reporter for dev items; key objectid is always
 * BTRFS_DEV_ITEMS_OBJECTID, the printed devid comes from key.objectid of the
 * decoded item key.
 */
__printf(3, 4)
__cold
static void dev_item_err(const struct extent_buffer *eb, int slot,
			 const char *fmt, ...)
{
	struct btrfs_key key;
	struct va_format vaf;
	va_list args;

	btrfs_item_key_to_cpu(eb, &key, slot);
	va_start(args, fmt);

	vaf.fmt = fmt;
	vaf.va = &args;

	btrfs_crit(eb->fs_info,
	"corrupt %s: root=%llu block=%llu slot=%d devid=%llu %pV",
		   btrfs_header_level(eb) == 0 ? "leaf" : "node",
		   btrfs_header_owner(eb), btrfs_header_bytenr(eb), slot,
		   key.objectid, &vaf);
	va_end(args);
}

/* Validate a DEV_ITEM: objectid, item size, devid and bytes accounting. */
static int check_dev_item(struct extent_buffer *leaf,
			  struct btrfs_key *key, int slot)
{
	struct btrfs_dev_item *ditem;
	const u32 item_size = btrfs_item_size(leaf, slot);

	if (unlikely(key->objectid != BTRFS_DEV_ITEMS_OBJECTID)) {
		dev_item_err(leaf, slot,
			     "invalid objectid: has=%llu expect=%llu",
			     key->objectid, BTRFS_DEV_ITEMS_OBJECTID);
		return -EUCLEAN;
	}

	if (unlikely(item_size != sizeof(*ditem))) {
		dev_item_err(leaf, slot, "invalid item size: has %u expect %zu",
			     item_size, sizeof(*ditem));
		return -EUCLEAN;
	}

	ditem = btrfs_item_ptr(leaf, slot, struct btrfs_dev_item);
	if (unlikely(btrfs_device_id(leaf, ditem) != key->offset)) {
		dev_item_err(leaf, slot,
			     "devid mismatch: key has=%llu item has=%llu",
			     key->offset, btrfs_device_id(leaf, ditem));
		return -EUCLEAN;
	}

	/*
	 * For device total_bytes, we don't have reliable way to check it, as
	 * it can be 0 for device removal. Device size check can only be done
	 * by dev extents check.
	 */
	if (unlikely(btrfs_device_bytes_used(leaf, ditem) >
		     btrfs_device_total_bytes(leaf, ditem))) {
		dev_item_err(leaf, slot,
			     "invalid bytes used: have %llu expect [0, %llu]",
			     btrfs_device_bytes_used(leaf, ditem),
			     btrfs_device_total_bytes(leaf, ditem));
		return -EUCLEAN;
	}
	/*
	 * Remaining members like io_align/type/gen/dev_group aren't really
	 * utilized. Skip them to make later usage of them easier.
	 */
	return 0;
}

static int check_inode_item(struct extent_buffer *leaf,
			    struct btrfs_key *key, int slot)
{
	struct btrfs_fs_info *fs_info = leaf->fs_info;
	struct btrfs_inode_item *iitem;
	u64 super_gen = btrfs_super_generation(fs_info->super_copy);
	u32 valid_mask = (S_IFMT | S_ISUID | S_ISGID | S_ISVTX | 0777);
	const u32 item_size = btrfs_item_size(leaf, slot);
	u32 mode;
	int ret;
	u32 flags;
	u32 ro_flags;

	ret = check_inode_key(leaf, key, slot);
	if (unlikely(ret < 0))
		return ret;

	if (unlikely(item_size != sizeof(*iitem))) {
		generic_err(leaf, slot, "invalid item size: has %u expect %zu",
			    item_size, sizeof(*iitem));
		return -EUCLEAN;
	}

	iitem = btrfs_item_ptr(leaf, slot, struct btrfs_inode_item);

	/* Here we use super block generation + 1 to handle log tree */
	if (unlikely(btrfs_inode_generation(leaf, iitem) > super_gen + 1)) {
		inode_item_err(leaf, slot,
			"invalid inode generation: has %llu expect (0, %llu]",
			       btrfs_inode_generation(leaf, iitem),
			       super_gen + 1);
		return -EUCLEAN;
	}
	/* Note for ROOT_TREE_DIR_ITEM, mkfs could set its transid 0 */
	if (unlikely(btrfs_inode_transid(leaf, iitem) > super_gen + 1)) {
		inode_item_err(leaf, slot,
			"invalid inode transid: has %llu expect [0, %llu]",
			       btrfs_inode_transid(leaf, iitem), super_gen + 1);
		return -EUCLEAN;
	}

	/*
	 * For size and nbytes it's better not to be too strict, as for dir
	 * item its size/nbytes can easily get wrong, but doesn't affect
	 * anything in the fs. So here we skip the check.
1106 */ 1107 mode = btrfs_inode_mode(leaf, iitem); 1108 if (unlikely(mode & ~valid_mask)) { 1109 inode_item_err(leaf, slot, 1110 "unknown mode bit detected: 0x%x", 1111 mode & ~valid_mask); 1112 return -EUCLEAN; 1113 } 1114 1115 /* 1116 * S_IFMT is not bit mapped so we can't completely rely on 1117 * is_power_of_2/has_single_bit_set, but it can save us from checking 1118 * FIFO/CHR/DIR/REG. Only needs to check BLK, LNK and SOCKS 1119 */ 1120 if (!has_single_bit_set(mode & S_IFMT)) { 1121 if (unlikely(!S_ISLNK(mode) && !S_ISBLK(mode) && !S_ISSOCK(mode))) { 1122 inode_item_err(leaf, slot, 1123 "invalid mode: has 0%o expect valid S_IF* bit(s)", 1124 mode & S_IFMT); 1125 return -EUCLEAN; 1126 } 1127 } 1128 if (unlikely(S_ISDIR(mode) && btrfs_inode_nlink(leaf, iitem) > 1)) { 1129 inode_item_err(leaf, slot, 1130 "invalid nlink: has %u expect no more than 1 for dir", 1131 btrfs_inode_nlink(leaf, iitem)); 1132 return -EUCLEAN; 1133 } 1134 btrfs_inode_split_flags(btrfs_inode_flags(leaf, iitem), &flags, &ro_flags); 1135 if (unlikely(flags & ~BTRFS_INODE_FLAG_MASK)) { 1136 inode_item_err(leaf, slot, 1137 "unknown incompat flags detected: 0x%x", flags); 1138 return -EUCLEAN; 1139 } 1140 if (unlikely(!sb_rdonly(fs_info->sb) && 1141 (ro_flags & ~BTRFS_INODE_RO_FLAG_MASK))) { 1142 inode_item_err(leaf, slot, 1143 "unknown ro-compat flags detected on writeable mount: 0x%x", 1144 ro_flags); 1145 return -EUCLEAN; 1146 } 1147 return 0; 1148 } 1149 1150 static int check_root_item(struct extent_buffer *leaf, struct btrfs_key *key, 1151 int slot) 1152 { 1153 struct btrfs_fs_info *fs_info = leaf->fs_info; 1154 struct btrfs_root_item ri = { 0 }; 1155 const u64 valid_root_flags = BTRFS_ROOT_SUBVOL_RDONLY | 1156 BTRFS_ROOT_SUBVOL_DEAD; 1157 int ret; 1158 1159 ret = check_root_key(leaf, key, slot); 1160 if (unlikely(ret < 0)) 1161 return ret; 1162 1163 if (unlikely(btrfs_item_size(leaf, slot) != sizeof(ri) && 1164 btrfs_item_size(leaf, slot) != 1165 btrfs_legacy_root_item_size())) { 1166 
generic_err(leaf, slot, 1167 "invalid root item size, have %u expect %zu or %u", 1168 btrfs_item_size(leaf, slot), sizeof(ri), 1169 btrfs_legacy_root_item_size()); 1170 return -EUCLEAN; 1171 } 1172 1173 /* 1174 * For legacy root item, the members starting at generation_v2 will be 1175 * all filled with 0. 1176 * And since we allow geneartion_v2 as 0, it will still pass the check. 1177 */ 1178 read_extent_buffer(leaf, &ri, btrfs_item_ptr_offset(leaf, slot), 1179 btrfs_item_size(leaf, slot)); 1180 1181 /* Generation related */ 1182 if (unlikely(btrfs_root_generation(&ri) > 1183 btrfs_super_generation(fs_info->super_copy) + 1)) { 1184 generic_err(leaf, slot, 1185 "invalid root generation, have %llu expect (0, %llu]", 1186 btrfs_root_generation(&ri), 1187 btrfs_super_generation(fs_info->super_copy) + 1); 1188 return -EUCLEAN; 1189 } 1190 if (unlikely(btrfs_root_generation_v2(&ri) > 1191 btrfs_super_generation(fs_info->super_copy) + 1)) { 1192 generic_err(leaf, slot, 1193 "invalid root v2 generation, have %llu expect (0, %llu]", 1194 btrfs_root_generation_v2(&ri), 1195 btrfs_super_generation(fs_info->super_copy) + 1); 1196 return -EUCLEAN; 1197 } 1198 if (unlikely(btrfs_root_last_snapshot(&ri) > 1199 btrfs_super_generation(fs_info->super_copy) + 1)) { 1200 generic_err(leaf, slot, 1201 "invalid root last_snapshot, have %llu expect (0, %llu]", 1202 btrfs_root_last_snapshot(&ri), 1203 btrfs_super_generation(fs_info->super_copy) + 1); 1204 return -EUCLEAN; 1205 } 1206 1207 /* Alignment and level check */ 1208 if (unlikely(!IS_ALIGNED(btrfs_root_bytenr(&ri), fs_info->sectorsize))) { 1209 generic_err(leaf, slot, 1210 "invalid root bytenr, have %llu expect to be aligned to %u", 1211 btrfs_root_bytenr(&ri), fs_info->sectorsize); 1212 return -EUCLEAN; 1213 } 1214 if (unlikely(btrfs_root_level(&ri) >= BTRFS_MAX_LEVEL)) { 1215 generic_err(leaf, slot, 1216 "invalid root level, have %u expect [0, %u]", 1217 btrfs_root_level(&ri), BTRFS_MAX_LEVEL - 1); 1218 return -EUCLEAN; 1219 } 
1220 if (unlikely(btrfs_root_drop_level(&ri) >= BTRFS_MAX_LEVEL)) { 1221 generic_err(leaf, slot, 1222 "invalid root level, have %u expect [0, %u]", 1223 btrfs_root_drop_level(&ri), BTRFS_MAX_LEVEL - 1); 1224 return -EUCLEAN; 1225 } 1226 1227 /* Flags check */ 1228 if (unlikely(btrfs_root_flags(&ri) & ~valid_root_flags)) { 1229 generic_err(leaf, slot, 1230 "invalid root flags, have 0x%llx expect mask 0x%llx", 1231 btrfs_root_flags(&ri), valid_root_flags); 1232 return -EUCLEAN; 1233 } 1234 return 0; 1235 } 1236 1237 __printf(3,4) 1238 __cold 1239 static void extent_err(const struct extent_buffer *eb, int slot, 1240 const char *fmt, ...) 1241 { 1242 struct btrfs_key key; 1243 struct va_format vaf; 1244 va_list args; 1245 u64 bytenr; 1246 u64 len; 1247 1248 btrfs_item_key_to_cpu(eb, &key, slot); 1249 bytenr = key.objectid; 1250 if (key.type == BTRFS_METADATA_ITEM_KEY || 1251 key.type == BTRFS_TREE_BLOCK_REF_KEY || 1252 key.type == BTRFS_SHARED_BLOCK_REF_KEY) 1253 len = eb->fs_info->nodesize; 1254 else 1255 len = key.offset; 1256 va_start(args, fmt); 1257 1258 vaf.fmt = fmt; 1259 vaf.va = &args; 1260 1261 btrfs_crit(eb->fs_info, 1262 "corrupt %s: block=%llu slot=%d extent bytenr=%llu len=%llu %pV", 1263 btrfs_header_level(eb) == 0 ? 
"leaf" : "node", 1264 eb->start, slot, bytenr, len, &vaf); 1265 va_end(args); 1266 } 1267 1268 static int check_extent_item(struct extent_buffer *leaf, 1269 struct btrfs_key *key, int slot, 1270 struct btrfs_key *prev_key) 1271 { 1272 struct btrfs_fs_info *fs_info = leaf->fs_info; 1273 struct btrfs_extent_item *ei; 1274 bool is_tree_block = false; 1275 unsigned long ptr; /* Current pointer inside inline refs */ 1276 unsigned long end; /* Extent item end */ 1277 const u32 item_size = btrfs_item_size(leaf, slot); 1278 u8 last_type = 0; 1279 u64 last_seq = U64_MAX; 1280 u64 flags; 1281 u64 generation; 1282 u64 total_refs; /* Total refs in btrfs_extent_item */ 1283 u64 inline_refs = 0; /* found total inline refs */ 1284 1285 if (unlikely(key->type == BTRFS_METADATA_ITEM_KEY && 1286 !btrfs_fs_incompat(fs_info, SKINNY_METADATA))) { 1287 generic_err(leaf, slot, 1288 "invalid key type, METADATA_ITEM type invalid when SKINNY_METADATA feature disabled"); 1289 return -EUCLEAN; 1290 } 1291 /* key->objectid is the bytenr for both key types */ 1292 if (unlikely(!IS_ALIGNED(key->objectid, fs_info->sectorsize))) { 1293 generic_err(leaf, slot, 1294 "invalid key objectid, have %llu expect to be aligned to %u", 1295 key->objectid, fs_info->sectorsize); 1296 return -EUCLEAN; 1297 } 1298 1299 /* key->offset is tree level for METADATA_ITEM_KEY */ 1300 if (unlikely(key->type == BTRFS_METADATA_ITEM_KEY && 1301 key->offset >= BTRFS_MAX_LEVEL)) { 1302 extent_err(leaf, slot, 1303 "invalid tree level, have %llu expect [0, %u]", 1304 key->offset, BTRFS_MAX_LEVEL - 1); 1305 return -EUCLEAN; 1306 } 1307 1308 /* 1309 * EXTENT/METADATA_ITEM consists of: 1310 * 1) One btrfs_extent_item 1311 * Records the total refs, type and generation of the extent. 1312 * 1313 * 2) One btrfs_tree_block_info (for EXTENT_ITEM and tree backref only) 1314 * Records the first key and level of the tree block. 
1315 * 1316 * 2) Zero or more btrfs_extent_inline_ref(s) 1317 * Each inline ref has one btrfs_extent_inline_ref shows: 1318 * 2.1) The ref type, one of the 4 1319 * TREE_BLOCK_REF Tree block only 1320 * SHARED_BLOCK_REF Tree block only 1321 * EXTENT_DATA_REF Data only 1322 * SHARED_DATA_REF Data only 1323 * 2.2) Ref type specific data 1324 * Either using btrfs_extent_inline_ref::offset, or specific 1325 * data structure. 1326 * 1327 * All above inline items should follow the order: 1328 * 1329 * - All btrfs_extent_inline_ref::type should be in an ascending 1330 * order 1331 * 1332 * - Within the same type, the items should follow a descending 1333 * order by their sequence number. The sequence number is 1334 * determined by: 1335 * * btrfs_extent_inline_ref::offset for all types other than 1336 * EXTENT_DATA_REF 1337 * * hash_extent_data_ref() for EXTENT_DATA_REF 1338 */ 1339 if (unlikely(item_size < sizeof(*ei))) { 1340 extent_err(leaf, slot, 1341 "invalid item size, have %u expect [%zu, %u)", 1342 item_size, sizeof(*ei), 1343 BTRFS_LEAF_DATA_SIZE(fs_info)); 1344 return -EUCLEAN; 1345 } 1346 end = item_size + btrfs_item_ptr_offset(leaf, slot); 1347 1348 /* Checks against extent_item */ 1349 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item); 1350 flags = btrfs_extent_flags(leaf, ei); 1351 total_refs = btrfs_extent_refs(leaf, ei); 1352 generation = btrfs_extent_generation(leaf, ei); 1353 if (unlikely(generation > 1354 btrfs_super_generation(fs_info->super_copy) + 1)) { 1355 extent_err(leaf, slot, 1356 "invalid generation, have %llu expect (0, %llu]", 1357 generation, 1358 btrfs_super_generation(fs_info->super_copy) + 1); 1359 return -EUCLEAN; 1360 } 1361 if (unlikely(!has_single_bit_set(flags & (BTRFS_EXTENT_FLAG_DATA | 1362 BTRFS_EXTENT_FLAG_TREE_BLOCK)))) { 1363 extent_err(leaf, slot, 1364 "invalid extent flag, have 0x%llx expect 1 bit set in 0x%llx", 1365 flags, BTRFS_EXTENT_FLAG_DATA | 1366 BTRFS_EXTENT_FLAG_TREE_BLOCK); 1367 return -EUCLEAN; 1368 } 1369 
is_tree_block = !!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK); 1370 if (is_tree_block) { 1371 if (unlikely(key->type == BTRFS_EXTENT_ITEM_KEY && 1372 key->offset != fs_info->nodesize)) { 1373 extent_err(leaf, slot, 1374 "invalid extent length, have %llu expect %u", 1375 key->offset, fs_info->nodesize); 1376 return -EUCLEAN; 1377 } 1378 } else { 1379 if (unlikely(key->type != BTRFS_EXTENT_ITEM_KEY)) { 1380 extent_err(leaf, slot, 1381 "invalid key type, have %u expect %u for data backref", 1382 key->type, BTRFS_EXTENT_ITEM_KEY); 1383 return -EUCLEAN; 1384 } 1385 if (unlikely(!IS_ALIGNED(key->offset, fs_info->sectorsize))) { 1386 extent_err(leaf, slot, 1387 "invalid extent length, have %llu expect aligned to %u", 1388 key->offset, fs_info->sectorsize); 1389 return -EUCLEAN; 1390 } 1391 if (unlikely(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) { 1392 extent_err(leaf, slot, 1393 "invalid extent flag, data has full backref set"); 1394 return -EUCLEAN; 1395 } 1396 } 1397 ptr = (unsigned long)(struct btrfs_extent_item *)(ei + 1); 1398 1399 /* Check the special case of btrfs_tree_block_info */ 1400 if (is_tree_block && key->type != BTRFS_METADATA_ITEM_KEY) { 1401 struct btrfs_tree_block_info *info; 1402 1403 info = (struct btrfs_tree_block_info *)ptr; 1404 if (unlikely(btrfs_tree_block_level(leaf, info) >= BTRFS_MAX_LEVEL)) { 1405 extent_err(leaf, slot, 1406 "invalid tree block info level, have %u expect [0, %u]", 1407 btrfs_tree_block_level(leaf, info), 1408 BTRFS_MAX_LEVEL - 1); 1409 return -EUCLEAN; 1410 } 1411 ptr = (unsigned long)(struct btrfs_tree_block_info *)(info + 1); 1412 } 1413 1414 /* Check inline refs */ 1415 while (ptr < end) { 1416 struct btrfs_extent_inline_ref *iref; 1417 struct btrfs_extent_data_ref *dref; 1418 struct btrfs_shared_data_ref *sref; 1419 u64 seq; 1420 u64 dref_offset; 1421 u64 inline_offset; 1422 u8 inline_type; 1423 1424 if (unlikely(ptr + sizeof(*iref) > end)) { 1425 extent_err(leaf, slot, 1426 "inline ref item overflows extent item, ptr %lu iref 
size %zu end %lu", 1427 ptr, sizeof(*iref), end); 1428 return -EUCLEAN; 1429 } 1430 iref = (struct btrfs_extent_inline_ref *)ptr; 1431 inline_type = btrfs_extent_inline_ref_type(leaf, iref); 1432 inline_offset = btrfs_extent_inline_ref_offset(leaf, iref); 1433 seq = inline_offset; 1434 if (unlikely(ptr + btrfs_extent_inline_ref_size(inline_type) > end)) { 1435 extent_err(leaf, slot, 1436 "inline ref item overflows extent item, ptr %lu iref size %u end %lu", 1437 ptr, btrfs_extent_inline_ref_size(inline_type), end); 1438 return -EUCLEAN; 1439 } 1440 1441 switch (inline_type) { 1442 /* inline_offset is subvolid of the owner, no need to check */ 1443 case BTRFS_TREE_BLOCK_REF_KEY: 1444 inline_refs++; 1445 break; 1446 /* Contains parent bytenr */ 1447 case BTRFS_SHARED_BLOCK_REF_KEY: 1448 if (unlikely(!IS_ALIGNED(inline_offset, 1449 fs_info->sectorsize))) { 1450 extent_err(leaf, slot, 1451 "invalid tree parent bytenr, have %llu expect aligned to %u", 1452 inline_offset, fs_info->sectorsize); 1453 return -EUCLEAN; 1454 } 1455 inline_refs++; 1456 break; 1457 /* 1458 * Contains owner subvolid, owner key objectid, adjusted offset. 1459 * The only obvious corruption can happen in that offset. 
1460 */ 1461 case BTRFS_EXTENT_DATA_REF_KEY: 1462 dref = (struct btrfs_extent_data_ref *)(&iref->offset); 1463 dref_offset = btrfs_extent_data_ref_offset(leaf, dref); 1464 seq = hash_extent_data_ref( 1465 btrfs_extent_data_ref_root(leaf, dref), 1466 btrfs_extent_data_ref_objectid(leaf, dref), 1467 btrfs_extent_data_ref_offset(leaf, dref)); 1468 if (unlikely(!IS_ALIGNED(dref_offset, 1469 fs_info->sectorsize))) { 1470 extent_err(leaf, slot, 1471 "invalid data ref offset, have %llu expect aligned to %u", 1472 dref_offset, fs_info->sectorsize); 1473 return -EUCLEAN; 1474 } 1475 inline_refs += btrfs_extent_data_ref_count(leaf, dref); 1476 break; 1477 /* Contains parent bytenr and ref count */ 1478 case BTRFS_SHARED_DATA_REF_KEY: 1479 sref = (struct btrfs_shared_data_ref *)(iref + 1); 1480 if (unlikely(!IS_ALIGNED(inline_offset, 1481 fs_info->sectorsize))) { 1482 extent_err(leaf, slot, 1483 "invalid data parent bytenr, have %llu expect aligned to %u", 1484 inline_offset, fs_info->sectorsize); 1485 return -EUCLEAN; 1486 } 1487 inline_refs += btrfs_shared_data_ref_count(leaf, sref); 1488 break; 1489 default: 1490 extent_err(leaf, slot, "unknown inline ref type: %u", 1491 inline_type); 1492 return -EUCLEAN; 1493 } 1494 if (inline_type < last_type) { 1495 extent_err(leaf, slot, 1496 "inline ref out-of-order: has type %u, prev type %u", 1497 inline_type, last_type); 1498 return -EUCLEAN; 1499 } 1500 /* Type changed, allow the sequence starts from U64_MAX again. 
*/ 1501 if (inline_type > last_type) 1502 last_seq = U64_MAX; 1503 if (seq > last_seq) { 1504 extent_err(leaf, slot, 1505 "inline ref out-of-order: has type %u offset %llu seq 0x%llx, prev type %u seq 0x%llx", 1506 inline_type, inline_offset, seq, 1507 last_type, last_seq); 1508 return -EUCLEAN; 1509 } 1510 last_type = inline_type; 1511 last_seq = seq; 1512 ptr += btrfs_extent_inline_ref_size(inline_type); 1513 } 1514 /* No padding is allowed */ 1515 if (unlikely(ptr != end)) { 1516 extent_err(leaf, slot, 1517 "invalid extent item size, padding bytes found"); 1518 return -EUCLEAN; 1519 } 1520 1521 /* Finally, check the inline refs against total refs */ 1522 if (unlikely(inline_refs > total_refs)) { 1523 extent_err(leaf, slot, 1524 "invalid extent refs, have %llu expect >= inline %llu", 1525 total_refs, inline_refs); 1526 return -EUCLEAN; 1527 } 1528 1529 if ((prev_key->type == BTRFS_EXTENT_ITEM_KEY) || 1530 (prev_key->type == BTRFS_METADATA_ITEM_KEY)) { 1531 u64 prev_end = prev_key->objectid; 1532 1533 if (prev_key->type == BTRFS_METADATA_ITEM_KEY) 1534 prev_end += fs_info->nodesize; 1535 else 1536 prev_end += prev_key->offset; 1537 1538 if (unlikely(prev_end > key->objectid)) { 1539 extent_err(leaf, slot, 1540 "previous extent [%llu %u %llu] overlaps current extent [%llu %u %llu]", 1541 prev_key->objectid, prev_key->type, 1542 prev_key->offset, key->objectid, key->type, 1543 key->offset); 1544 return -EUCLEAN; 1545 } 1546 } 1547 1548 return 0; 1549 } 1550 1551 static int check_simple_keyed_refs(struct extent_buffer *leaf, 1552 struct btrfs_key *key, int slot) 1553 { 1554 u32 expect_item_size = 0; 1555 1556 if (key->type == BTRFS_SHARED_DATA_REF_KEY) 1557 expect_item_size = sizeof(struct btrfs_shared_data_ref); 1558 1559 if (unlikely(btrfs_item_size(leaf, slot) != expect_item_size)) { 1560 generic_err(leaf, slot, 1561 "invalid item size, have %u expect %u for key type %u", 1562 btrfs_item_size(leaf, slot), 1563 expect_item_size, key->type); 1564 return -EUCLEAN; 
1565 } 1566 if (unlikely(!IS_ALIGNED(key->objectid, leaf->fs_info->sectorsize))) { 1567 generic_err(leaf, slot, 1568 "invalid key objectid for shared block ref, have %llu expect aligned to %u", 1569 key->objectid, leaf->fs_info->sectorsize); 1570 return -EUCLEAN; 1571 } 1572 if (unlikely(key->type != BTRFS_TREE_BLOCK_REF_KEY && 1573 !IS_ALIGNED(key->offset, leaf->fs_info->sectorsize))) { 1574 extent_err(leaf, slot, 1575 "invalid tree parent bytenr, have %llu expect aligned to %u", 1576 key->offset, leaf->fs_info->sectorsize); 1577 return -EUCLEAN; 1578 } 1579 return 0; 1580 } 1581 1582 static int check_extent_data_ref(struct extent_buffer *leaf, 1583 struct btrfs_key *key, int slot) 1584 { 1585 struct btrfs_extent_data_ref *dref; 1586 unsigned long ptr = btrfs_item_ptr_offset(leaf, slot); 1587 const unsigned long end = ptr + btrfs_item_size(leaf, slot); 1588 1589 if (unlikely(btrfs_item_size(leaf, slot) % sizeof(*dref) != 0)) { 1590 generic_err(leaf, slot, 1591 "invalid item size, have %u expect aligned to %zu for key type %u", 1592 btrfs_item_size(leaf, slot), 1593 sizeof(*dref), key->type); 1594 return -EUCLEAN; 1595 } 1596 if (unlikely(!IS_ALIGNED(key->objectid, leaf->fs_info->sectorsize))) { 1597 generic_err(leaf, slot, 1598 "invalid key objectid for shared block ref, have %llu expect aligned to %u", 1599 key->objectid, leaf->fs_info->sectorsize); 1600 return -EUCLEAN; 1601 } 1602 for (; ptr < end; ptr += sizeof(*dref)) { 1603 u64 offset; 1604 1605 /* 1606 * We cannot check the extent_data_ref hash due to possible 1607 * overflow from the leaf due to hash collisions. 
1608 */ 1609 dref = (struct btrfs_extent_data_ref *)ptr; 1610 offset = btrfs_extent_data_ref_offset(leaf, dref); 1611 if (unlikely(!IS_ALIGNED(offset, leaf->fs_info->sectorsize))) { 1612 extent_err(leaf, slot, 1613 "invalid extent data backref offset, have %llu expect aligned to %u", 1614 offset, leaf->fs_info->sectorsize); 1615 return -EUCLEAN; 1616 } 1617 } 1618 return 0; 1619 } 1620 1621 #define inode_ref_err(eb, slot, fmt, args...) \ 1622 inode_item_err(eb, slot, fmt, ##args) 1623 static int check_inode_ref(struct extent_buffer *leaf, 1624 struct btrfs_key *key, struct btrfs_key *prev_key, 1625 int slot) 1626 { 1627 struct btrfs_inode_ref *iref; 1628 unsigned long ptr; 1629 unsigned long end; 1630 1631 if (unlikely(!check_prev_ino(leaf, key, slot, prev_key))) 1632 return -EUCLEAN; 1633 /* namelen can't be 0, so item_size == sizeof() is also invalid */ 1634 if (unlikely(btrfs_item_size(leaf, slot) <= sizeof(*iref))) { 1635 inode_ref_err(leaf, slot, 1636 "invalid item size, have %u expect (%zu, %u)", 1637 btrfs_item_size(leaf, slot), 1638 sizeof(*iref), BTRFS_LEAF_DATA_SIZE(leaf->fs_info)); 1639 return -EUCLEAN; 1640 } 1641 1642 ptr = btrfs_item_ptr_offset(leaf, slot); 1643 end = ptr + btrfs_item_size(leaf, slot); 1644 while (ptr < end) { 1645 u16 namelen; 1646 1647 if (unlikely(ptr + sizeof(iref) > end)) { 1648 inode_ref_err(leaf, slot, 1649 "inode ref overflow, ptr %lu end %lu inode_ref_size %zu", 1650 ptr, end, sizeof(iref)); 1651 return -EUCLEAN; 1652 } 1653 1654 iref = (struct btrfs_inode_ref *)ptr; 1655 namelen = btrfs_inode_ref_name_len(leaf, iref); 1656 if (unlikely(ptr + sizeof(*iref) + namelen > end)) { 1657 inode_ref_err(leaf, slot, 1658 "inode ref overflow, ptr %lu end %lu namelen %u", 1659 ptr, end, namelen); 1660 return -EUCLEAN; 1661 } 1662 1663 /* 1664 * NOTE: In theory we should record all found index numbers 1665 * to find any duplicated indexes, but that will be too time 1666 * consuming for inodes with too many hard links. 
1667 */ 1668 ptr += sizeof(*iref) + namelen; 1669 } 1670 return 0; 1671 } 1672 1673 /* 1674 * Common point to switch the item-specific validation. 1675 */ 1676 static enum btrfs_tree_block_status check_leaf_item(struct extent_buffer *leaf, 1677 struct btrfs_key *key, 1678 int slot, 1679 struct btrfs_key *prev_key) 1680 { 1681 int ret = 0; 1682 struct btrfs_chunk *chunk; 1683 1684 switch (key->type) { 1685 case BTRFS_EXTENT_DATA_KEY: 1686 ret = check_extent_data_item(leaf, key, slot, prev_key); 1687 break; 1688 case BTRFS_EXTENT_CSUM_KEY: 1689 ret = check_csum_item(leaf, key, slot, prev_key); 1690 break; 1691 case BTRFS_DIR_ITEM_KEY: 1692 case BTRFS_DIR_INDEX_KEY: 1693 case BTRFS_XATTR_ITEM_KEY: 1694 ret = check_dir_item(leaf, key, prev_key, slot); 1695 break; 1696 case BTRFS_INODE_REF_KEY: 1697 ret = check_inode_ref(leaf, key, prev_key, slot); 1698 break; 1699 case BTRFS_BLOCK_GROUP_ITEM_KEY: 1700 ret = check_block_group_item(leaf, key, slot); 1701 break; 1702 case BTRFS_CHUNK_ITEM_KEY: 1703 chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk); 1704 ret = check_leaf_chunk_item(leaf, chunk, key, slot); 1705 break; 1706 case BTRFS_DEV_ITEM_KEY: 1707 ret = check_dev_item(leaf, key, slot); 1708 break; 1709 case BTRFS_INODE_ITEM_KEY: 1710 ret = check_inode_item(leaf, key, slot); 1711 break; 1712 case BTRFS_ROOT_ITEM_KEY: 1713 ret = check_root_item(leaf, key, slot); 1714 break; 1715 case BTRFS_EXTENT_ITEM_KEY: 1716 case BTRFS_METADATA_ITEM_KEY: 1717 ret = check_extent_item(leaf, key, slot, prev_key); 1718 break; 1719 case BTRFS_TREE_BLOCK_REF_KEY: 1720 case BTRFS_SHARED_DATA_REF_KEY: 1721 case BTRFS_SHARED_BLOCK_REF_KEY: 1722 ret = check_simple_keyed_refs(leaf, key, slot); 1723 break; 1724 case BTRFS_EXTENT_DATA_REF_KEY: 1725 ret = check_extent_data_ref(leaf, key, slot); 1726 break; 1727 } 1728 1729 if (ret) 1730 return BTRFS_TREE_BLOCK_INVALID_ITEM; 1731 return BTRFS_TREE_BLOCK_CLEAN; 1732 } 1733 1734 enum btrfs_tree_block_status __btrfs_check_leaf(struct 
extent_buffer *leaf) 1735 { 1736 struct btrfs_fs_info *fs_info = leaf->fs_info; 1737 /* No valid key type is 0, so all key should be larger than this key */ 1738 struct btrfs_key prev_key = {0, 0, 0}; 1739 struct btrfs_key key; 1740 u32 nritems = btrfs_header_nritems(leaf); 1741 int slot; 1742 1743 if (unlikely(btrfs_header_level(leaf) != 0)) { 1744 generic_err(leaf, 0, 1745 "invalid level for leaf, have %d expect 0", 1746 btrfs_header_level(leaf)); 1747 return BTRFS_TREE_BLOCK_INVALID_LEVEL; 1748 } 1749 1750 /* 1751 * Extent buffers from a relocation tree have a owner field that 1752 * corresponds to the subvolume tree they are based on. So just from an 1753 * extent buffer alone we can not find out what is the id of the 1754 * corresponding subvolume tree, so we can not figure out if the extent 1755 * buffer corresponds to the root of the relocation tree or not. So 1756 * skip this check for relocation trees. 1757 */ 1758 if (nritems == 0 && !btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_RELOC)) { 1759 u64 owner = btrfs_header_owner(leaf); 1760 1761 /* These trees must never be empty */ 1762 if (unlikely(owner == BTRFS_ROOT_TREE_OBJECTID || 1763 owner == BTRFS_CHUNK_TREE_OBJECTID || 1764 owner == BTRFS_DEV_TREE_OBJECTID || 1765 owner == BTRFS_FS_TREE_OBJECTID || 1766 owner == BTRFS_DATA_RELOC_TREE_OBJECTID)) { 1767 generic_err(leaf, 0, 1768 "invalid root, root %llu must never be empty", 1769 owner); 1770 return BTRFS_TREE_BLOCK_INVALID_NRITEMS; 1771 } 1772 1773 /* Unknown tree */ 1774 if (unlikely(owner == 0)) { 1775 generic_err(leaf, 0, 1776 "invalid owner, root 0 is not defined"); 1777 return BTRFS_TREE_BLOCK_INVALID_OWNER; 1778 } 1779 1780 /* EXTENT_TREE_V2 can have empty extent trees. 
*/ 1781 if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) 1782 return BTRFS_TREE_BLOCK_CLEAN; 1783 1784 if (unlikely(owner == BTRFS_EXTENT_TREE_OBJECTID)) { 1785 generic_err(leaf, 0, 1786 "invalid root, root %llu must never be empty", 1787 owner); 1788 return BTRFS_TREE_BLOCK_INVALID_NRITEMS; 1789 } 1790 1791 return BTRFS_TREE_BLOCK_CLEAN; 1792 } 1793 1794 if (unlikely(nritems == 0)) 1795 return BTRFS_TREE_BLOCK_CLEAN; 1796 1797 /* 1798 * Check the following things to make sure this is a good leaf, and 1799 * leaf users won't need to bother with similar sanity checks: 1800 * 1801 * 1) key ordering 1802 * 2) item offset and size 1803 * No overlap, no hole, all inside the leaf. 1804 * 3) item content 1805 * If possible, do comprehensive sanity check. 1806 * NOTE: All checks must only rely on the item data itself. 1807 */ 1808 for (slot = 0; slot < nritems; slot++) { 1809 u32 item_end_expected; 1810 u64 item_data_end; 1811 1812 btrfs_item_key_to_cpu(leaf, &key, slot); 1813 1814 /* Make sure the keys are in the right order */ 1815 if (unlikely(btrfs_comp_cpu_keys(&prev_key, &key) >= 0)) { 1816 generic_err(leaf, slot, 1817 "bad key order, prev (%llu %u %llu) current (%llu %u %llu)", 1818 prev_key.objectid, prev_key.type, 1819 prev_key.offset, key.objectid, key.type, 1820 key.offset); 1821 return BTRFS_TREE_BLOCK_BAD_KEY_ORDER; 1822 } 1823 1824 item_data_end = (u64)btrfs_item_offset(leaf, slot) + 1825 btrfs_item_size(leaf, slot); 1826 /* 1827 * Make sure the offset and ends are right, remember that the 1828 * item data starts at the end of the leaf and grows towards the 1829 * front. 
1830 */ 1831 if (slot == 0) 1832 item_end_expected = BTRFS_LEAF_DATA_SIZE(fs_info); 1833 else 1834 item_end_expected = btrfs_item_offset(leaf, 1835 slot - 1); 1836 if (unlikely(item_data_end != item_end_expected)) { 1837 generic_err(leaf, slot, 1838 "unexpected item end, have %llu expect %u", 1839 item_data_end, item_end_expected); 1840 return BTRFS_TREE_BLOCK_INVALID_OFFSETS; 1841 } 1842 1843 /* 1844 * Check to make sure that we don't point outside of the leaf, 1845 * just in case all the items are consistent to each other, but 1846 * all point outside of the leaf. 1847 */ 1848 if (unlikely(item_data_end > BTRFS_LEAF_DATA_SIZE(fs_info))) { 1849 generic_err(leaf, slot, 1850 "slot end outside of leaf, have %llu expect range [0, %u]", 1851 item_data_end, BTRFS_LEAF_DATA_SIZE(fs_info)); 1852 return BTRFS_TREE_BLOCK_INVALID_OFFSETS; 1853 } 1854 1855 /* Also check if the item pointer overlaps with btrfs item. */ 1856 if (unlikely(btrfs_item_ptr_offset(leaf, slot) < 1857 btrfs_item_nr_offset(leaf, slot) + sizeof(struct btrfs_item))) { 1858 generic_err(leaf, slot, 1859 "slot overlaps with its data, item end %lu data start %lu", 1860 btrfs_item_nr_offset(leaf, slot) + 1861 sizeof(struct btrfs_item), 1862 btrfs_item_ptr_offset(leaf, slot)); 1863 return BTRFS_TREE_BLOCK_INVALID_OFFSETS; 1864 } 1865 1866 /* 1867 * We only want to do this if WRITTEN is set, otherwise the leaf 1868 * may be in some intermediate state and won't appear valid. 
		 */
		if (btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_WRITTEN)) {
			enum btrfs_tree_block_status ret;

			/*
			 * Check if the item size and content meet other
			 * criteria
			 */
			ret = check_leaf_item(leaf, &key, slot, &prev_key);
			if (unlikely(ret != BTRFS_TREE_BLOCK_CLEAN))
				return ret;
		}

		/* Remember this key so the next iteration can verify ordering. */
		prev_key.objectid = key.objectid;
		prev_key.type = key.type;
		prev_key.offset = key.offset;
	}

	return BTRFS_TREE_BLOCK_CLEAN;
}

/*
 * Entry point for leaf validation, folding the detailed status from
 * __btrfs_check_leaf() into a simple pass/fail errno for callers.
 *
 * Returns 0 if the leaf is valid, -EUCLEAN otherwise.
 */
int btrfs_check_leaf(struct extent_buffer *leaf)
{
	enum btrfs_tree_block_status ret;

	ret = __btrfs_check_leaf(leaf);
	if (unlikely(ret != BTRFS_TREE_BLOCK_CLEAN))
		return -EUCLEAN;
	return 0;
}
ALLOW_ERROR_INJECTION(btrfs_check_leaf, ERRNO);

/*
 * Validate an internal (non-leaf) tree node: its level, number of items,
 * child block pointers and key ordering.
 *
 * Returns BTRFS_TREE_BLOCK_CLEAN if no problem is found, otherwise a
 * status code describing the first corruption detected.
 */
enum btrfs_tree_block_status __btrfs_check_node(struct extent_buffer *node)
{
	struct btrfs_fs_info *fs_info = node->fs_info;
	unsigned long nr = btrfs_header_nritems(node);
	struct btrfs_key key, next_key;
	int slot;
	int level = btrfs_header_level(node);
	u64 bytenr;

	/* A node must sit strictly between leaf level 0 and the maximum. */
	if (unlikely(level <= 0 || level >= BTRFS_MAX_LEVEL)) {
		generic_err(node, 0,
			"invalid level for node, have %d expect [1, %d]",
			level, BTRFS_MAX_LEVEL - 1);
		return BTRFS_TREE_BLOCK_INVALID_LEVEL;
	}
	if (unlikely(nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(fs_info))) {
		btrfs_crit(fs_info,
"corrupt node: root=%llu block=%llu, nritems too %s, have %lu expect range [1,%u]",
			   btrfs_header_owner(node), node->start,
			   nr == 0 ? "small" : "large", nr,
			   BTRFS_NODEPTRS_PER_BLOCK(fs_info));
		return BTRFS_TREE_BLOCK_INVALID_NRITEMS;
	}

	/*
	 * Walk each slot along with its successor, checking the block
	 * pointer of the current slot and the key order between the two.
	 */
	for (slot = 0; slot < nr - 1; slot++) {
		bytenr = btrfs_node_blockptr(node, slot);
		btrfs_node_key_to_cpu(node, &key, slot);
		btrfs_node_key_to_cpu(node, &next_key, slot + 1);

		if (unlikely(!bytenr)) {
			generic_err(node, slot,
				"invalid NULL node pointer");
			return BTRFS_TREE_BLOCK_INVALID_BLOCKPTR;
		}
		/* Tree blocks are always sector aligned on disk. */
		if (unlikely(!IS_ALIGNED(bytenr, fs_info->sectorsize))) {
			generic_err(node, slot,
			"unaligned pointer, have %llu should be aligned to %u",
				bytenr, fs_info->sectorsize);
			return BTRFS_TREE_BLOCK_INVALID_BLOCKPTR;
		}

		/* Keys must be in strictly ascending order. */
		if (unlikely(btrfs_comp_cpu_keys(&key, &next_key) >= 0)) {
			generic_err(node, slot,
			"bad key order, current (%llu %u %llu) next (%llu %u %llu)",
				key.objectid, key.type, key.offset,
				next_key.objectid, next_key.type,
				next_key.offset);
			return BTRFS_TREE_BLOCK_BAD_KEY_ORDER;
		}
	}
	return BTRFS_TREE_BLOCK_CLEAN;
}

/*
 * Entry point for node validation, folding the detailed status from
 * __btrfs_check_node() into a simple pass/fail errno for callers.
 *
 * Returns 0 if the node is valid, -EUCLEAN otherwise.
 */
int btrfs_check_node(struct extent_buffer *node)
{
	enum btrfs_tree_block_status ret;

	ret = __btrfs_check_node(node);
	if (unlikely(ret != BTRFS_TREE_BLOCK_CLEAN))
		return -EUCLEAN;
	return 0;
}
ALLOW_ERROR_INJECTION(btrfs_check_node, ERRNO);

/*
 * Check that the owner recorded in @eb's header is consistent with the
 * root (@root_owner) the block was reached from.
 *
 * Returns 0 when the ownership is acceptable (or cannot be reliably
 * checked), -EUCLEAN on a detected mismatch.
 */
int btrfs_check_eb_owner(const struct extent_buffer *eb, u64 root_owner)
{
	const bool is_subvol = is_fstree(root_owner);
	const u64 eb_owner = btrfs_header_owner(eb);

	/*
	 * Skip dummy fs, as selftests don't create unique ebs for each dummy
	 * root.
	 */
	if (test_bit(BTRFS_FS_STATE_DUMMY_FS_INFO, &eb->fs_info->fs_state))
		return 0;
	/*
	 * There are several call sites (backref walking, qgroup, and data
	 * reloc) passing 0 as @root_owner, as they are not holding the
	 * tree root.  In that case, we can not do a reliable ownership check,
	 * so just exit.
	 */
	if (root_owner == 0)
		return 0;
	/*
	 * These trees use key.offset as their owner, our callers don't have
	 * the extra capacity to pass key.offset here.  So we just skip them.
	 */
	if (root_owner == BTRFS_TREE_LOG_OBJECTID ||
	    root_owner == BTRFS_TREE_RELOC_OBJECTID)
		return 0;

	if (!is_subvol) {
		/* For non-subvolume trees, the eb owner should match root owner */
		if (unlikely(root_owner != eb_owner)) {
			btrfs_crit(eb->fs_info,
"corrupted %s, root=%llu block=%llu owner mismatch, have %llu expect %llu",
				btrfs_header_level(eb) == 0 ? "leaf" : "node",
				root_owner, btrfs_header_bytenr(eb), eb_owner,
				root_owner);
			return -EUCLEAN;
		}
		return 0;
	}

	/*
	 * For subvolume trees, owners can mismatch, but they should all belong
	 * to subvolume trees.
	 */
	if (unlikely(is_subvol != is_fstree(eb_owner))) {
		btrfs_crit(eb->fs_info,
"corrupted %s, root=%llu block=%llu owner mismatch, have %llu expect [%llu, %llu]",
			btrfs_header_level(eb) == 0 ? "leaf" : "node",
			root_owner, btrfs_header_bytenr(eb), eb_owner,
			BTRFS_FIRST_FREE_OBJECTID, BTRFS_LAST_FREE_OBJECTID);
		return -EUCLEAN;
	}
	return 0;
}

/*
 * Verify that @eb is at the expected @level and, when @first_key is
 * provided, that the first key stored in @eb matches @first_key.
 *
 * @parent_transid is used only for error reporting.
 *
 * Returns 0 on success, -EIO on a level mismatch, -EUCLEAN for an empty
 * tree block, or the non-zero key comparison result on a first key
 * mismatch.
 */
int btrfs_verify_level_key(struct extent_buffer *eb, int level,
			   struct btrfs_key *first_key, u64 parent_transid)
{
	struct btrfs_fs_info *fs_info = eb->fs_info;
	int found_level;
	struct btrfs_key found_key;
	int ret;

	found_level = btrfs_header_level(eb);
	if (found_level != level) {
		WARN(IS_ENABLED(CONFIG_BTRFS_DEBUG),
		     KERN_ERR "BTRFS: tree level check failed\n");
		btrfs_err(fs_info,
"tree level mismatch detected, bytenr=%llu level expected=%u has=%u",
			  eb->start, level, found_level);
		return -EIO;
	}

	/* Callers may legitimately have no expected first key to compare. */
	if (!first_key)
		return 0;

	/*
	 * For live tree block (new tree blocks in current transaction),
	 * we need proper lock context to avoid race, which is impossible here.
	 * So we only check tree blocks which are read from disk, whose
	 * generation <= fs_info->last_trans_committed.
	 */
	if (btrfs_header_generation(eb) > fs_info->last_trans_committed)
		return 0;

	/* We have @first_key, so this @eb must have at least one item */
	if (btrfs_header_nritems(eb) == 0) {
		btrfs_err(fs_info,
		"invalid tree nritems, bytenr=%llu nritems=0 expect >0",
			  eb->start);
		WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
		return -EUCLEAN;
	}

	/* Nodes and leaves store their first key with different accessors. */
	if (found_level)
		btrfs_node_key_to_cpu(eb, &found_key, 0);
	else
		btrfs_item_key_to_cpu(eb, &found_key, 0);
	ret = btrfs_comp_cpu_keys(first_key, &found_key);

	if (ret) {
		WARN(IS_ENABLED(CONFIG_BTRFS_DEBUG),
		     KERN_ERR "BTRFS: tree first key check failed\n");
		btrfs_err(fs_info,
"tree first key mismatch detected, bytenr=%llu parent_transid=%llu key expected=(%llu,%u,%llu) has=(%llu,%u,%llu)",
			  eb->start, parent_transid, first_key->objectid,
			  first_key->type, first_key->offset,
			  found_key.objectid, found_key.type,
			  found_key.offset);
	}
	return ret;
}