1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (C) STRATO AG 2011. All rights reserved. 4 */ 5 6 /* 7 * This module can be used to catch cases when the btrfs kernel 8 * code executes write requests to the disk that bring the file 9 * system into an inconsistent state. In such a state, a power-loss 10 * or kernel panic event would cause the data on disk to be 11 * lost or at least damaged. 12 * 13 * Code is added that examines all block write requests during 14 * runtime (including writes of the super block). Three rules 15 * are verified and an error is printed on violation of the 16 * rules: 17 * 1. It is not allowed to write a disk block which is 18 * currently referenced by the super block (either directly 19 * or indirectly). 20 * 2. When a super block is written, it is verified that all 21 * referenced (directly or indirectly) blocks fulfill the 22 * following requirements: 23 * 2a. All referenced blocks have either been present when 24 * the file system was mounted, (i.e., they have been 25 * referenced by the super block) or they have been 26 * written since then and the write completion callback 27 * was called and no write error was indicated and a 28 * FLUSH request to the device where these blocks are 29 * located was received and completed. 30 * 2b. All referenced blocks need to have a generation 31 * number which is equal to the parent's number. 32 * 33 * One issue that was found using this module was that the log 34 * tree on disk became temporarily corrupted because disk blocks 35 * that had been in use for the log tree had been freed and 36 * reused too early, while being referenced by the written super 37 * block. 38 * 39 * The search term in the kernel log that can be used to filter 40 * on the existence of detected integrity issues is 41 * "btrfs: attempt". 42 * 43 * The integrity check is enabled via mount options. 
These 44 * mount options are only supported if the integrity check 45 * tool is compiled by defining BTRFS_FS_CHECK_INTEGRITY. 46 * 47 * Example #1, apply integrity checks to all metadata: 48 * mount /dev/sdb1 /mnt -o check_int 49 * 50 * Example #2, apply integrity checks to all metadata and 51 * to data extents: 52 * mount /dev/sdb1 /mnt -o check_int_data 53 * 54 * Example #3, apply integrity checks to all metadata and dump 55 * the tree that the super block references to kernel messages 56 * each time after a super block was written: 57 * mount /dev/sdb1 /mnt -o check_int,check_int_print_mask=263 58 * 59 * If the integrity check tool is included and activated in 60 * the mount options, plenty of kernel memory is used, and 61 * plenty of additional CPU cycles are spent. Enabling this 62 * functionality is not intended for normal use. In most 63 * cases, unless you are a btrfs developer who needs to verify 64 * the integrity of (super)-block write requests, do not 65 * enable the config option BTRFS_FS_CHECK_INTEGRITY to 66 * include and compile the integrity check tool. 67 * 68 * Expect millions of lines of information in the kernel log with an 69 * enabled check_int_print_mask. Therefore set LOG_BUF_SHIFT in the 70 * kernel config to at least 26 (which is 64MB). Usually the value is 71 * limited to 21 (which is 2MB) in init/Kconfig. 
The file needs to be 72 * changed like this before LOG_BUF_SHIFT can be set to a high value: 73 * config LOG_BUF_SHIFT 74 * int "Kernel log buffer size (16 => 64KB, 17 => 128KB)" 75 * range 12 30 76 */ 77 78 #include <linux/sched.h> 79 #include <linux/slab.h> 80 #include <linux/mutex.h> 81 #include <linux/genhd.h> 82 #include <linux/blkdev.h> 83 #include <linux/mm.h> 84 #include <linux/string.h> 85 #include <crypto/hash.h> 86 #include "ctree.h" 87 #include "disk-io.h" 88 #include "transaction.h" 89 #include "extent_io.h" 90 #include "volumes.h" 91 #include "print-tree.h" 92 #include "locking.h" 93 #include "check-integrity.h" 94 #include "rcu-string.h" 95 #include "compression.h" 96 97 #define BTRFSIC_BLOCK_HASHTABLE_SIZE 0x10000 98 #define BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE 0x10000 99 #define BTRFSIC_DEV2STATE_HASHTABLE_SIZE 0x100 100 #define BTRFSIC_BLOCK_MAGIC_NUMBER 0x14491051 101 #define BTRFSIC_BLOCK_LINK_MAGIC_NUMBER 0x11070807 102 #define BTRFSIC_DEV2STATE_MAGIC_NUMBER 0x20111530 103 #define BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER 20111300 104 #define BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL (200 - 6) /* in characters, 105 * excluding " [...]" */ 106 #define BTRFSIC_GENERATION_UNKNOWN ((u64)-1) 107 108 /* 109 * The definition of the bitmask fields for the print_mask. 110 * They are specified with the mount option check_int_print_mask. 
111 */ 112 #define BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE 0x00000001 113 #define BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION 0x00000002 114 #define BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE 0x00000004 115 #define BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE 0x00000008 116 #define BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH 0x00000010 117 #define BTRFSIC_PRINT_MASK_END_IO_BIO_BH 0x00000020 118 #define BTRFSIC_PRINT_MASK_VERBOSE 0x00000040 119 #define BTRFSIC_PRINT_MASK_VERY_VERBOSE 0x00000080 120 #define BTRFSIC_PRINT_MASK_INITIAL_TREE 0x00000100 121 #define BTRFSIC_PRINT_MASK_INITIAL_ALL_TREES 0x00000200 122 #define BTRFSIC_PRINT_MASK_INITIAL_DATABASE 0x00000400 123 #define BTRFSIC_PRINT_MASK_NUM_COPIES 0x00000800 124 #define BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS 0x00001000 125 #define BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH_VERBOSE 0x00002000 126 127 struct btrfsic_dev_state; 128 struct btrfsic_state; 129 130 struct btrfsic_block { 131 u32 magic_num; /* only used for debug purposes */ 132 unsigned int is_metadata:1; /* if it is meta-data, not data-data */ 133 unsigned int is_superblock:1; /* if it is one of the superblocks */ 134 unsigned int is_iodone:1; /* if is done by lower subsystem */ 135 unsigned int iodone_w_error:1; /* error was indicated to endio */ 136 unsigned int never_written:1; /* block was added because it was 137 * referenced, not because it was 138 * written */ 139 unsigned int mirror_num; /* large enough to hold 140 * BTRFS_SUPER_MIRROR_MAX */ 141 struct btrfsic_dev_state *dev_state; 142 u64 dev_bytenr; /* key, physical byte num on disk */ 143 u64 logical_bytenr; /* logical byte num on disk */ 144 u64 generation; 145 struct btrfs_disk_key disk_key; /* extra info to print in case of 146 * issues, will not always be correct */ 147 struct list_head collision_resolving_node; /* list node */ 148 struct list_head all_blocks_node; /* list node */ 149 150 /* the following two lists contain block_link items */ 151 struct list_head ref_to_list; /* list */ 152 struct list_head 
ref_from_list; /* list */ 153 struct btrfsic_block *next_in_same_bio; 154 void *orig_bio_private; 155 bio_end_io_t *orig_bio_end_io; 156 int submit_bio_bh_rw; 157 u64 flush_gen; /* only valid if !never_written */ 158 }; 159 160 /* 161 * Elements of this type are allocated dynamically and required because 162 * each block object can refer to and can be ref from multiple blocks. 163 * The key to lookup them in the hashtable is the dev_bytenr of 164 * the block ref to plus the one from the block referred from. 165 * The fact that they are searchable via a hashtable and that a 166 * ref_cnt is maintained is not required for the btrfs integrity 167 * check algorithm itself, it is only used to make the output more 168 * beautiful in case that an error is detected (an error is defined 169 * as a write operation to a block while that block is still referenced). 170 */ 171 struct btrfsic_block_link { 172 u32 magic_num; /* only used for debug purposes */ 173 u32 ref_cnt; 174 struct list_head node_ref_to; /* list node */ 175 struct list_head node_ref_from; /* list node */ 176 struct list_head collision_resolving_node; /* list node */ 177 struct btrfsic_block *block_ref_to; 178 struct btrfsic_block *block_ref_from; 179 u64 parent_generation; 180 }; 181 182 struct btrfsic_dev_state { 183 u32 magic_num; /* only used for debug purposes */ 184 struct block_device *bdev; 185 struct btrfsic_state *state; 186 struct list_head collision_resolving_node; /* list node */ 187 struct btrfsic_block dummy_block_for_bio_bh_flush; 188 u64 last_flush_gen; 189 }; 190 191 struct btrfsic_block_hashtable { 192 struct list_head table[BTRFSIC_BLOCK_HASHTABLE_SIZE]; 193 }; 194 195 struct btrfsic_block_link_hashtable { 196 struct list_head table[BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE]; 197 }; 198 199 struct btrfsic_dev_state_hashtable { 200 struct list_head table[BTRFSIC_DEV2STATE_HASHTABLE_SIZE]; 201 }; 202 203 struct btrfsic_block_data_ctx { 204 u64 start; /* virtual bytenr */ 205 u64 dev_bytenr; /* 
physical bytenr on device */ 206 u32 len; 207 struct btrfsic_dev_state *dev; 208 char **datav; 209 struct page **pagev; 210 void *mem_to_free; 211 }; 212 213 /* This structure is used to implement recursion without occupying 214 * any stack space, refer to btrfsic_process_metablock() */ 215 struct btrfsic_stack_frame { 216 u32 magic; 217 u32 nr; 218 int error; 219 int i; 220 int limit_nesting; 221 int num_copies; 222 int mirror_num; 223 struct btrfsic_block *block; 224 struct btrfsic_block_data_ctx *block_ctx; 225 struct btrfsic_block *next_block; 226 struct btrfsic_block_data_ctx next_block_ctx; 227 struct btrfs_header *hdr; 228 struct btrfsic_stack_frame *prev; 229 }; 230 231 /* Some state per mounted filesystem */ 232 struct btrfsic_state { 233 u32 print_mask; 234 int include_extent_data; 235 struct list_head all_blocks_list; 236 struct btrfsic_block_hashtable block_hashtable; 237 struct btrfsic_block_link_hashtable block_link_hashtable; 238 struct btrfs_fs_info *fs_info; 239 u64 max_superblock_generation; 240 struct btrfsic_block *latest_superblock; 241 u32 metablock_size; 242 u32 datablock_size; 243 }; 244 245 static int btrfsic_process_metablock(struct btrfsic_state *state, 246 struct btrfsic_block *block, 247 struct btrfsic_block_data_ctx *block_ctx, 248 int limit_nesting, int force_iodone_flag); 249 static void btrfsic_read_from_block_data( 250 struct btrfsic_block_data_ctx *block_ctx, 251 void *dst, u32 offset, size_t len); 252 static int btrfsic_create_link_to_next_block( 253 struct btrfsic_state *state, 254 struct btrfsic_block *block, 255 struct btrfsic_block_data_ctx 256 *block_ctx, u64 next_bytenr, 257 int limit_nesting, 258 struct btrfsic_block_data_ctx *next_block_ctx, 259 struct btrfsic_block **next_blockp, 260 int force_iodone_flag, 261 int *num_copiesp, int *mirror_nump, 262 struct btrfs_disk_key *disk_key, 263 u64 parent_generation); 264 static int btrfsic_handle_extent_data(struct btrfsic_state *state, 265 struct btrfsic_block *block, 266 
struct btrfsic_block_data_ctx *block_ctx, 267 u32 item_offset, int force_iodone_flag); 268 static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len, 269 struct btrfsic_block_data_ctx *block_ctx_out, 270 int mirror_num); 271 static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx); 272 static int btrfsic_read_block(struct btrfsic_state *state, 273 struct btrfsic_block_data_ctx *block_ctx); 274 static int btrfsic_process_written_superblock( 275 struct btrfsic_state *state, 276 struct btrfsic_block *const block, 277 struct btrfs_super_block *const super_hdr); 278 static void btrfsic_bio_end_io(struct bio *bp); 279 static int btrfsic_is_block_ref_by_superblock(const struct btrfsic_state *state, 280 const struct btrfsic_block *block, 281 int recursion_level); 282 static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state, 283 struct btrfsic_block *const block, 284 int recursion_level); 285 static void btrfsic_print_add_link(const struct btrfsic_state *state, 286 const struct btrfsic_block_link *l); 287 static void btrfsic_print_rem_link(const struct btrfsic_state *state, 288 const struct btrfsic_block_link *l); 289 static char btrfsic_get_block_type(const struct btrfsic_state *state, 290 const struct btrfsic_block *block); 291 static void btrfsic_dump_tree(const struct btrfsic_state *state); 292 static void btrfsic_dump_tree_sub(const struct btrfsic_state *state, 293 const struct btrfsic_block *block, 294 int indent_level); 295 static struct btrfsic_block_link *btrfsic_block_link_lookup_or_add( 296 struct btrfsic_state *state, 297 struct btrfsic_block_data_ctx *next_block_ctx, 298 struct btrfsic_block *next_block, 299 struct btrfsic_block *from_block, 300 u64 parent_generation); 301 static struct btrfsic_block *btrfsic_block_lookup_or_add( 302 struct btrfsic_state *state, 303 struct btrfsic_block_data_ctx *block_ctx, 304 const char *additional_string, 305 int is_metadata, 306 int is_iodone, 307 int never_written, 308 
int mirror_num, 309 int *was_created); 310 static int btrfsic_process_superblock_dev_mirror( 311 struct btrfsic_state *state, 312 struct btrfsic_dev_state *dev_state, 313 struct btrfs_device *device, 314 int superblock_mirror_num, 315 struct btrfsic_dev_state **selected_dev_state, 316 struct btrfs_super_block *selected_super); 317 static struct btrfsic_dev_state *btrfsic_dev_state_lookup(dev_t dev); 318 static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state, 319 u64 bytenr, 320 struct btrfsic_dev_state *dev_state, 321 u64 dev_bytenr); 322 323 static struct mutex btrfsic_mutex; 324 static int btrfsic_is_initialized; 325 static struct btrfsic_dev_state_hashtable btrfsic_dev_state_hashtable; 326 327 328 static void btrfsic_block_init(struct btrfsic_block *b) 329 { 330 b->magic_num = BTRFSIC_BLOCK_MAGIC_NUMBER; 331 b->dev_state = NULL; 332 b->dev_bytenr = 0; 333 b->logical_bytenr = 0; 334 b->generation = BTRFSIC_GENERATION_UNKNOWN; 335 b->disk_key.objectid = 0; 336 b->disk_key.type = 0; 337 b->disk_key.offset = 0; 338 b->is_metadata = 0; 339 b->is_superblock = 0; 340 b->is_iodone = 0; 341 b->iodone_w_error = 0; 342 b->never_written = 0; 343 b->mirror_num = 0; 344 b->next_in_same_bio = NULL; 345 b->orig_bio_private = NULL; 346 b->orig_bio_end_io = NULL; 347 INIT_LIST_HEAD(&b->collision_resolving_node); 348 INIT_LIST_HEAD(&b->all_blocks_node); 349 INIT_LIST_HEAD(&b->ref_to_list); 350 INIT_LIST_HEAD(&b->ref_from_list); 351 b->submit_bio_bh_rw = 0; 352 b->flush_gen = 0; 353 } 354 355 static struct btrfsic_block *btrfsic_block_alloc(void) 356 { 357 struct btrfsic_block *b; 358 359 b = kzalloc(sizeof(*b), GFP_NOFS); 360 if (NULL != b) 361 btrfsic_block_init(b); 362 363 return b; 364 } 365 366 static void btrfsic_block_free(struct btrfsic_block *b) 367 { 368 BUG_ON(!(NULL == b || BTRFSIC_BLOCK_MAGIC_NUMBER == b->magic_num)); 369 kfree(b); 370 } 371 372 static void btrfsic_block_link_init(struct btrfsic_block_link *l) 373 { 374 l->magic_num = 
BTRFSIC_BLOCK_LINK_MAGIC_NUMBER; 375 l->ref_cnt = 1; 376 INIT_LIST_HEAD(&l->node_ref_to); 377 INIT_LIST_HEAD(&l->node_ref_from); 378 INIT_LIST_HEAD(&l->collision_resolving_node); 379 l->block_ref_to = NULL; 380 l->block_ref_from = NULL; 381 } 382 383 static struct btrfsic_block_link *btrfsic_block_link_alloc(void) 384 { 385 struct btrfsic_block_link *l; 386 387 l = kzalloc(sizeof(*l), GFP_NOFS); 388 if (NULL != l) 389 btrfsic_block_link_init(l); 390 391 return l; 392 } 393 394 static void btrfsic_block_link_free(struct btrfsic_block_link *l) 395 { 396 BUG_ON(!(NULL == l || BTRFSIC_BLOCK_LINK_MAGIC_NUMBER == l->magic_num)); 397 kfree(l); 398 } 399 400 static void btrfsic_dev_state_init(struct btrfsic_dev_state *ds) 401 { 402 ds->magic_num = BTRFSIC_DEV2STATE_MAGIC_NUMBER; 403 ds->bdev = NULL; 404 ds->state = NULL; 405 INIT_LIST_HEAD(&ds->collision_resolving_node); 406 ds->last_flush_gen = 0; 407 btrfsic_block_init(&ds->dummy_block_for_bio_bh_flush); 408 ds->dummy_block_for_bio_bh_flush.is_iodone = 1; 409 ds->dummy_block_for_bio_bh_flush.dev_state = ds; 410 } 411 412 static struct btrfsic_dev_state *btrfsic_dev_state_alloc(void) 413 { 414 struct btrfsic_dev_state *ds; 415 416 ds = kzalloc(sizeof(*ds), GFP_NOFS); 417 if (NULL != ds) 418 btrfsic_dev_state_init(ds); 419 420 return ds; 421 } 422 423 static void btrfsic_dev_state_free(struct btrfsic_dev_state *ds) 424 { 425 BUG_ON(!(NULL == ds || 426 BTRFSIC_DEV2STATE_MAGIC_NUMBER == ds->magic_num)); 427 kfree(ds); 428 } 429 430 static void btrfsic_block_hashtable_init(struct btrfsic_block_hashtable *h) 431 { 432 int i; 433 434 for (i = 0; i < BTRFSIC_BLOCK_HASHTABLE_SIZE; i++) 435 INIT_LIST_HEAD(h->table + i); 436 } 437 438 static void btrfsic_block_hashtable_add(struct btrfsic_block *b, 439 struct btrfsic_block_hashtable *h) 440 { 441 const unsigned int hashval = 442 (((unsigned int)(b->dev_bytenr >> 16)) ^ 443 ((unsigned int)((uintptr_t)b->dev_state->bdev))) & 444 (BTRFSIC_BLOCK_HASHTABLE_SIZE - 1); 445 446 
list_add(&b->collision_resolving_node, h->table + hashval); 447 } 448 449 static void btrfsic_block_hashtable_remove(struct btrfsic_block *b) 450 { 451 list_del(&b->collision_resolving_node); 452 } 453 454 static struct btrfsic_block *btrfsic_block_hashtable_lookup( 455 struct block_device *bdev, 456 u64 dev_bytenr, 457 struct btrfsic_block_hashtable *h) 458 { 459 const unsigned int hashval = 460 (((unsigned int)(dev_bytenr >> 16)) ^ 461 ((unsigned int)((uintptr_t)bdev))) & 462 (BTRFSIC_BLOCK_HASHTABLE_SIZE - 1); 463 struct btrfsic_block *b; 464 465 list_for_each_entry(b, h->table + hashval, collision_resolving_node) { 466 if (b->dev_state->bdev == bdev && b->dev_bytenr == dev_bytenr) 467 return b; 468 } 469 470 return NULL; 471 } 472 473 static void btrfsic_block_link_hashtable_init( 474 struct btrfsic_block_link_hashtable *h) 475 { 476 int i; 477 478 for (i = 0; i < BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE; i++) 479 INIT_LIST_HEAD(h->table + i); 480 } 481 482 static void btrfsic_block_link_hashtable_add( 483 struct btrfsic_block_link *l, 484 struct btrfsic_block_link_hashtable *h) 485 { 486 const unsigned int hashval = 487 (((unsigned int)(l->block_ref_to->dev_bytenr >> 16)) ^ 488 ((unsigned int)(l->block_ref_from->dev_bytenr >> 16)) ^ 489 ((unsigned int)((uintptr_t)l->block_ref_to->dev_state->bdev)) ^ 490 ((unsigned int)((uintptr_t)l->block_ref_from->dev_state->bdev))) 491 & (BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE - 1); 492 493 BUG_ON(NULL == l->block_ref_to); 494 BUG_ON(NULL == l->block_ref_from); 495 list_add(&l->collision_resolving_node, h->table + hashval); 496 } 497 498 static void btrfsic_block_link_hashtable_remove(struct btrfsic_block_link *l) 499 { 500 list_del(&l->collision_resolving_node); 501 } 502 503 static struct btrfsic_block_link *btrfsic_block_link_hashtable_lookup( 504 struct block_device *bdev_ref_to, 505 u64 dev_bytenr_ref_to, 506 struct block_device *bdev_ref_from, 507 u64 dev_bytenr_ref_from, 508 struct btrfsic_block_link_hashtable *h) 509 { 510 const 
unsigned int hashval = 511 (((unsigned int)(dev_bytenr_ref_to >> 16)) ^ 512 ((unsigned int)(dev_bytenr_ref_from >> 16)) ^ 513 ((unsigned int)((uintptr_t)bdev_ref_to)) ^ 514 ((unsigned int)((uintptr_t)bdev_ref_from))) & 515 (BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE - 1); 516 struct btrfsic_block_link *l; 517 518 list_for_each_entry(l, h->table + hashval, collision_resolving_node) { 519 BUG_ON(NULL == l->block_ref_to); 520 BUG_ON(NULL == l->block_ref_from); 521 if (l->block_ref_to->dev_state->bdev == bdev_ref_to && 522 l->block_ref_to->dev_bytenr == dev_bytenr_ref_to && 523 l->block_ref_from->dev_state->bdev == bdev_ref_from && 524 l->block_ref_from->dev_bytenr == dev_bytenr_ref_from) 525 return l; 526 } 527 528 return NULL; 529 } 530 531 static void btrfsic_dev_state_hashtable_init( 532 struct btrfsic_dev_state_hashtable *h) 533 { 534 int i; 535 536 for (i = 0; i < BTRFSIC_DEV2STATE_HASHTABLE_SIZE; i++) 537 INIT_LIST_HEAD(h->table + i); 538 } 539 540 static void btrfsic_dev_state_hashtable_add( 541 struct btrfsic_dev_state *ds, 542 struct btrfsic_dev_state_hashtable *h) 543 { 544 const unsigned int hashval = 545 (((unsigned int)((uintptr_t)ds->bdev->bd_dev)) & 546 (BTRFSIC_DEV2STATE_HASHTABLE_SIZE - 1)); 547 548 list_add(&ds->collision_resolving_node, h->table + hashval); 549 } 550 551 static void btrfsic_dev_state_hashtable_remove(struct btrfsic_dev_state *ds) 552 { 553 list_del(&ds->collision_resolving_node); 554 } 555 556 static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup(dev_t dev, 557 struct btrfsic_dev_state_hashtable *h) 558 { 559 const unsigned int hashval = 560 dev & (BTRFSIC_DEV2STATE_HASHTABLE_SIZE - 1); 561 struct btrfsic_dev_state *ds; 562 563 list_for_each_entry(ds, h->table + hashval, collision_resolving_node) { 564 if (ds->bdev->bd_dev == dev) 565 return ds; 566 } 567 568 return NULL; 569 } 570 571 static int btrfsic_process_superblock(struct btrfsic_state *state, 572 struct btrfs_fs_devices *fs_devices) 573 { 574 struct btrfs_super_block 
*selected_super; 575 struct list_head *dev_head = &fs_devices->devices; 576 struct btrfs_device *device; 577 struct btrfsic_dev_state *selected_dev_state = NULL; 578 int ret = 0; 579 int pass; 580 581 selected_super = kzalloc(sizeof(*selected_super), GFP_NOFS); 582 if (!selected_super) 583 return -ENOMEM; 584 585 list_for_each_entry(device, dev_head, dev_list) { 586 int i; 587 struct btrfsic_dev_state *dev_state; 588 589 if (!device->bdev || !device->name) 590 continue; 591 592 dev_state = btrfsic_dev_state_lookup(device->bdev->bd_dev); 593 BUG_ON(NULL == dev_state); 594 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { 595 ret = btrfsic_process_superblock_dev_mirror( 596 state, dev_state, device, i, 597 &selected_dev_state, selected_super); 598 if (0 != ret && 0 == i) { 599 kfree(selected_super); 600 return ret; 601 } 602 } 603 } 604 605 if (NULL == state->latest_superblock) { 606 pr_info("btrfsic: no superblock found!\n"); 607 kfree(selected_super); 608 return -1; 609 } 610 611 for (pass = 0; pass < 3; pass++) { 612 int num_copies; 613 int mirror_num; 614 u64 next_bytenr; 615 616 switch (pass) { 617 case 0: 618 next_bytenr = btrfs_super_root(selected_super); 619 if (state->print_mask & 620 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) 621 pr_info("root@%llu\n", next_bytenr); 622 break; 623 case 1: 624 next_bytenr = btrfs_super_chunk_root(selected_super); 625 if (state->print_mask & 626 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) 627 pr_info("chunk@%llu\n", next_bytenr); 628 break; 629 case 2: 630 next_bytenr = btrfs_super_log_root(selected_super); 631 if (0 == next_bytenr) 632 continue; 633 if (state->print_mask & 634 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) 635 pr_info("log@%llu\n", next_bytenr); 636 break; 637 } 638 639 num_copies = btrfs_num_copies(state->fs_info, next_bytenr, 640 state->metablock_size); 641 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) 642 pr_info("num_copies(log_bytenr=%llu) = %d\n", 643 next_bytenr, num_copies); 644 
645 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { 646 struct btrfsic_block *next_block; 647 struct btrfsic_block_data_ctx tmp_next_block_ctx; 648 struct btrfsic_block_link *l; 649 650 ret = btrfsic_map_block(state, next_bytenr, 651 state->metablock_size, 652 &tmp_next_block_ctx, 653 mirror_num); 654 if (ret) { 655 pr_info("btrfsic: btrfsic_map_block(root @%llu, mirror %d) failed!\n", 656 next_bytenr, mirror_num); 657 kfree(selected_super); 658 return -1; 659 } 660 661 next_block = btrfsic_block_hashtable_lookup( 662 tmp_next_block_ctx.dev->bdev, 663 tmp_next_block_ctx.dev_bytenr, 664 &state->block_hashtable); 665 BUG_ON(NULL == next_block); 666 667 l = btrfsic_block_link_hashtable_lookup( 668 tmp_next_block_ctx.dev->bdev, 669 tmp_next_block_ctx.dev_bytenr, 670 state->latest_superblock->dev_state-> 671 bdev, 672 state->latest_superblock->dev_bytenr, 673 &state->block_link_hashtable); 674 BUG_ON(NULL == l); 675 676 ret = btrfsic_read_block(state, &tmp_next_block_ctx); 677 if (ret < (int)PAGE_SIZE) { 678 pr_info("btrfsic: read @logical %llu failed!\n", 679 tmp_next_block_ctx.start); 680 btrfsic_release_block_ctx(&tmp_next_block_ctx); 681 kfree(selected_super); 682 return -1; 683 } 684 685 ret = btrfsic_process_metablock(state, 686 next_block, 687 &tmp_next_block_ctx, 688 BTRFS_MAX_LEVEL + 3, 1); 689 btrfsic_release_block_ctx(&tmp_next_block_ctx); 690 } 691 } 692 693 kfree(selected_super); 694 return ret; 695 } 696 697 static int btrfsic_process_superblock_dev_mirror( 698 struct btrfsic_state *state, 699 struct btrfsic_dev_state *dev_state, 700 struct btrfs_device *device, 701 int superblock_mirror_num, 702 struct btrfsic_dev_state **selected_dev_state, 703 struct btrfs_super_block *selected_super) 704 { 705 struct btrfs_fs_info *fs_info = state->fs_info; 706 struct btrfs_super_block *super_tmp; 707 u64 dev_bytenr; 708 struct btrfsic_block *superblock_tmp; 709 int pass; 710 struct block_device *const superblock_bdev = device->bdev; 711 struct page 
*page; 712 struct address_space *mapping = superblock_bdev->bd_inode->i_mapping; 713 int ret = 0; 714 715 /* super block bytenr is always the unmapped device bytenr */ 716 dev_bytenr = btrfs_sb_offset(superblock_mirror_num); 717 if (dev_bytenr + BTRFS_SUPER_INFO_SIZE > device->commit_total_bytes) 718 return -1; 719 720 page = read_cache_page_gfp(mapping, dev_bytenr >> PAGE_SHIFT, GFP_NOFS); 721 if (IS_ERR(page)) 722 return -1; 723 724 super_tmp = page_address(page); 725 726 if (btrfs_super_bytenr(super_tmp) != dev_bytenr || 727 btrfs_super_magic(super_tmp) != BTRFS_MAGIC || 728 memcmp(device->uuid, super_tmp->dev_item.uuid, BTRFS_UUID_SIZE) || 729 btrfs_super_nodesize(super_tmp) != state->metablock_size || 730 btrfs_super_sectorsize(super_tmp) != state->datablock_size) { 731 ret = 0; 732 goto out; 733 } 734 735 superblock_tmp = 736 btrfsic_block_hashtable_lookup(superblock_bdev, 737 dev_bytenr, 738 &state->block_hashtable); 739 if (NULL == superblock_tmp) { 740 superblock_tmp = btrfsic_block_alloc(); 741 if (NULL == superblock_tmp) { 742 ret = -1; 743 goto out; 744 } 745 /* for superblock, only the dev_bytenr makes sense */ 746 superblock_tmp->dev_bytenr = dev_bytenr; 747 superblock_tmp->dev_state = dev_state; 748 superblock_tmp->logical_bytenr = dev_bytenr; 749 superblock_tmp->generation = btrfs_super_generation(super_tmp); 750 superblock_tmp->is_metadata = 1; 751 superblock_tmp->is_superblock = 1; 752 superblock_tmp->is_iodone = 1; 753 superblock_tmp->never_written = 0; 754 superblock_tmp->mirror_num = 1 + superblock_mirror_num; 755 if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE) 756 btrfs_info_in_rcu(fs_info, 757 "new initial S-block (bdev %p, %s) @%llu (%pg/%llu/%d)", 758 superblock_bdev, 759 rcu_str_deref(device->name), dev_bytenr, 760 dev_state->bdev, dev_bytenr, 761 superblock_mirror_num); 762 list_add(&superblock_tmp->all_blocks_node, 763 &state->all_blocks_list); 764 btrfsic_block_hashtable_add(superblock_tmp, 765 &state->block_hashtable); 766 
} 767 768 /* select the one with the highest generation field */ 769 if (btrfs_super_generation(super_tmp) > 770 state->max_superblock_generation || 771 0 == state->max_superblock_generation) { 772 memcpy(selected_super, super_tmp, sizeof(*selected_super)); 773 *selected_dev_state = dev_state; 774 state->max_superblock_generation = 775 btrfs_super_generation(super_tmp); 776 state->latest_superblock = superblock_tmp; 777 } 778 779 for (pass = 0; pass < 3; pass++) { 780 u64 next_bytenr; 781 int num_copies; 782 int mirror_num; 783 const char *additional_string = NULL; 784 struct btrfs_disk_key tmp_disk_key; 785 786 tmp_disk_key.type = BTRFS_ROOT_ITEM_KEY; 787 tmp_disk_key.offset = 0; 788 switch (pass) { 789 case 0: 790 btrfs_set_disk_key_objectid(&tmp_disk_key, 791 BTRFS_ROOT_TREE_OBJECTID); 792 additional_string = "initial root "; 793 next_bytenr = btrfs_super_root(super_tmp); 794 break; 795 case 1: 796 btrfs_set_disk_key_objectid(&tmp_disk_key, 797 BTRFS_CHUNK_TREE_OBJECTID); 798 additional_string = "initial chunk "; 799 next_bytenr = btrfs_super_chunk_root(super_tmp); 800 break; 801 case 2: 802 btrfs_set_disk_key_objectid(&tmp_disk_key, 803 BTRFS_TREE_LOG_OBJECTID); 804 additional_string = "initial log "; 805 next_bytenr = btrfs_super_log_root(super_tmp); 806 if (0 == next_bytenr) 807 continue; 808 break; 809 } 810 811 num_copies = btrfs_num_copies(fs_info, next_bytenr, 812 state->metablock_size); 813 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) 814 pr_info("num_copies(log_bytenr=%llu) = %d\n", 815 next_bytenr, num_copies); 816 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { 817 struct btrfsic_block *next_block; 818 struct btrfsic_block_data_ctx tmp_next_block_ctx; 819 struct btrfsic_block_link *l; 820 821 if (btrfsic_map_block(state, next_bytenr, 822 state->metablock_size, 823 &tmp_next_block_ctx, 824 mirror_num)) { 825 pr_info("btrfsic: btrfsic_map_block(bytenr @%llu, mirror %d) failed!\n", 826 next_bytenr, mirror_num); 827 ret = -1; 828 
goto out; 829 } 830 831 next_block = btrfsic_block_lookup_or_add( 832 state, &tmp_next_block_ctx, 833 additional_string, 1, 1, 0, 834 mirror_num, NULL); 835 if (NULL == next_block) { 836 btrfsic_release_block_ctx(&tmp_next_block_ctx); 837 ret = -1; 838 goto out; 839 } 840 841 next_block->disk_key = tmp_disk_key; 842 next_block->generation = BTRFSIC_GENERATION_UNKNOWN; 843 l = btrfsic_block_link_lookup_or_add( 844 state, &tmp_next_block_ctx, 845 next_block, superblock_tmp, 846 BTRFSIC_GENERATION_UNKNOWN); 847 btrfsic_release_block_ctx(&tmp_next_block_ctx); 848 if (NULL == l) { 849 ret = -1; 850 goto out; 851 } 852 } 853 } 854 if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_ALL_TREES) 855 btrfsic_dump_tree_sub(state, superblock_tmp, 0); 856 857 out: 858 put_page(page); 859 return ret; 860 } 861 862 static struct btrfsic_stack_frame *btrfsic_stack_frame_alloc(void) 863 { 864 struct btrfsic_stack_frame *sf; 865 866 sf = kzalloc(sizeof(*sf), GFP_NOFS); 867 if (sf) 868 sf->magic = BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER; 869 return sf; 870 } 871 872 static void btrfsic_stack_frame_free(struct btrfsic_stack_frame *sf) 873 { 874 BUG_ON(!(NULL == sf || 875 BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER == sf->magic)); 876 kfree(sf); 877 } 878 879 static noinline_for_stack int btrfsic_process_metablock( 880 struct btrfsic_state *state, 881 struct btrfsic_block *const first_block, 882 struct btrfsic_block_data_ctx *const first_block_ctx, 883 int first_limit_nesting, int force_iodone_flag) 884 { 885 struct btrfsic_stack_frame initial_stack_frame = { 0 }; 886 struct btrfsic_stack_frame *sf; 887 struct btrfsic_stack_frame *next_stack; 888 struct btrfs_header *const first_hdr = 889 (struct btrfs_header *)first_block_ctx->datav[0]; 890 891 BUG_ON(!first_hdr); 892 sf = &initial_stack_frame; 893 sf->error = 0; 894 sf->i = -1; 895 sf->limit_nesting = first_limit_nesting; 896 sf->block = first_block; 897 sf->block_ctx = first_block_ctx; 898 sf->next_block = NULL; 899 sf->hdr = first_hdr; 
900 sf->prev = NULL; 901 902 continue_with_new_stack_frame: 903 sf->block->generation = btrfs_stack_header_generation(sf->hdr); 904 if (0 == sf->hdr->level) { 905 struct btrfs_leaf *const leafhdr = 906 (struct btrfs_leaf *)sf->hdr; 907 908 if (-1 == sf->i) { 909 sf->nr = btrfs_stack_header_nritems(&leafhdr->header); 910 911 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 912 pr_info("leaf %llu items %d generation %llu owner %llu\n", 913 sf->block_ctx->start, sf->nr, 914 btrfs_stack_header_generation( 915 &leafhdr->header), 916 btrfs_stack_header_owner( 917 &leafhdr->header)); 918 } 919 920 continue_with_current_leaf_stack_frame: 921 if (0 == sf->num_copies || sf->mirror_num > sf->num_copies) { 922 sf->i++; 923 sf->num_copies = 0; 924 } 925 926 if (sf->i < sf->nr) { 927 struct btrfs_item disk_item; 928 u32 disk_item_offset = 929 (uintptr_t)(leafhdr->items + sf->i) - 930 (uintptr_t)leafhdr; 931 struct btrfs_disk_key *disk_key; 932 u8 type; 933 u32 item_offset; 934 u32 item_size; 935 936 if (disk_item_offset + sizeof(struct btrfs_item) > 937 sf->block_ctx->len) { 938 leaf_item_out_of_bounce_error: 939 pr_info( 940 "btrfsic: leaf item out of bounce at logical %llu, dev %pg\n", 941 sf->block_ctx->start, 942 sf->block_ctx->dev->bdev); 943 goto one_stack_frame_backwards; 944 } 945 btrfsic_read_from_block_data(sf->block_ctx, 946 &disk_item, 947 disk_item_offset, 948 sizeof(struct btrfs_item)); 949 item_offset = btrfs_stack_item_offset(&disk_item); 950 item_size = btrfs_stack_item_size(&disk_item); 951 disk_key = &disk_item.key; 952 type = btrfs_disk_key_type(disk_key); 953 954 if (BTRFS_ROOT_ITEM_KEY == type) { 955 struct btrfs_root_item root_item; 956 u32 root_item_offset; 957 u64 next_bytenr; 958 959 root_item_offset = item_offset + 960 offsetof(struct btrfs_leaf, items); 961 if (root_item_offset + item_size > 962 sf->block_ctx->len) 963 goto leaf_item_out_of_bounce_error; 964 btrfsic_read_from_block_data( 965 sf->block_ctx, &root_item, 966 root_item_offset, 967 
item_size); 968 next_bytenr = btrfs_root_bytenr(&root_item); 969 970 sf->error = 971 btrfsic_create_link_to_next_block( 972 state, 973 sf->block, 974 sf->block_ctx, 975 next_bytenr, 976 sf->limit_nesting, 977 &sf->next_block_ctx, 978 &sf->next_block, 979 force_iodone_flag, 980 &sf->num_copies, 981 &sf->mirror_num, 982 disk_key, 983 btrfs_root_generation( 984 &root_item)); 985 if (sf->error) 986 goto one_stack_frame_backwards; 987 988 if (NULL != sf->next_block) { 989 struct btrfs_header *const next_hdr = 990 (struct btrfs_header *) 991 sf->next_block_ctx.datav[0]; 992 993 next_stack = 994 btrfsic_stack_frame_alloc(); 995 if (NULL == next_stack) { 996 sf->error = -1; 997 btrfsic_release_block_ctx( 998 &sf-> 999 next_block_ctx); 1000 goto one_stack_frame_backwards; 1001 } 1002 1003 next_stack->i = -1; 1004 next_stack->block = sf->next_block; 1005 next_stack->block_ctx = 1006 &sf->next_block_ctx; 1007 next_stack->next_block = NULL; 1008 next_stack->hdr = next_hdr; 1009 next_stack->limit_nesting = 1010 sf->limit_nesting - 1; 1011 next_stack->prev = sf; 1012 sf = next_stack; 1013 goto continue_with_new_stack_frame; 1014 } 1015 } else if (BTRFS_EXTENT_DATA_KEY == type && 1016 state->include_extent_data) { 1017 sf->error = btrfsic_handle_extent_data( 1018 state, 1019 sf->block, 1020 sf->block_ctx, 1021 item_offset, 1022 force_iodone_flag); 1023 if (sf->error) 1024 goto one_stack_frame_backwards; 1025 } 1026 1027 goto continue_with_current_leaf_stack_frame; 1028 } 1029 } else { 1030 struct btrfs_node *const nodehdr = (struct btrfs_node *)sf->hdr; 1031 1032 if (-1 == sf->i) { 1033 sf->nr = btrfs_stack_header_nritems(&nodehdr->header); 1034 1035 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 1036 pr_info("node %llu level %d items %d generation %llu owner %llu\n", 1037 sf->block_ctx->start, 1038 nodehdr->header.level, sf->nr, 1039 btrfs_stack_header_generation( 1040 &nodehdr->header), 1041 btrfs_stack_header_owner( 1042 &nodehdr->header)); 1043 } 1044 1045 
continue_with_current_node_stack_frame: 1046 if (0 == sf->num_copies || sf->mirror_num > sf->num_copies) { 1047 sf->i++; 1048 sf->num_copies = 0; 1049 } 1050 1051 if (sf->i < sf->nr) { 1052 struct btrfs_key_ptr key_ptr; 1053 u32 key_ptr_offset; 1054 u64 next_bytenr; 1055 1056 key_ptr_offset = (uintptr_t)(nodehdr->ptrs + sf->i) - 1057 (uintptr_t)nodehdr; 1058 if (key_ptr_offset + sizeof(struct btrfs_key_ptr) > 1059 sf->block_ctx->len) { 1060 pr_info( 1061 "btrfsic: node item out of bounce at logical %llu, dev %pg\n", 1062 sf->block_ctx->start, 1063 sf->block_ctx->dev->bdev); 1064 goto one_stack_frame_backwards; 1065 } 1066 btrfsic_read_from_block_data( 1067 sf->block_ctx, &key_ptr, key_ptr_offset, 1068 sizeof(struct btrfs_key_ptr)); 1069 next_bytenr = btrfs_stack_key_blockptr(&key_ptr); 1070 1071 sf->error = btrfsic_create_link_to_next_block( 1072 state, 1073 sf->block, 1074 sf->block_ctx, 1075 next_bytenr, 1076 sf->limit_nesting, 1077 &sf->next_block_ctx, 1078 &sf->next_block, 1079 force_iodone_flag, 1080 &sf->num_copies, 1081 &sf->mirror_num, 1082 &key_ptr.key, 1083 btrfs_stack_key_generation(&key_ptr)); 1084 if (sf->error) 1085 goto one_stack_frame_backwards; 1086 1087 if (NULL != sf->next_block) { 1088 struct btrfs_header *const next_hdr = 1089 (struct btrfs_header *) 1090 sf->next_block_ctx.datav[0]; 1091 1092 next_stack = btrfsic_stack_frame_alloc(); 1093 if (NULL == next_stack) { 1094 sf->error = -1; 1095 goto one_stack_frame_backwards; 1096 } 1097 1098 next_stack->i = -1; 1099 next_stack->block = sf->next_block; 1100 next_stack->block_ctx = &sf->next_block_ctx; 1101 next_stack->next_block = NULL; 1102 next_stack->hdr = next_hdr; 1103 next_stack->limit_nesting = 1104 sf->limit_nesting - 1; 1105 next_stack->prev = sf; 1106 sf = next_stack; 1107 goto continue_with_new_stack_frame; 1108 } 1109 1110 goto continue_with_current_node_stack_frame; 1111 } 1112 } 1113 1114 one_stack_frame_backwards: 1115 if (NULL != sf->prev) { 1116 struct btrfsic_stack_frame *const 
prev = sf->prev; 1117 1118 /* the one for the initial block is freed in the caller */ 1119 btrfsic_release_block_ctx(sf->block_ctx); 1120 1121 if (sf->error) { 1122 prev->error = sf->error; 1123 btrfsic_stack_frame_free(sf); 1124 sf = prev; 1125 goto one_stack_frame_backwards; 1126 } 1127 1128 btrfsic_stack_frame_free(sf); 1129 sf = prev; 1130 goto continue_with_new_stack_frame; 1131 } else { 1132 BUG_ON(&initial_stack_frame != sf); 1133 } 1134 1135 return sf->error; 1136 } 1137 1138 static void btrfsic_read_from_block_data( 1139 struct btrfsic_block_data_ctx *block_ctx, 1140 void *dstv, u32 offset, size_t len) 1141 { 1142 size_t cur; 1143 size_t pgoff; 1144 char *kaddr; 1145 char *dst = (char *)dstv; 1146 size_t start_offset = offset_in_page(block_ctx->start); 1147 unsigned long i = (start_offset + offset) >> PAGE_SHIFT; 1148 1149 WARN_ON(offset + len > block_ctx->len); 1150 pgoff = offset_in_page(start_offset + offset); 1151 1152 while (len > 0) { 1153 cur = min(len, ((size_t)PAGE_SIZE - pgoff)); 1154 BUG_ON(i >= DIV_ROUND_UP(block_ctx->len, PAGE_SIZE)); 1155 kaddr = block_ctx->datav[i]; 1156 memcpy(dst, kaddr + pgoff, cur); 1157 1158 dst += cur; 1159 len -= cur; 1160 pgoff = 0; 1161 i++; 1162 } 1163 } 1164 1165 static int btrfsic_create_link_to_next_block( 1166 struct btrfsic_state *state, 1167 struct btrfsic_block *block, 1168 struct btrfsic_block_data_ctx *block_ctx, 1169 u64 next_bytenr, 1170 int limit_nesting, 1171 struct btrfsic_block_data_ctx *next_block_ctx, 1172 struct btrfsic_block **next_blockp, 1173 int force_iodone_flag, 1174 int *num_copiesp, int *mirror_nump, 1175 struct btrfs_disk_key *disk_key, 1176 u64 parent_generation) 1177 { 1178 struct btrfs_fs_info *fs_info = state->fs_info; 1179 struct btrfsic_block *next_block = NULL; 1180 int ret; 1181 struct btrfsic_block_link *l; 1182 int did_alloc_block_link; 1183 int block_was_created; 1184 1185 *next_blockp = NULL; 1186 if (0 == *num_copiesp) { 1187 *num_copiesp = btrfs_num_copies(fs_info, 
next_bytenr, 1188 state->metablock_size); 1189 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) 1190 pr_info("num_copies(log_bytenr=%llu) = %d\n", 1191 next_bytenr, *num_copiesp); 1192 *mirror_nump = 1; 1193 } 1194 1195 if (*mirror_nump > *num_copiesp) 1196 return 0; 1197 1198 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 1199 pr_info("btrfsic_create_link_to_next_block(mirror_num=%d)\n", 1200 *mirror_nump); 1201 ret = btrfsic_map_block(state, next_bytenr, 1202 state->metablock_size, 1203 next_block_ctx, *mirror_nump); 1204 if (ret) { 1205 pr_info("btrfsic: btrfsic_map_block(@%llu, mirror=%d) failed!\n", 1206 next_bytenr, *mirror_nump); 1207 btrfsic_release_block_ctx(next_block_ctx); 1208 *next_blockp = NULL; 1209 return -1; 1210 } 1211 1212 next_block = btrfsic_block_lookup_or_add(state, 1213 next_block_ctx, "referenced ", 1214 1, force_iodone_flag, 1215 !force_iodone_flag, 1216 *mirror_nump, 1217 &block_was_created); 1218 if (NULL == next_block) { 1219 btrfsic_release_block_ctx(next_block_ctx); 1220 *next_blockp = NULL; 1221 return -1; 1222 } 1223 if (block_was_created) { 1224 l = NULL; 1225 next_block->generation = BTRFSIC_GENERATION_UNKNOWN; 1226 } else { 1227 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) { 1228 if (next_block->logical_bytenr != next_bytenr && 1229 !(!next_block->is_metadata && 1230 0 == next_block->logical_bytenr)) 1231 pr_info( 1232 "referenced block @%llu (%pg/%llu/%d) found in hash table, %c, bytenr mismatch (!= stored %llu)\n", 1233 next_bytenr, next_block_ctx->dev->bdev, 1234 next_block_ctx->dev_bytenr, *mirror_nump, 1235 btrfsic_get_block_type(state, 1236 next_block), 1237 next_block->logical_bytenr); 1238 else 1239 pr_info( 1240 "referenced block @%llu (%pg/%llu/%d) found in hash table, %c\n", 1241 next_bytenr, next_block_ctx->dev->bdev, 1242 next_block_ctx->dev_bytenr, *mirror_nump, 1243 btrfsic_get_block_type(state, 1244 next_block)); 1245 } 1246 next_block->logical_bytenr = next_bytenr; 1247 1248 
next_block->mirror_num = *mirror_nump; 1249 l = btrfsic_block_link_hashtable_lookup( 1250 next_block_ctx->dev->bdev, 1251 next_block_ctx->dev_bytenr, 1252 block_ctx->dev->bdev, 1253 block_ctx->dev_bytenr, 1254 &state->block_link_hashtable); 1255 } 1256 1257 next_block->disk_key = *disk_key; 1258 if (NULL == l) { 1259 l = btrfsic_block_link_alloc(); 1260 if (NULL == l) { 1261 btrfsic_release_block_ctx(next_block_ctx); 1262 *next_blockp = NULL; 1263 return -1; 1264 } 1265 1266 did_alloc_block_link = 1; 1267 l->block_ref_to = next_block; 1268 l->block_ref_from = block; 1269 l->ref_cnt = 1; 1270 l->parent_generation = parent_generation; 1271 1272 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 1273 btrfsic_print_add_link(state, l); 1274 1275 list_add(&l->node_ref_to, &block->ref_to_list); 1276 list_add(&l->node_ref_from, &next_block->ref_from_list); 1277 1278 btrfsic_block_link_hashtable_add(l, 1279 &state->block_link_hashtable); 1280 } else { 1281 did_alloc_block_link = 0; 1282 if (0 == limit_nesting) { 1283 l->ref_cnt++; 1284 l->parent_generation = parent_generation; 1285 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 1286 btrfsic_print_add_link(state, l); 1287 } 1288 } 1289 1290 if (limit_nesting > 0 && did_alloc_block_link) { 1291 ret = btrfsic_read_block(state, next_block_ctx); 1292 if (ret < (int)next_block_ctx->len) { 1293 pr_info("btrfsic: read block @logical %llu failed!\n", 1294 next_bytenr); 1295 btrfsic_release_block_ctx(next_block_ctx); 1296 *next_blockp = NULL; 1297 return -1; 1298 } 1299 1300 *next_blockp = next_block; 1301 } else { 1302 *next_blockp = NULL; 1303 } 1304 (*mirror_nump)++; 1305 1306 return 0; 1307 } 1308 1309 static int btrfsic_handle_extent_data( 1310 struct btrfsic_state *state, 1311 struct btrfsic_block *block, 1312 struct btrfsic_block_data_ctx *block_ctx, 1313 u32 item_offset, int force_iodone_flag) 1314 { 1315 struct btrfs_fs_info *fs_info = state->fs_info; 1316 struct btrfs_file_extent_item file_extent_item; 1317 u64 
file_extent_item_offset; 1318 u64 next_bytenr; 1319 u64 num_bytes; 1320 u64 generation; 1321 struct btrfsic_block_link *l; 1322 int ret; 1323 1324 file_extent_item_offset = offsetof(struct btrfs_leaf, items) + 1325 item_offset; 1326 if (file_extent_item_offset + 1327 offsetof(struct btrfs_file_extent_item, disk_num_bytes) > 1328 block_ctx->len) { 1329 pr_info("btrfsic: file item out of bounce at logical %llu, dev %pg\n", 1330 block_ctx->start, block_ctx->dev->bdev); 1331 return -1; 1332 } 1333 1334 btrfsic_read_from_block_data(block_ctx, &file_extent_item, 1335 file_extent_item_offset, 1336 offsetof(struct btrfs_file_extent_item, disk_num_bytes)); 1337 if (BTRFS_FILE_EXTENT_REG != file_extent_item.type || 1338 btrfs_stack_file_extent_disk_bytenr(&file_extent_item) == 0) { 1339 if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE) 1340 pr_info("extent_data: type %u, disk_bytenr = %llu\n", 1341 file_extent_item.type, 1342 btrfs_stack_file_extent_disk_bytenr( 1343 &file_extent_item)); 1344 return 0; 1345 } 1346 1347 if (file_extent_item_offset + sizeof(struct btrfs_file_extent_item) > 1348 block_ctx->len) { 1349 pr_info("btrfsic: file item out of bounce at logical %llu, dev %pg\n", 1350 block_ctx->start, block_ctx->dev->bdev); 1351 return -1; 1352 } 1353 btrfsic_read_from_block_data(block_ctx, &file_extent_item, 1354 file_extent_item_offset, 1355 sizeof(struct btrfs_file_extent_item)); 1356 next_bytenr = btrfs_stack_file_extent_disk_bytenr(&file_extent_item); 1357 if (btrfs_stack_file_extent_compression(&file_extent_item) == 1358 BTRFS_COMPRESS_NONE) { 1359 next_bytenr += btrfs_stack_file_extent_offset(&file_extent_item); 1360 num_bytes = btrfs_stack_file_extent_num_bytes(&file_extent_item); 1361 } else { 1362 num_bytes = btrfs_stack_file_extent_disk_num_bytes(&file_extent_item); 1363 } 1364 generation = btrfs_stack_file_extent_generation(&file_extent_item); 1365 1366 if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE) 1367 pr_info("extent_data: type %u, 
disk_bytenr = %llu, offset = %llu, num_bytes = %llu\n", 1368 file_extent_item.type, 1369 btrfs_stack_file_extent_disk_bytenr(&file_extent_item), 1370 btrfs_stack_file_extent_offset(&file_extent_item), 1371 num_bytes); 1372 while (num_bytes > 0) { 1373 u32 chunk_len; 1374 int num_copies; 1375 int mirror_num; 1376 1377 if (num_bytes > state->datablock_size) 1378 chunk_len = state->datablock_size; 1379 else 1380 chunk_len = num_bytes; 1381 1382 num_copies = btrfs_num_copies(fs_info, next_bytenr, 1383 state->datablock_size); 1384 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) 1385 pr_info("num_copies(log_bytenr=%llu) = %d\n", 1386 next_bytenr, num_copies); 1387 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { 1388 struct btrfsic_block_data_ctx next_block_ctx; 1389 struct btrfsic_block *next_block; 1390 int block_was_created; 1391 1392 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 1393 pr_info("btrfsic_handle_extent_data(mirror_num=%d)\n", 1394 mirror_num); 1395 if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE) 1396 pr_info("\tdisk_bytenr = %llu, num_bytes %u\n", 1397 next_bytenr, chunk_len); 1398 ret = btrfsic_map_block(state, next_bytenr, 1399 chunk_len, &next_block_ctx, 1400 mirror_num); 1401 if (ret) { 1402 pr_info("btrfsic: btrfsic_map_block(@%llu, mirror=%d) failed!\n", 1403 next_bytenr, mirror_num); 1404 return -1; 1405 } 1406 1407 next_block = btrfsic_block_lookup_or_add( 1408 state, 1409 &next_block_ctx, 1410 "referenced ", 1411 0, 1412 force_iodone_flag, 1413 !force_iodone_flag, 1414 mirror_num, 1415 &block_was_created); 1416 if (NULL == next_block) { 1417 btrfsic_release_block_ctx(&next_block_ctx); 1418 return -1; 1419 } 1420 if (!block_was_created) { 1421 if ((state->print_mask & 1422 BTRFSIC_PRINT_MASK_VERBOSE) && 1423 next_block->logical_bytenr != next_bytenr && 1424 !(!next_block->is_metadata && 1425 0 == next_block->logical_bytenr)) { 1426 pr_info( 1427 "referenced block @%llu (%pg/%llu/%d) found in hash table, D, 
bytenr mismatch (!= stored %llu)\n", 1428 next_bytenr, 1429 next_block_ctx.dev->bdev, 1430 next_block_ctx.dev_bytenr, 1431 mirror_num, 1432 next_block->logical_bytenr); 1433 } 1434 next_block->logical_bytenr = next_bytenr; 1435 next_block->mirror_num = mirror_num; 1436 } 1437 1438 l = btrfsic_block_link_lookup_or_add(state, 1439 &next_block_ctx, 1440 next_block, block, 1441 generation); 1442 btrfsic_release_block_ctx(&next_block_ctx); 1443 if (NULL == l) 1444 return -1; 1445 } 1446 1447 next_bytenr += chunk_len; 1448 num_bytes -= chunk_len; 1449 } 1450 1451 return 0; 1452 } 1453 1454 static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len, 1455 struct btrfsic_block_data_ctx *block_ctx_out, 1456 int mirror_num) 1457 { 1458 struct btrfs_fs_info *fs_info = state->fs_info; 1459 int ret; 1460 u64 length; 1461 struct btrfs_io_context *multi = NULL; 1462 struct btrfs_device *device; 1463 1464 length = len; 1465 ret = btrfs_map_block(fs_info, BTRFS_MAP_READ, 1466 bytenr, &length, &multi, mirror_num); 1467 1468 if (ret) { 1469 block_ctx_out->start = 0; 1470 block_ctx_out->dev_bytenr = 0; 1471 block_ctx_out->len = 0; 1472 block_ctx_out->dev = NULL; 1473 block_ctx_out->datav = NULL; 1474 block_ctx_out->pagev = NULL; 1475 block_ctx_out->mem_to_free = NULL; 1476 1477 return ret; 1478 } 1479 1480 device = multi->stripes[0].dev; 1481 if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state) || 1482 !device->bdev || !device->name) 1483 block_ctx_out->dev = NULL; 1484 else 1485 block_ctx_out->dev = btrfsic_dev_state_lookup( 1486 device->bdev->bd_dev); 1487 block_ctx_out->dev_bytenr = multi->stripes[0].physical; 1488 block_ctx_out->start = bytenr; 1489 block_ctx_out->len = len; 1490 block_ctx_out->datav = NULL; 1491 block_ctx_out->pagev = NULL; 1492 block_ctx_out->mem_to_free = NULL; 1493 1494 kfree(multi); 1495 if (NULL == block_ctx_out->dev) { 1496 ret = -ENXIO; 1497 pr_info("btrfsic: error, cannot lookup dev (#1)!\n"); 1498 } 1499 1500 return ret; 1501 } 
1502 1503 static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx) 1504 { 1505 if (block_ctx->mem_to_free) { 1506 unsigned int num_pages; 1507 1508 BUG_ON(!block_ctx->datav); 1509 BUG_ON(!block_ctx->pagev); 1510 num_pages = (block_ctx->len + (u64)PAGE_SIZE - 1) >> 1511 PAGE_SHIFT; 1512 /* Pages must be unmapped in reverse order */ 1513 while (num_pages > 0) { 1514 num_pages--; 1515 if (block_ctx->datav[num_pages]) 1516 block_ctx->datav[num_pages] = NULL; 1517 if (block_ctx->pagev[num_pages]) { 1518 __free_page(block_ctx->pagev[num_pages]); 1519 block_ctx->pagev[num_pages] = NULL; 1520 } 1521 } 1522 1523 kfree(block_ctx->mem_to_free); 1524 block_ctx->mem_to_free = NULL; 1525 block_ctx->pagev = NULL; 1526 block_ctx->datav = NULL; 1527 } 1528 } 1529 1530 static int btrfsic_read_block(struct btrfsic_state *state, 1531 struct btrfsic_block_data_ctx *block_ctx) 1532 { 1533 unsigned int num_pages; 1534 unsigned int i; 1535 size_t size; 1536 u64 dev_bytenr; 1537 int ret; 1538 1539 BUG_ON(block_ctx->datav); 1540 BUG_ON(block_ctx->pagev); 1541 BUG_ON(block_ctx->mem_to_free); 1542 if (!PAGE_ALIGNED(block_ctx->dev_bytenr)) { 1543 pr_info("btrfsic: read_block() with unaligned bytenr %llu\n", 1544 block_ctx->dev_bytenr); 1545 return -1; 1546 } 1547 1548 num_pages = (block_ctx->len + (u64)PAGE_SIZE - 1) >> 1549 PAGE_SHIFT; 1550 size = sizeof(*block_ctx->datav) + sizeof(*block_ctx->pagev); 1551 block_ctx->mem_to_free = kcalloc(num_pages, size, GFP_NOFS); 1552 if (!block_ctx->mem_to_free) 1553 return -ENOMEM; 1554 block_ctx->datav = block_ctx->mem_to_free; 1555 block_ctx->pagev = (struct page **)(block_ctx->datav + num_pages); 1556 for (i = 0; i < num_pages; i++) { 1557 block_ctx->pagev[i] = alloc_page(GFP_NOFS); 1558 if (!block_ctx->pagev[i]) 1559 return -1; 1560 } 1561 1562 dev_bytenr = block_ctx->dev_bytenr; 1563 for (i = 0; i < num_pages;) { 1564 struct bio *bio; 1565 unsigned int j; 1566 1567 bio = btrfs_bio_alloc(num_pages - i); 1568 bio_set_dev(bio, 
block_ctx->dev->bdev); 1569 bio->bi_iter.bi_sector = dev_bytenr >> 9; 1570 bio->bi_opf = REQ_OP_READ; 1571 1572 for (j = i; j < num_pages; j++) { 1573 ret = bio_add_page(bio, block_ctx->pagev[j], 1574 PAGE_SIZE, 0); 1575 if (PAGE_SIZE != ret) 1576 break; 1577 } 1578 if (j == i) { 1579 pr_info("btrfsic: error, failed to add a single page!\n"); 1580 return -1; 1581 } 1582 if (submit_bio_wait(bio)) { 1583 pr_info("btrfsic: read error at logical %llu dev %pg!\n", 1584 block_ctx->start, block_ctx->dev->bdev); 1585 bio_put(bio); 1586 return -1; 1587 } 1588 bio_put(bio); 1589 dev_bytenr += (j - i) * PAGE_SIZE; 1590 i = j; 1591 } 1592 for (i = 0; i < num_pages; i++) 1593 block_ctx->datav[i] = page_address(block_ctx->pagev[i]); 1594 1595 return block_ctx->len; 1596 } 1597 1598 static void btrfsic_dump_database(struct btrfsic_state *state) 1599 { 1600 const struct btrfsic_block *b_all; 1601 1602 BUG_ON(NULL == state); 1603 1604 pr_info("all_blocks_list:\n"); 1605 list_for_each_entry(b_all, &state->all_blocks_list, all_blocks_node) { 1606 const struct btrfsic_block_link *l; 1607 1608 pr_info("%c-block @%llu (%pg/%llu/%d)\n", 1609 btrfsic_get_block_type(state, b_all), 1610 b_all->logical_bytenr, b_all->dev_state->bdev, 1611 b_all->dev_bytenr, b_all->mirror_num); 1612 1613 list_for_each_entry(l, &b_all->ref_to_list, node_ref_to) { 1614 pr_info( 1615 " %c @%llu (%pg/%llu/%d) refers %u* to %c @%llu (%pg/%llu/%d)\n", 1616 btrfsic_get_block_type(state, b_all), 1617 b_all->logical_bytenr, b_all->dev_state->bdev, 1618 b_all->dev_bytenr, b_all->mirror_num, 1619 l->ref_cnt, 1620 btrfsic_get_block_type(state, l->block_ref_to), 1621 l->block_ref_to->logical_bytenr, 1622 l->block_ref_to->dev_state->bdev, 1623 l->block_ref_to->dev_bytenr, 1624 l->block_ref_to->mirror_num); 1625 } 1626 1627 list_for_each_entry(l, &b_all->ref_from_list, node_ref_from) { 1628 pr_info( 1629 " %c @%llu (%pg/%llu/%d) is ref %u* from %c @%llu (%pg/%llu/%d)\n", 1630 btrfsic_get_block_type(state, b_all), 1631 
b_all->logical_bytenr, b_all->dev_state->bdev, 1632 b_all->dev_bytenr, b_all->mirror_num, 1633 l->ref_cnt, 1634 btrfsic_get_block_type(state, l->block_ref_from), 1635 l->block_ref_from->logical_bytenr, 1636 l->block_ref_from->dev_state->bdev, 1637 l->block_ref_from->dev_bytenr, 1638 l->block_ref_from->mirror_num); 1639 } 1640 1641 pr_info("\n"); 1642 } 1643 } 1644 1645 /* 1646 * Test whether the disk block contains a tree block (leaf or node) 1647 * (note that this test fails for the super block) 1648 */ 1649 static noinline_for_stack int btrfsic_test_for_metadata( 1650 struct btrfsic_state *state, 1651 char **datav, unsigned int num_pages) 1652 { 1653 struct btrfs_fs_info *fs_info = state->fs_info; 1654 SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); 1655 struct btrfs_header *h; 1656 u8 csum[BTRFS_CSUM_SIZE]; 1657 unsigned int i; 1658 1659 if (num_pages * PAGE_SIZE < state->metablock_size) 1660 return 1; /* not metadata */ 1661 num_pages = state->metablock_size >> PAGE_SHIFT; 1662 h = (struct btrfs_header *)datav[0]; 1663 1664 if (memcmp(h->fsid, fs_info->fs_devices->fsid, BTRFS_FSID_SIZE)) 1665 return 1; 1666 1667 shash->tfm = fs_info->csum_shash; 1668 crypto_shash_init(shash); 1669 1670 for (i = 0; i < num_pages; i++) { 1671 u8 *data = i ? datav[i] : (datav[i] + BTRFS_CSUM_SIZE); 1672 size_t sublen = i ? 
PAGE_SIZE : 1673 (PAGE_SIZE - BTRFS_CSUM_SIZE); 1674 1675 crypto_shash_update(shash, data, sublen); 1676 } 1677 crypto_shash_final(shash, csum); 1678 if (memcmp(csum, h->csum, fs_info->csum_size)) 1679 return 1; 1680 1681 return 0; /* is metadata */ 1682 } 1683 1684 static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, 1685 u64 dev_bytenr, char **mapped_datav, 1686 unsigned int num_pages, 1687 struct bio *bio, int *bio_is_patched, 1688 int submit_bio_bh_rw) 1689 { 1690 int is_metadata; 1691 struct btrfsic_block *block; 1692 struct btrfsic_block_data_ctx block_ctx; 1693 int ret; 1694 struct btrfsic_state *state = dev_state->state; 1695 struct block_device *bdev = dev_state->bdev; 1696 unsigned int processed_len; 1697 1698 if (NULL != bio_is_patched) 1699 *bio_is_patched = 0; 1700 1701 again: 1702 if (num_pages == 0) 1703 return; 1704 1705 processed_len = 0; 1706 is_metadata = (0 == btrfsic_test_for_metadata(state, mapped_datav, 1707 num_pages)); 1708 1709 block = btrfsic_block_hashtable_lookup(bdev, dev_bytenr, 1710 &state->block_hashtable); 1711 if (NULL != block) { 1712 u64 bytenr = 0; 1713 struct btrfsic_block_link *l, *tmp; 1714 1715 if (block->is_superblock) { 1716 bytenr = btrfs_super_bytenr((struct btrfs_super_block *) 1717 mapped_datav[0]); 1718 if (num_pages * PAGE_SIZE < 1719 BTRFS_SUPER_INFO_SIZE) { 1720 pr_info("btrfsic: cannot work with too short bios!\n"); 1721 return; 1722 } 1723 is_metadata = 1; 1724 BUG_ON(!PAGE_ALIGNED(BTRFS_SUPER_INFO_SIZE)); 1725 processed_len = BTRFS_SUPER_INFO_SIZE; 1726 if (state->print_mask & 1727 BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE) { 1728 pr_info("[before new superblock is written]:\n"); 1729 btrfsic_dump_tree_sub(state, block, 0); 1730 } 1731 } 1732 if (is_metadata) { 1733 if (!block->is_superblock) { 1734 if (num_pages * PAGE_SIZE < 1735 state->metablock_size) { 1736 pr_info("btrfsic: cannot work with too short bios!\n"); 1737 return; 1738 } 1739 processed_len = state->metablock_size; 1740 
bytenr = btrfs_stack_header_bytenr( 1741 (struct btrfs_header *) 1742 mapped_datav[0]); 1743 btrfsic_cmp_log_and_dev_bytenr(state, bytenr, 1744 dev_state, 1745 dev_bytenr); 1746 } 1747 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) { 1748 if (block->logical_bytenr != bytenr && 1749 !(!block->is_metadata && 1750 block->logical_bytenr == 0)) 1751 pr_info( 1752 "written block @%llu (%pg/%llu/%d) found in hash table, %c, bytenr mismatch (!= stored %llu)\n", 1753 bytenr, dev_state->bdev, 1754 dev_bytenr, 1755 block->mirror_num, 1756 btrfsic_get_block_type(state, 1757 block), 1758 block->logical_bytenr); 1759 else 1760 pr_info( 1761 "written block @%llu (%pg/%llu/%d) found in hash table, %c\n", 1762 bytenr, dev_state->bdev, 1763 dev_bytenr, block->mirror_num, 1764 btrfsic_get_block_type(state, 1765 block)); 1766 } 1767 block->logical_bytenr = bytenr; 1768 } else { 1769 if (num_pages * PAGE_SIZE < 1770 state->datablock_size) { 1771 pr_info("btrfsic: cannot work with too short bios!\n"); 1772 return; 1773 } 1774 processed_len = state->datablock_size; 1775 bytenr = block->logical_bytenr; 1776 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 1777 pr_info( 1778 "written block @%llu (%pg/%llu/%d) found in hash table, %c\n", 1779 bytenr, dev_state->bdev, dev_bytenr, 1780 block->mirror_num, 1781 btrfsic_get_block_type(state, block)); 1782 } 1783 1784 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 1785 pr_info("ref_to_list: %cE, ref_from_list: %cE\n", 1786 list_empty(&block->ref_to_list) ? ' ' : '!', 1787 list_empty(&block->ref_from_list) ? 
' ' : '!'); 1788 if (btrfsic_is_block_ref_by_superblock(state, block, 0)) { 1789 pr_info( 1790 "btrfs: attempt to overwrite %c-block @%llu (%pg/%llu/%d), old(gen=%llu, objectid=%llu, type=%d, offset=%llu), new(gen=%llu), which is referenced by most recent superblock (superblockgen=%llu)!\n", 1791 btrfsic_get_block_type(state, block), bytenr, 1792 dev_state->bdev, dev_bytenr, block->mirror_num, 1793 block->generation, 1794 btrfs_disk_key_objectid(&block->disk_key), 1795 block->disk_key.type, 1796 btrfs_disk_key_offset(&block->disk_key), 1797 btrfs_stack_header_generation( 1798 (struct btrfs_header *) mapped_datav[0]), 1799 state->max_superblock_generation); 1800 btrfsic_dump_tree(state); 1801 } 1802 1803 if (!block->is_iodone && !block->never_written) { 1804 pr_info( 1805 "btrfs: attempt to overwrite %c-block @%llu (%pg/%llu/%d), oldgen=%llu, newgen=%llu, which is not yet iodone!\n", 1806 btrfsic_get_block_type(state, block), bytenr, 1807 dev_state->bdev, dev_bytenr, block->mirror_num, 1808 block->generation, 1809 btrfs_stack_header_generation( 1810 (struct btrfs_header *) 1811 mapped_datav[0])); 1812 /* it would not be safe to go on */ 1813 btrfsic_dump_tree(state); 1814 goto continue_loop; 1815 } 1816 1817 /* 1818 * Clear all references of this block. Do not free 1819 * the block itself even if is not referenced anymore 1820 * because it still carries valuable information 1821 * like whether it was ever written and IO completed. 
1822 */ 1823 list_for_each_entry_safe(l, tmp, &block->ref_to_list, 1824 node_ref_to) { 1825 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 1826 btrfsic_print_rem_link(state, l); 1827 l->ref_cnt--; 1828 if (0 == l->ref_cnt) { 1829 list_del(&l->node_ref_to); 1830 list_del(&l->node_ref_from); 1831 btrfsic_block_link_hashtable_remove(l); 1832 btrfsic_block_link_free(l); 1833 } 1834 } 1835 1836 block_ctx.dev = dev_state; 1837 block_ctx.dev_bytenr = dev_bytenr; 1838 block_ctx.start = bytenr; 1839 block_ctx.len = processed_len; 1840 block_ctx.pagev = NULL; 1841 block_ctx.mem_to_free = NULL; 1842 block_ctx.datav = mapped_datav; 1843 1844 if (is_metadata || state->include_extent_data) { 1845 block->never_written = 0; 1846 block->iodone_w_error = 0; 1847 if (NULL != bio) { 1848 block->is_iodone = 0; 1849 BUG_ON(NULL == bio_is_patched); 1850 if (!*bio_is_patched) { 1851 block->orig_bio_private = 1852 bio->bi_private; 1853 block->orig_bio_end_io = 1854 bio->bi_end_io; 1855 block->next_in_same_bio = NULL; 1856 bio->bi_private = block; 1857 bio->bi_end_io = btrfsic_bio_end_io; 1858 *bio_is_patched = 1; 1859 } else { 1860 struct btrfsic_block *chained_block = 1861 (struct btrfsic_block *) 1862 bio->bi_private; 1863 1864 BUG_ON(NULL == chained_block); 1865 block->orig_bio_private = 1866 chained_block->orig_bio_private; 1867 block->orig_bio_end_io = 1868 chained_block->orig_bio_end_io; 1869 block->next_in_same_bio = chained_block; 1870 bio->bi_private = block; 1871 } 1872 } else { 1873 block->is_iodone = 1; 1874 block->orig_bio_private = NULL; 1875 block->orig_bio_end_io = NULL; 1876 block->next_in_same_bio = NULL; 1877 } 1878 } 1879 1880 block->flush_gen = dev_state->last_flush_gen + 1; 1881 block->submit_bio_bh_rw = submit_bio_bh_rw; 1882 if (is_metadata) { 1883 block->logical_bytenr = bytenr; 1884 block->is_metadata = 1; 1885 if (block->is_superblock) { 1886 BUG_ON(PAGE_SIZE != 1887 BTRFS_SUPER_INFO_SIZE); 1888 ret = btrfsic_process_written_superblock( 1889 state, 1890 
block, 1891 (struct btrfs_super_block *) 1892 mapped_datav[0]); 1893 if (state->print_mask & 1894 BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE) { 1895 pr_info("[after new superblock is written]:\n"); 1896 btrfsic_dump_tree_sub(state, block, 0); 1897 } 1898 } else { 1899 block->mirror_num = 0; /* unknown */ 1900 ret = btrfsic_process_metablock( 1901 state, 1902 block, 1903 &block_ctx, 1904 0, 0); 1905 } 1906 if (ret) 1907 pr_info("btrfsic: btrfsic_process_metablock(root @%llu) failed!\n", 1908 dev_bytenr); 1909 } else { 1910 block->is_metadata = 0; 1911 block->mirror_num = 0; /* unknown */ 1912 block->generation = BTRFSIC_GENERATION_UNKNOWN; 1913 if (!state->include_extent_data 1914 && list_empty(&block->ref_from_list)) { 1915 /* 1916 * disk block is overwritten with extent 1917 * data (not meta data) and we are configured 1918 * to not include extent data: take the 1919 * chance and free the block's memory 1920 */ 1921 btrfsic_block_hashtable_remove(block); 1922 list_del(&block->all_blocks_node); 1923 btrfsic_block_free(block); 1924 } 1925 } 1926 btrfsic_release_block_ctx(&block_ctx); 1927 } else { 1928 /* block has not been found in hash table */ 1929 u64 bytenr; 1930 1931 if (!is_metadata) { 1932 processed_len = state->datablock_size; 1933 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 1934 pr_info( 1935 "written block (%pg/%llu/?) !found in hash table, D\n", 1936 dev_state->bdev, dev_bytenr); 1937 if (!state->include_extent_data) { 1938 /* ignore that written D block */ 1939 goto continue_loop; 1940 } 1941 1942 /* this is getting ugly for the 1943 * include_extent_data case... */ 1944 bytenr = 0; /* unknown */ 1945 } else { 1946 processed_len = state->metablock_size; 1947 bytenr = btrfs_stack_header_bytenr( 1948 (struct btrfs_header *) 1949 mapped_datav[0]); 1950 btrfsic_cmp_log_and_dev_bytenr(state, bytenr, dev_state, 1951 dev_bytenr); 1952 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 1953 pr_info( 1954 "written block @%llu (%pg/%llu/?) 
!found in hash table, M\n", 1955 bytenr, dev_state->bdev, dev_bytenr); 1956 } 1957 1958 block_ctx.dev = dev_state; 1959 block_ctx.dev_bytenr = dev_bytenr; 1960 block_ctx.start = bytenr; 1961 block_ctx.len = processed_len; 1962 block_ctx.pagev = NULL; 1963 block_ctx.mem_to_free = NULL; 1964 block_ctx.datav = mapped_datav; 1965 1966 block = btrfsic_block_alloc(); 1967 if (NULL == block) { 1968 btrfsic_release_block_ctx(&block_ctx); 1969 goto continue_loop; 1970 } 1971 block->dev_state = dev_state; 1972 block->dev_bytenr = dev_bytenr; 1973 block->logical_bytenr = bytenr; 1974 block->is_metadata = is_metadata; 1975 block->never_written = 0; 1976 block->iodone_w_error = 0; 1977 block->mirror_num = 0; /* unknown */ 1978 block->flush_gen = dev_state->last_flush_gen + 1; 1979 block->submit_bio_bh_rw = submit_bio_bh_rw; 1980 if (NULL != bio) { 1981 block->is_iodone = 0; 1982 BUG_ON(NULL == bio_is_patched); 1983 if (!*bio_is_patched) { 1984 block->orig_bio_private = bio->bi_private; 1985 block->orig_bio_end_io = bio->bi_end_io; 1986 block->next_in_same_bio = NULL; 1987 bio->bi_private = block; 1988 bio->bi_end_io = btrfsic_bio_end_io; 1989 *bio_is_patched = 1; 1990 } else { 1991 struct btrfsic_block *chained_block = 1992 (struct btrfsic_block *) 1993 bio->bi_private; 1994 1995 BUG_ON(NULL == chained_block); 1996 block->orig_bio_private = 1997 chained_block->orig_bio_private; 1998 block->orig_bio_end_io = 1999 chained_block->orig_bio_end_io; 2000 block->next_in_same_bio = chained_block; 2001 bio->bi_private = block; 2002 } 2003 } else { 2004 block->is_iodone = 1; 2005 block->orig_bio_private = NULL; 2006 block->orig_bio_end_io = NULL; 2007 block->next_in_same_bio = NULL; 2008 } 2009 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2010 pr_info("new written %c-block @%llu (%pg/%llu/%d)\n", 2011 is_metadata ? 
'M' : 'D', 2012 block->logical_bytenr, block->dev_state->bdev, 2013 block->dev_bytenr, block->mirror_num); 2014 list_add(&block->all_blocks_node, &state->all_blocks_list); 2015 btrfsic_block_hashtable_add(block, &state->block_hashtable); 2016 2017 if (is_metadata) { 2018 ret = btrfsic_process_metablock(state, block, 2019 &block_ctx, 0, 0); 2020 if (ret) 2021 pr_info("btrfsic: process_metablock(root @%llu) failed!\n", 2022 dev_bytenr); 2023 } 2024 btrfsic_release_block_ctx(&block_ctx); 2025 } 2026 2027 continue_loop: 2028 BUG_ON(!processed_len); 2029 dev_bytenr += processed_len; 2030 mapped_datav += processed_len >> PAGE_SHIFT; 2031 num_pages -= processed_len >> PAGE_SHIFT; 2032 goto again; 2033 } 2034 2035 static void btrfsic_bio_end_io(struct bio *bp) 2036 { 2037 struct btrfsic_block *block = (struct btrfsic_block *)bp->bi_private; 2038 int iodone_w_error; 2039 2040 /* mutex is not held! This is not save if IO is not yet completed 2041 * on umount */ 2042 iodone_w_error = 0; 2043 if (bp->bi_status) 2044 iodone_w_error = 1; 2045 2046 BUG_ON(NULL == block); 2047 bp->bi_private = block->orig_bio_private; 2048 bp->bi_end_io = block->orig_bio_end_io; 2049 2050 do { 2051 struct btrfsic_block *next_block; 2052 struct btrfsic_dev_state *const dev_state = block->dev_state; 2053 2054 if ((dev_state->state->print_mask & 2055 BTRFSIC_PRINT_MASK_END_IO_BIO_BH)) 2056 pr_info("bio_end_io(err=%d) for %c @%llu (%pg/%llu/%d)\n", 2057 bp->bi_status, 2058 btrfsic_get_block_type(dev_state->state, block), 2059 block->logical_bytenr, dev_state->bdev, 2060 block->dev_bytenr, block->mirror_num); 2061 next_block = block->next_in_same_bio; 2062 block->iodone_w_error = iodone_w_error; 2063 if (block->submit_bio_bh_rw & REQ_PREFLUSH) { 2064 dev_state->last_flush_gen++; 2065 if ((dev_state->state->print_mask & 2066 BTRFSIC_PRINT_MASK_END_IO_BIO_BH)) 2067 pr_info("bio_end_io() new %pg flush_gen=%llu\n", 2068 dev_state->bdev, 2069 dev_state->last_flush_gen); 2070 } 2071 if 
(block->submit_bio_bh_rw & REQ_FUA) 2072 block->flush_gen = 0; /* FUA completed means block is 2073 * on disk */ 2074 block->is_iodone = 1; /* for FLUSH, this releases the block */ 2075 block = next_block; 2076 } while (NULL != block); 2077 2078 bp->bi_end_io(bp); 2079 } 2080 2081 static int btrfsic_process_written_superblock( 2082 struct btrfsic_state *state, 2083 struct btrfsic_block *const superblock, 2084 struct btrfs_super_block *const super_hdr) 2085 { 2086 struct btrfs_fs_info *fs_info = state->fs_info; 2087 int pass; 2088 2089 superblock->generation = btrfs_super_generation(super_hdr); 2090 if (!(superblock->generation > state->max_superblock_generation || 2091 0 == state->max_superblock_generation)) { 2092 if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE) 2093 pr_info( 2094 "btrfsic: superblock @%llu (%pg/%llu/%d) with old gen %llu <= %llu\n", 2095 superblock->logical_bytenr, 2096 superblock->dev_state->bdev, 2097 superblock->dev_bytenr, superblock->mirror_num, 2098 btrfs_super_generation(super_hdr), 2099 state->max_superblock_generation); 2100 } else { 2101 if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE) 2102 pr_info( 2103 "btrfsic: got new superblock @%llu (%pg/%llu/%d) with new gen %llu > %llu\n", 2104 superblock->logical_bytenr, 2105 superblock->dev_state->bdev, 2106 superblock->dev_bytenr, superblock->mirror_num, 2107 btrfs_super_generation(super_hdr), 2108 state->max_superblock_generation); 2109 2110 state->max_superblock_generation = 2111 btrfs_super_generation(super_hdr); 2112 state->latest_superblock = superblock; 2113 } 2114 2115 for (pass = 0; pass < 3; pass++) { 2116 int ret; 2117 u64 next_bytenr; 2118 struct btrfsic_block *next_block; 2119 struct btrfsic_block_data_ctx tmp_next_block_ctx; 2120 struct btrfsic_block_link *l; 2121 int num_copies; 2122 int mirror_num; 2123 const char *additional_string = NULL; 2124 struct btrfs_disk_key tmp_disk_key = {0}; 2125 2126 btrfs_set_disk_key_objectid(&tmp_disk_key, 2127 
BTRFS_ROOT_ITEM_KEY); 2128 btrfs_set_disk_key_objectid(&tmp_disk_key, 0); 2129 2130 switch (pass) { 2131 case 0: 2132 btrfs_set_disk_key_objectid(&tmp_disk_key, 2133 BTRFS_ROOT_TREE_OBJECTID); 2134 additional_string = "root "; 2135 next_bytenr = btrfs_super_root(super_hdr); 2136 if (state->print_mask & 2137 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) 2138 pr_info("root@%llu\n", next_bytenr); 2139 break; 2140 case 1: 2141 btrfs_set_disk_key_objectid(&tmp_disk_key, 2142 BTRFS_CHUNK_TREE_OBJECTID); 2143 additional_string = "chunk "; 2144 next_bytenr = btrfs_super_chunk_root(super_hdr); 2145 if (state->print_mask & 2146 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) 2147 pr_info("chunk@%llu\n", next_bytenr); 2148 break; 2149 case 2: 2150 btrfs_set_disk_key_objectid(&tmp_disk_key, 2151 BTRFS_TREE_LOG_OBJECTID); 2152 additional_string = "log "; 2153 next_bytenr = btrfs_super_log_root(super_hdr); 2154 if (0 == next_bytenr) 2155 continue; 2156 if (state->print_mask & 2157 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) 2158 pr_info("log@%llu\n", next_bytenr); 2159 break; 2160 } 2161 2162 num_copies = btrfs_num_copies(fs_info, next_bytenr, 2163 BTRFS_SUPER_INFO_SIZE); 2164 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) 2165 pr_info("num_copies(log_bytenr=%llu) = %d\n", 2166 next_bytenr, num_copies); 2167 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { 2168 int was_created; 2169 2170 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2171 pr_info("btrfsic_process_written_superblock(mirror_num=%d)\n", mirror_num); 2172 ret = btrfsic_map_block(state, next_bytenr, 2173 BTRFS_SUPER_INFO_SIZE, 2174 &tmp_next_block_ctx, 2175 mirror_num); 2176 if (ret) { 2177 pr_info("btrfsic: btrfsic_map_block(@%llu, mirror=%d) failed!\n", 2178 next_bytenr, mirror_num); 2179 return -1; 2180 } 2181 2182 next_block = btrfsic_block_lookup_or_add( 2183 state, 2184 &tmp_next_block_ctx, 2185 additional_string, 2186 1, 0, 1, 2187 mirror_num, 2188 &was_created); 2189 if 
(NULL == next_block) { 2190 btrfsic_release_block_ctx(&tmp_next_block_ctx); 2191 return -1; 2192 } 2193 2194 next_block->disk_key = tmp_disk_key; 2195 if (was_created) 2196 next_block->generation = 2197 BTRFSIC_GENERATION_UNKNOWN; 2198 l = btrfsic_block_link_lookup_or_add( 2199 state, 2200 &tmp_next_block_ctx, 2201 next_block, 2202 superblock, 2203 BTRFSIC_GENERATION_UNKNOWN); 2204 btrfsic_release_block_ctx(&tmp_next_block_ctx); 2205 if (NULL == l) 2206 return -1; 2207 } 2208 } 2209 2210 if (WARN_ON(-1 == btrfsic_check_all_ref_blocks(state, superblock, 0))) 2211 btrfsic_dump_tree(state); 2212 2213 return 0; 2214 } 2215 2216 static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state, 2217 struct btrfsic_block *const block, 2218 int recursion_level) 2219 { 2220 const struct btrfsic_block_link *l; 2221 int ret = 0; 2222 2223 if (recursion_level >= 3 + BTRFS_MAX_LEVEL) { 2224 /* 2225 * Note that this situation can happen and does not 2226 * indicate an error in regular cases. It happens 2227 * when disk blocks are freed and later reused. 2228 * The check-integrity module is not aware of any 2229 * block free operations, it just recognizes block 2230 * write operations. Therefore it keeps the linkage 2231 * information for a block until a block is 2232 * rewritten. This can temporarily cause incorrect 2233 * and even circular linkage information. This 2234 * causes no harm unless such blocks are referenced 2235 * by the most recent super block. 2236 */ 2237 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2238 pr_info("btrfsic: abort cyclic linkage (case 1).\n"); 2239 2240 return ret; 2241 } 2242 2243 /* 2244 * This algorithm is recursive because the amount of used stack 2245 * space is very small and the max recursion depth is limited. 
2246 */ 2247 list_for_each_entry(l, &block->ref_to_list, node_ref_to) { 2248 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2249 pr_info( 2250 "rl=%d, %c @%llu (%pg/%llu/%d) %u* refers to %c @%llu (%pg/%llu/%d)\n", 2251 recursion_level, 2252 btrfsic_get_block_type(state, block), 2253 block->logical_bytenr, block->dev_state->bdev, 2254 block->dev_bytenr, block->mirror_num, 2255 l->ref_cnt, 2256 btrfsic_get_block_type(state, l->block_ref_to), 2257 l->block_ref_to->logical_bytenr, 2258 l->block_ref_to->dev_state->bdev, 2259 l->block_ref_to->dev_bytenr, 2260 l->block_ref_to->mirror_num); 2261 if (l->block_ref_to->never_written) { 2262 pr_info( 2263 "btrfs: attempt to write superblock which references block %c @%llu (%pg/%llu/%d) which is never written!\n", 2264 btrfsic_get_block_type(state, l->block_ref_to), 2265 l->block_ref_to->logical_bytenr, 2266 l->block_ref_to->dev_state->bdev, 2267 l->block_ref_to->dev_bytenr, 2268 l->block_ref_to->mirror_num); 2269 ret = -1; 2270 } else if (!l->block_ref_to->is_iodone) { 2271 pr_info( 2272 "btrfs: attempt to write superblock which references block %c @%llu (%pg/%llu/%d) which is not yet iodone!\n", 2273 btrfsic_get_block_type(state, l->block_ref_to), 2274 l->block_ref_to->logical_bytenr, 2275 l->block_ref_to->dev_state->bdev, 2276 l->block_ref_to->dev_bytenr, 2277 l->block_ref_to->mirror_num); 2278 ret = -1; 2279 } else if (l->block_ref_to->iodone_w_error) { 2280 pr_info( 2281 "btrfs: attempt to write superblock which references block %c @%llu (%pg/%llu/%d) which has write error!\n", 2282 btrfsic_get_block_type(state, l->block_ref_to), 2283 l->block_ref_to->logical_bytenr, 2284 l->block_ref_to->dev_state->bdev, 2285 l->block_ref_to->dev_bytenr, 2286 l->block_ref_to->mirror_num); 2287 ret = -1; 2288 } else if (l->parent_generation != 2289 l->block_ref_to->generation && 2290 BTRFSIC_GENERATION_UNKNOWN != 2291 l->parent_generation && 2292 BTRFSIC_GENERATION_UNKNOWN != 2293 l->block_ref_to->generation) { 2294 pr_info( 2295 
"btrfs: attempt to write superblock which references block %c @%llu (%pg/%llu/%d) with generation %llu != parent generation %llu!\n", 2296 btrfsic_get_block_type(state, l->block_ref_to), 2297 l->block_ref_to->logical_bytenr, 2298 l->block_ref_to->dev_state->bdev, 2299 l->block_ref_to->dev_bytenr, 2300 l->block_ref_to->mirror_num, 2301 l->block_ref_to->generation, 2302 l->parent_generation); 2303 ret = -1; 2304 } else if (l->block_ref_to->flush_gen > 2305 l->block_ref_to->dev_state->last_flush_gen) { 2306 pr_info( 2307 "btrfs: attempt to write superblock which references block %c @%llu (%pg/%llu/%d) which is not flushed out of disk's write cache (block flush_gen=%llu, dev->flush_gen=%llu)!\n", 2308 btrfsic_get_block_type(state, l->block_ref_to), 2309 l->block_ref_to->logical_bytenr, 2310 l->block_ref_to->dev_state->bdev, 2311 l->block_ref_to->dev_bytenr, 2312 l->block_ref_to->mirror_num, block->flush_gen, 2313 l->block_ref_to->dev_state->last_flush_gen); 2314 ret = -1; 2315 } else if (-1 == btrfsic_check_all_ref_blocks(state, 2316 l->block_ref_to, 2317 recursion_level + 2318 1)) { 2319 ret = -1; 2320 } 2321 } 2322 2323 return ret; 2324 } 2325 2326 static int btrfsic_is_block_ref_by_superblock( 2327 const struct btrfsic_state *state, 2328 const struct btrfsic_block *block, 2329 int recursion_level) 2330 { 2331 const struct btrfsic_block_link *l; 2332 2333 if (recursion_level >= 3 + BTRFS_MAX_LEVEL) { 2334 /* refer to comment at "abort cyclic linkage (case 1)" */ 2335 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2336 pr_info("btrfsic: abort cyclic linkage (case 2).\n"); 2337 2338 return 0; 2339 } 2340 2341 /* 2342 * This algorithm is recursive because the amount of used stack space 2343 * is very small and the max recursion depth is limited. 
2344 */ 2345 list_for_each_entry(l, &block->ref_from_list, node_ref_from) { 2346 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2347 pr_info( 2348 "rl=%d, %c @%llu (%pg/%llu/%d) is ref %u* from %c @%llu (%pg/%llu/%d)\n", 2349 recursion_level, 2350 btrfsic_get_block_type(state, block), 2351 block->logical_bytenr, block->dev_state->bdev, 2352 block->dev_bytenr, block->mirror_num, 2353 l->ref_cnt, 2354 btrfsic_get_block_type(state, l->block_ref_from), 2355 l->block_ref_from->logical_bytenr, 2356 l->block_ref_from->dev_state->bdev, 2357 l->block_ref_from->dev_bytenr, 2358 l->block_ref_from->mirror_num); 2359 if (l->block_ref_from->is_superblock && 2360 state->latest_superblock->dev_bytenr == 2361 l->block_ref_from->dev_bytenr && 2362 state->latest_superblock->dev_state->bdev == 2363 l->block_ref_from->dev_state->bdev) 2364 return 1; 2365 else if (btrfsic_is_block_ref_by_superblock(state, 2366 l->block_ref_from, 2367 recursion_level + 2368 1)) 2369 return 1; 2370 } 2371 2372 return 0; 2373 } 2374 2375 static void btrfsic_print_add_link(const struct btrfsic_state *state, 2376 const struct btrfsic_block_link *l) 2377 { 2378 pr_info("add %u* link from %c @%llu (%pg/%llu/%d) to %c @%llu (%pg/%llu/%d)\n", 2379 l->ref_cnt, 2380 btrfsic_get_block_type(state, l->block_ref_from), 2381 l->block_ref_from->logical_bytenr, 2382 l->block_ref_from->dev_state->bdev, 2383 l->block_ref_from->dev_bytenr, l->block_ref_from->mirror_num, 2384 btrfsic_get_block_type(state, l->block_ref_to), 2385 l->block_ref_to->logical_bytenr, 2386 l->block_ref_to->dev_state->bdev, l->block_ref_to->dev_bytenr, 2387 l->block_ref_to->mirror_num); 2388 } 2389 2390 static void btrfsic_print_rem_link(const struct btrfsic_state *state, 2391 const struct btrfsic_block_link *l) 2392 { 2393 pr_info("rem %u* link from %c @%llu (%pg/%llu/%d) to %c @%llu (%pg/%llu/%d)\n", 2394 l->ref_cnt, 2395 btrfsic_get_block_type(state, l->block_ref_from), 2396 l->block_ref_from->logical_bytenr, 2397 
l->block_ref_from->dev_state->bdev, 2398 l->block_ref_from->dev_bytenr, l->block_ref_from->mirror_num, 2399 btrfsic_get_block_type(state, l->block_ref_to), 2400 l->block_ref_to->logical_bytenr, 2401 l->block_ref_to->dev_state->bdev, l->block_ref_to->dev_bytenr, 2402 l->block_ref_to->mirror_num); 2403 } 2404 2405 static char btrfsic_get_block_type(const struct btrfsic_state *state, 2406 const struct btrfsic_block *block) 2407 { 2408 if (block->is_superblock && 2409 state->latest_superblock->dev_bytenr == block->dev_bytenr && 2410 state->latest_superblock->dev_state->bdev == block->dev_state->bdev) 2411 return 'S'; 2412 else if (block->is_superblock) 2413 return 's'; 2414 else if (block->is_metadata) 2415 return 'M'; 2416 else 2417 return 'D'; 2418 } 2419 2420 static void btrfsic_dump_tree(const struct btrfsic_state *state) 2421 { 2422 btrfsic_dump_tree_sub(state, state->latest_superblock, 0); 2423 } 2424 2425 static void btrfsic_dump_tree_sub(const struct btrfsic_state *state, 2426 const struct btrfsic_block *block, 2427 int indent_level) 2428 { 2429 const struct btrfsic_block_link *l; 2430 int indent_add; 2431 static char buf[80]; 2432 int cursor_position; 2433 2434 /* 2435 * Should better fill an on-stack buffer with a complete line and 2436 * dump it at once when it is time to print a newline character. 2437 */ 2438 2439 /* 2440 * This algorithm is recursive because the amount of used stack space 2441 * is very small and the max recursion depth is limited. 
2442 */ 2443 indent_add = sprintf(buf, "%c-%llu(%pg/%llu/%u)", 2444 btrfsic_get_block_type(state, block), 2445 block->logical_bytenr, block->dev_state->bdev, 2446 block->dev_bytenr, block->mirror_num); 2447 if (indent_level + indent_add > BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL) { 2448 printk("[...]\n"); 2449 return; 2450 } 2451 printk(buf); 2452 indent_level += indent_add; 2453 if (list_empty(&block->ref_to_list)) { 2454 printk("\n"); 2455 return; 2456 } 2457 if (block->mirror_num > 1 && 2458 !(state->print_mask & BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS)) { 2459 printk(" [...]\n"); 2460 return; 2461 } 2462 2463 cursor_position = indent_level; 2464 list_for_each_entry(l, &block->ref_to_list, node_ref_to) { 2465 while (cursor_position < indent_level) { 2466 printk(" "); 2467 cursor_position++; 2468 } 2469 if (l->ref_cnt > 1) 2470 indent_add = sprintf(buf, " %d*--> ", l->ref_cnt); 2471 else 2472 indent_add = sprintf(buf, " --> "); 2473 if (indent_level + indent_add > 2474 BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL) { 2475 printk("[...]\n"); 2476 cursor_position = 0; 2477 continue; 2478 } 2479 2480 printk(buf); 2481 2482 btrfsic_dump_tree_sub(state, l->block_ref_to, 2483 indent_level + indent_add); 2484 cursor_position = 0; 2485 } 2486 } 2487 2488 static struct btrfsic_block_link *btrfsic_block_link_lookup_or_add( 2489 struct btrfsic_state *state, 2490 struct btrfsic_block_data_ctx *next_block_ctx, 2491 struct btrfsic_block *next_block, 2492 struct btrfsic_block *from_block, 2493 u64 parent_generation) 2494 { 2495 struct btrfsic_block_link *l; 2496 2497 l = btrfsic_block_link_hashtable_lookup(next_block_ctx->dev->bdev, 2498 next_block_ctx->dev_bytenr, 2499 from_block->dev_state->bdev, 2500 from_block->dev_bytenr, 2501 &state->block_link_hashtable); 2502 if (NULL == l) { 2503 l = btrfsic_block_link_alloc(); 2504 if (!l) 2505 return NULL; 2506 2507 l->block_ref_to = next_block; 2508 l->block_ref_from = from_block; 2509 l->ref_cnt = 1; 2510 l->parent_generation = parent_generation; 
2511 2512 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2513 btrfsic_print_add_link(state, l); 2514 2515 list_add(&l->node_ref_to, &from_block->ref_to_list); 2516 list_add(&l->node_ref_from, &next_block->ref_from_list); 2517 2518 btrfsic_block_link_hashtable_add(l, 2519 &state->block_link_hashtable); 2520 } else { 2521 l->ref_cnt++; 2522 l->parent_generation = parent_generation; 2523 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2524 btrfsic_print_add_link(state, l); 2525 } 2526 2527 return l; 2528 } 2529 2530 static struct btrfsic_block *btrfsic_block_lookup_or_add( 2531 struct btrfsic_state *state, 2532 struct btrfsic_block_data_ctx *block_ctx, 2533 const char *additional_string, 2534 int is_metadata, 2535 int is_iodone, 2536 int never_written, 2537 int mirror_num, 2538 int *was_created) 2539 { 2540 struct btrfsic_block *block; 2541 2542 block = btrfsic_block_hashtable_lookup(block_ctx->dev->bdev, 2543 block_ctx->dev_bytenr, 2544 &state->block_hashtable); 2545 if (NULL == block) { 2546 struct btrfsic_dev_state *dev_state; 2547 2548 block = btrfsic_block_alloc(); 2549 if (!block) 2550 return NULL; 2551 2552 dev_state = btrfsic_dev_state_lookup(block_ctx->dev->bdev->bd_dev); 2553 if (NULL == dev_state) { 2554 pr_info("btrfsic: error, lookup dev_state failed!\n"); 2555 btrfsic_block_free(block); 2556 return NULL; 2557 } 2558 block->dev_state = dev_state; 2559 block->dev_bytenr = block_ctx->dev_bytenr; 2560 block->logical_bytenr = block_ctx->start; 2561 block->is_metadata = is_metadata; 2562 block->is_iodone = is_iodone; 2563 block->never_written = never_written; 2564 block->mirror_num = mirror_num; 2565 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2566 pr_info("New %s%c-block @%llu (%pg/%llu/%d)\n", 2567 additional_string, 2568 btrfsic_get_block_type(state, block), 2569 block->logical_bytenr, dev_state->bdev, 2570 block->dev_bytenr, mirror_num); 2571 list_add(&block->all_blocks_node, &state->all_blocks_list); 2572 
btrfsic_block_hashtable_add(block, &state->block_hashtable); 2573 if (NULL != was_created) 2574 *was_created = 1; 2575 } else { 2576 if (NULL != was_created) 2577 *was_created = 0; 2578 } 2579 2580 return block; 2581 } 2582 2583 static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state, 2584 u64 bytenr, 2585 struct btrfsic_dev_state *dev_state, 2586 u64 dev_bytenr) 2587 { 2588 struct btrfs_fs_info *fs_info = state->fs_info; 2589 struct btrfsic_block_data_ctx block_ctx; 2590 int num_copies; 2591 int mirror_num; 2592 int match = 0; 2593 int ret; 2594 2595 num_copies = btrfs_num_copies(fs_info, bytenr, state->metablock_size); 2596 2597 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { 2598 ret = btrfsic_map_block(state, bytenr, state->metablock_size, 2599 &block_ctx, mirror_num); 2600 if (ret) { 2601 pr_info("btrfsic: btrfsic_map_block(logical @%llu, mirror %d) failed!\n", 2602 bytenr, mirror_num); 2603 continue; 2604 } 2605 2606 if (dev_state->bdev == block_ctx.dev->bdev && 2607 dev_bytenr == block_ctx.dev_bytenr) { 2608 match++; 2609 btrfsic_release_block_ctx(&block_ctx); 2610 break; 2611 } 2612 btrfsic_release_block_ctx(&block_ctx); 2613 } 2614 2615 if (WARN_ON(!match)) { 2616 pr_info( 2617 "btrfs: attempt to write M-block which contains logical bytenr that doesn't map to dev+physical bytenr of submit_bio, buffer->log_bytenr=%llu, submit_bio(bdev=%pg, phys_bytenr=%llu)!\n", 2618 bytenr, dev_state->bdev, dev_bytenr); 2619 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { 2620 ret = btrfsic_map_block(state, bytenr, 2621 state->metablock_size, 2622 &block_ctx, mirror_num); 2623 if (ret) 2624 continue; 2625 2626 pr_info("read logical bytenr @%llu maps to (%pg/%llu/%d)\n", 2627 bytenr, block_ctx.dev->bdev, 2628 block_ctx.dev_bytenr, mirror_num); 2629 } 2630 } 2631 } 2632 2633 static struct btrfsic_dev_state *btrfsic_dev_state_lookup(dev_t dev) 2634 { 2635 return btrfsic_dev_state_hashtable_lookup(dev, 2636 
&btrfsic_dev_state_hashtable); 2637 } 2638 2639 static void __btrfsic_submit_bio(struct bio *bio) 2640 { 2641 struct btrfsic_dev_state *dev_state; 2642 2643 if (!btrfsic_is_initialized) 2644 return; 2645 2646 mutex_lock(&btrfsic_mutex); 2647 /* since btrfsic_submit_bio() is also called before 2648 * btrfsic_mount(), this might return NULL */ 2649 dev_state = btrfsic_dev_state_lookup(bio->bi_bdev->bd_dev); 2650 if (NULL != dev_state && 2651 (bio_op(bio) == REQ_OP_WRITE) && bio_has_data(bio)) { 2652 int i = 0; 2653 u64 dev_bytenr; 2654 u64 cur_bytenr; 2655 struct bio_vec bvec; 2656 struct bvec_iter iter; 2657 int bio_is_patched; 2658 char **mapped_datav; 2659 unsigned int segs = bio_segments(bio); 2660 2661 dev_bytenr = 512 * bio->bi_iter.bi_sector; 2662 bio_is_patched = 0; 2663 if (dev_state->state->print_mask & 2664 BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH) 2665 pr_info("submit_bio(rw=%d,0x%x, bi_vcnt=%u, bi_sector=%llu (bytenr %llu), bi_bdev=%p)\n", 2666 bio_op(bio), bio->bi_opf, segs, 2667 bio->bi_iter.bi_sector, dev_bytenr, bio->bi_bdev); 2668 2669 mapped_datav = kmalloc_array(segs, 2670 sizeof(*mapped_datav), GFP_NOFS); 2671 if (!mapped_datav) 2672 goto leave; 2673 cur_bytenr = dev_bytenr; 2674 2675 bio_for_each_segment(bvec, bio, iter) { 2676 BUG_ON(bvec.bv_len != PAGE_SIZE); 2677 mapped_datav[i] = page_address(bvec.bv_page); 2678 i++; 2679 2680 if (dev_state->state->print_mask & 2681 BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH_VERBOSE) 2682 pr_info("#%u: bytenr=%llu, len=%u, offset=%u\n", 2683 i, cur_bytenr, bvec.bv_len, bvec.bv_offset); 2684 cur_bytenr += bvec.bv_len; 2685 } 2686 btrfsic_process_written_block(dev_state, dev_bytenr, 2687 mapped_datav, segs, 2688 bio, &bio_is_patched, 2689 bio->bi_opf); 2690 kfree(mapped_datav); 2691 } else if (NULL != dev_state && (bio->bi_opf & REQ_PREFLUSH)) { 2692 if (dev_state->state->print_mask & 2693 BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH) 2694 pr_info("submit_bio(rw=%d,0x%x FLUSH, bdev=%p)\n", 2695 bio_op(bio), bio->bi_opf, bio->bi_bdev); 2696 
if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) { 2697 if ((dev_state->state->print_mask & 2698 (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH | 2699 BTRFSIC_PRINT_MASK_VERBOSE))) 2700 pr_info( 2701 "btrfsic_submit_bio(%pg) with FLUSH but dummy block already in use (ignored)!\n", 2702 dev_state->bdev); 2703 } else { 2704 struct btrfsic_block *const block = 2705 &dev_state->dummy_block_for_bio_bh_flush; 2706 2707 block->is_iodone = 0; 2708 block->never_written = 0; 2709 block->iodone_w_error = 0; 2710 block->flush_gen = dev_state->last_flush_gen + 1; 2711 block->submit_bio_bh_rw = bio->bi_opf; 2712 block->orig_bio_private = bio->bi_private; 2713 block->orig_bio_end_io = bio->bi_end_io; 2714 block->next_in_same_bio = NULL; 2715 bio->bi_private = block; 2716 bio->bi_end_io = btrfsic_bio_end_io; 2717 } 2718 } 2719 leave: 2720 mutex_unlock(&btrfsic_mutex); 2721 } 2722 2723 void btrfsic_submit_bio(struct bio *bio) 2724 { 2725 __btrfsic_submit_bio(bio); 2726 submit_bio(bio); 2727 } 2728 2729 int btrfsic_submit_bio_wait(struct bio *bio) 2730 { 2731 __btrfsic_submit_bio(bio); 2732 return submit_bio_wait(bio); 2733 } 2734 2735 int btrfsic_mount(struct btrfs_fs_info *fs_info, 2736 struct btrfs_fs_devices *fs_devices, 2737 int including_extent_data, u32 print_mask) 2738 { 2739 int ret; 2740 struct btrfsic_state *state; 2741 struct list_head *dev_head = &fs_devices->devices; 2742 struct btrfs_device *device; 2743 2744 if (!PAGE_ALIGNED(fs_info->nodesize)) { 2745 pr_info("btrfsic: cannot handle nodesize %d not being a multiple of PAGE_SIZE %ld!\n", 2746 fs_info->nodesize, PAGE_SIZE); 2747 return -1; 2748 } 2749 if (!PAGE_ALIGNED(fs_info->sectorsize)) { 2750 pr_info("btrfsic: cannot handle sectorsize %d not being a multiple of PAGE_SIZE %ld!\n", 2751 fs_info->sectorsize, PAGE_SIZE); 2752 return -1; 2753 } 2754 state = kvzalloc(sizeof(*state), GFP_KERNEL); 2755 if (!state) 2756 return -ENOMEM; 2757 2758 if (!btrfsic_is_initialized) { 2759 mutex_init(&btrfsic_mutex); 2760 
btrfsic_dev_state_hashtable_init(&btrfsic_dev_state_hashtable); 2761 btrfsic_is_initialized = 1; 2762 } 2763 mutex_lock(&btrfsic_mutex); 2764 state->fs_info = fs_info; 2765 state->print_mask = print_mask; 2766 state->include_extent_data = including_extent_data; 2767 state->metablock_size = fs_info->nodesize; 2768 state->datablock_size = fs_info->sectorsize; 2769 INIT_LIST_HEAD(&state->all_blocks_list); 2770 btrfsic_block_hashtable_init(&state->block_hashtable); 2771 btrfsic_block_link_hashtable_init(&state->block_link_hashtable); 2772 state->max_superblock_generation = 0; 2773 state->latest_superblock = NULL; 2774 2775 list_for_each_entry(device, dev_head, dev_list) { 2776 struct btrfsic_dev_state *ds; 2777 2778 if (!device->bdev || !device->name) 2779 continue; 2780 2781 ds = btrfsic_dev_state_alloc(); 2782 if (NULL == ds) { 2783 mutex_unlock(&btrfsic_mutex); 2784 return -ENOMEM; 2785 } 2786 ds->bdev = device->bdev; 2787 ds->state = state; 2788 btrfsic_dev_state_hashtable_add(ds, 2789 &btrfsic_dev_state_hashtable); 2790 } 2791 2792 ret = btrfsic_process_superblock(state, fs_devices); 2793 if (0 != ret) { 2794 mutex_unlock(&btrfsic_mutex); 2795 btrfsic_unmount(fs_devices); 2796 return ret; 2797 } 2798 2799 if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_DATABASE) 2800 btrfsic_dump_database(state); 2801 if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_TREE) 2802 btrfsic_dump_tree(state); 2803 2804 mutex_unlock(&btrfsic_mutex); 2805 return 0; 2806 } 2807 2808 void btrfsic_unmount(struct btrfs_fs_devices *fs_devices) 2809 { 2810 struct btrfsic_block *b_all, *tmp_all; 2811 struct btrfsic_state *state; 2812 struct list_head *dev_head = &fs_devices->devices; 2813 struct btrfs_device *device; 2814 2815 if (!btrfsic_is_initialized) 2816 return; 2817 2818 mutex_lock(&btrfsic_mutex); 2819 2820 state = NULL; 2821 list_for_each_entry(device, dev_head, dev_list) { 2822 struct btrfsic_dev_state *ds; 2823 2824 if (!device->bdev || !device->name) 2825 continue; 2826 2827 ds = 
btrfsic_dev_state_hashtable_lookup( 2828 device->bdev->bd_dev, 2829 &btrfsic_dev_state_hashtable); 2830 if (NULL != ds) { 2831 state = ds->state; 2832 btrfsic_dev_state_hashtable_remove(ds); 2833 btrfsic_dev_state_free(ds); 2834 } 2835 } 2836 2837 if (NULL == state) { 2838 pr_info("btrfsic: error, cannot find state information on umount!\n"); 2839 mutex_unlock(&btrfsic_mutex); 2840 return; 2841 } 2842 2843 /* 2844 * Don't care about keeping the lists' state up to date, 2845 * just free all memory that was allocated dynamically. 2846 * Free the blocks and the block_links. 2847 */ 2848 list_for_each_entry_safe(b_all, tmp_all, &state->all_blocks_list, 2849 all_blocks_node) { 2850 struct btrfsic_block_link *l, *tmp; 2851 2852 list_for_each_entry_safe(l, tmp, &b_all->ref_to_list, 2853 node_ref_to) { 2854 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2855 btrfsic_print_rem_link(state, l); 2856 2857 l->ref_cnt--; 2858 if (0 == l->ref_cnt) 2859 btrfsic_block_link_free(l); 2860 } 2861 2862 if (b_all->is_iodone || b_all->never_written) 2863 btrfsic_block_free(b_all); 2864 else 2865 pr_info( 2866 "btrfs: attempt to free %c-block @%llu (%pg/%llu/%d) on umount which is not yet iodone!\n", 2867 btrfsic_get_block_type(state, b_all), 2868 b_all->logical_bytenr, b_all->dev_state->bdev, 2869 b_all->dev_bytenr, b_all->mirror_num); 2870 } 2871 2872 mutex_unlock(&btrfsic_mutex); 2873 2874 kvfree(state); 2875 } 2876