1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (C) STRATO AG 2011. All rights reserved. 4 */ 5 6 /* 7 * This module can be used to catch cases when the btrfs kernel 8 * code executes write requests to the disk that bring the file 9 * system in an inconsistent state. In such a state, a power-loss 10 * or kernel panic event would cause that the data on disk is 11 * lost or at least damaged. 12 * 13 * Code is added that examines all block write requests during 14 * runtime (including writes of the super block). Three rules 15 * are verified and an error is printed on violation of the 16 * rules: 17 * 1. It is not allowed to write a disk block which is 18 * currently referenced by the super block (either directly 19 * or indirectly). 20 * 2. When a super block is written, it is verified that all 21 * referenced (directly or indirectly) blocks fulfill the 22 * following requirements: 23 * 2a. All referenced blocks have either been present when 24 * the file system was mounted, (i.e., they have been 25 * referenced by the super block) or they have been 26 * written since then and the write completion callback 27 * was called and no write error was indicated and a 28 * FLUSH request to the device where these blocks are 29 * located was received and completed. 30 * 2b. All referenced blocks need to have a generation 31 * number which is equal to the parent's number. 32 * 33 * One issue that was found using this module was that the log 34 * tree on disk became temporarily corrupted because disk blocks 35 * that had been in use for the log tree had been freed and 36 * reused too early, while being referenced by the written super 37 * block. 38 * 39 * The search term in the kernel log that can be used to filter 40 * on the existence of detected integrity issues is 41 * "btrfs: attempt". 42 * 43 * The integrity check is enabled via mount options. These 44 * mount options are only supported if the integrity check 45 * tool is compiled by defining BTRFS_FS_CHECK_INTEGRITY. 46 * 47 * Example #1, apply integrity checks to all metadata: 48 * mount /dev/sdb1 /mnt -o check_int 49 * 50 * Example #2, apply integrity checks to all metadata and 51 * to data extents: 52 * mount /dev/sdb1 /mnt -o check_int_data 53 * 54 * Example #3, apply integrity checks to all metadata and dump 55 * the tree that the super block references to kernel messages 56 * each time after a super block was written: 57 * mount /dev/sdb1 /mnt -o check_int,check_int_print_mask=263 58 * 59 * If the integrity check tool is included and activated in 60 * the mount options, plenty of kernel memory is used, and 61 * plenty of additional CPU cycles are spent. Enabling this 62 * functionality is not intended for normal use. In most 63 * cases, unless you are a btrfs developer who needs to verify 64 * the integrity of (super)-block write requests, do not 65 * enable the config option BTRFS_FS_CHECK_INTEGRITY to 66 * include and compile the integrity check tool. 67 * 68 * Expect millions of lines of information in the kernel log with an 69 * enabled check_int_print_mask. Therefore set LOG_BUF_SHIFT in the 70 * kernel config to at least 26 (which is 64MB). Usually the value is 71 * limited to 21 (which is 2MB) in init/Kconfig. The file needs to be 72 * changed like this before LOG_BUF_SHIFT can be set to a high value: 73 * config LOG_BUF_SHIFT 74 * int "Kernel log buffer size (16 => 64KB, 17 => 128KB)" 75 * range 12 30 76 */ 77 78 #include <linux/sched.h> 79 #include <linux/slab.h> 80 #include <linux/mutex.h> 81 #include <linux/genhd.h> 82 #include <linux/blkdev.h> 83 #include <linux/mm.h> 84 #include <linux/string.h> 85 #include <crypto/hash.h> 86 #include "ctree.h" 87 #include "disk-io.h" 88 #include "transaction.h" 89 #include "extent_io.h" 90 #include "volumes.h" 91 #include "print-tree.h" 92 #include "locking.h" 93 #include "check-integrity.h" 94 #include "rcu-string.h" 95 #include "compression.h" 96 97 #define BTRFSIC_BLOCK_HASHTABLE_SIZE 0x10000 98 #define BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE 0x10000 99 #define BTRFSIC_DEV2STATE_HASHTABLE_SIZE 0x100 100 #define BTRFSIC_BLOCK_MAGIC_NUMBER 0x14491051 101 #define BTRFSIC_BLOCK_LINK_MAGIC_NUMBER 0x11070807 102 #define BTRFSIC_DEV2STATE_MAGIC_NUMBER 0x20111530 103 #define BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER 20111300 104 #define BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL (200 - 6) /* in characters, 105 * excluding " [...]" */ 106 #define BTRFSIC_GENERATION_UNKNOWN ((u64)-1) 107 108 /* 109 * The definition of the bitmask fields for the print_mask. 110 * They are specified with the mount option check_integrity_print_mask. 111 */ 112 #define BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE 0x00000001 113 #define BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION 0x00000002 114 #define BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE 0x00000004 115 #define BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE 0x00000008 116 #define BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH 0x00000010 117 #define BTRFSIC_PRINT_MASK_END_IO_BIO_BH 0x00000020 118 #define BTRFSIC_PRINT_MASK_VERBOSE 0x00000040 119 #define BTRFSIC_PRINT_MASK_VERY_VERBOSE 0x00000080 120 #define BTRFSIC_PRINT_MASK_INITIAL_TREE 0x00000100 121 #define BTRFSIC_PRINT_MASK_INITIAL_ALL_TREES 0x00000200 122 #define BTRFSIC_PRINT_MASK_INITIAL_DATABASE 0x00000400 123 #define BTRFSIC_PRINT_MASK_NUM_COPIES 0x00000800 124 #define BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS 0x00001000 125 #define BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH_VERBOSE 0x00002000 126 127 struct btrfsic_dev_state; 128 struct btrfsic_state; 129 130 struct btrfsic_block { 131 u32 magic_num; /* only used for debug purposes */ 132 unsigned int is_metadata:1; /* if it is meta-data, not data-data */ 133 unsigned int is_superblock:1; /* if it is one of the superblocks */ 134 unsigned int is_iodone:1; /* if is done by lower subsystem */ 135 unsigned int iodone_w_error:1; /* error was indicated to endio */ 136 unsigned int never_written:1; /* block was added because it was 137 * referenced, not because it was 138 * written */ 139 unsigned int mirror_num; /* large enough to hold 140 * BTRFS_SUPER_MIRROR_MAX */ 141 struct btrfsic_dev_state *dev_state; 142 u64 dev_bytenr; /* key, physical byte num on disk */ 143 u64 logical_bytenr; /* logical byte num on disk */ 144 u64 generation; 145 struct btrfs_disk_key disk_key; /* extra info to print in case of 146 * issues, will not always be correct */ 147 struct list_head collision_resolving_node; /* list node */ 148 struct list_head all_blocks_node; /* list node */ 149 150 /* the following two lists contain block_link items */ 151 struct list_head ref_to_list; /* list */ 152 struct list_head ref_from_list; /* list */ 153 struct btrfsic_block *next_in_same_bio; 154 void *orig_bio_private; 155 bio_end_io_t *orig_bio_end_io; 156 int submit_bio_bh_rw; 157 u64 flush_gen; /* only valid if !never_written */ 158 }; 159 160 /* 161 * Elements of this type are allocated dynamically and required because 162 * each block object can refer to and can be ref from multiple blocks. 163 * The key to lookup them in the hashtable is the dev_bytenr of 164 * the block ref to plus the one from the block referred from. 165 * The fact that they are searchable via a hashtable and that a 166 * ref_cnt is maintained is not required for the btrfs integrity 167 * check algorithm itself, it is only used to make the output more 168 * beautiful in case that an error is detected (an error is defined 169 * as a write operation to a block while that block is still referenced). 170 */ 171 struct btrfsic_block_link { 172 u32 magic_num; /* only used for debug purposes */ 173 u32 ref_cnt; 174 struct list_head node_ref_to; /* list node */ 175 struct list_head node_ref_from; /* list node */ 176 struct list_head collision_resolving_node; /* list node */ 177 struct btrfsic_block *block_ref_to; 178 struct btrfsic_block *block_ref_from; 179 u64 parent_generation; 180 }; 181 182 struct btrfsic_dev_state { 183 u32 magic_num; /* only used for debug purposes */ 184 struct block_device *bdev; 185 struct btrfsic_state *state; 186 struct list_head collision_resolving_node; /* list node */ 187 struct btrfsic_block dummy_block_for_bio_bh_flush; 188 u64 last_flush_gen; 189 char name[BDEVNAME_SIZE]; 190 }; 191 192 struct btrfsic_block_hashtable { 193 struct list_head table[BTRFSIC_BLOCK_HASHTABLE_SIZE]; 194 }; 195 196 struct btrfsic_block_link_hashtable { 197 struct list_head table[BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE]; 198 }; 199 200 struct btrfsic_dev_state_hashtable { 201 struct list_head table[BTRFSIC_DEV2STATE_HASHTABLE_SIZE]; 202 }; 203 204 struct btrfsic_block_data_ctx { 205 u64 start; /* virtual bytenr */ 206 u64 dev_bytenr; /* physical bytenr on device */ 207 u32 len; 208 struct btrfsic_dev_state *dev; 209 char **datav; 210 struct page **pagev; 211 void *mem_to_free; 212 }; 213 214 /* This structure is used to implement recursion without occupying 215 * any stack space, refer to btrfsic_process_metablock() */ 216 struct btrfsic_stack_frame { 217 u32 magic; 218 u32 nr; 219 int error; 220 int i; 221 int limit_nesting; 222 int num_copies; 223 int mirror_num; 224 struct btrfsic_block *block; 225 struct btrfsic_block_data_ctx *block_ctx; 226 struct btrfsic_block *next_block; 227 struct btrfsic_block_data_ctx next_block_ctx; 228 struct btrfs_header *hdr; 229 struct btrfsic_stack_frame *prev; 230 }; 231 232 /* Some state per mounted filesystem */ 233 struct btrfsic_state { 234 u32 print_mask; 235 int include_extent_data; 236 int csum_size; 237 struct list_head all_blocks_list; 238 struct btrfsic_block_hashtable block_hashtable; 239 struct btrfsic_block_link_hashtable block_link_hashtable; 240 struct btrfs_fs_info *fs_info; 241 u64 max_superblock_generation; 242 struct btrfsic_block *latest_superblock; 243 u32 metablock_size; 244 u32 datablock_size; 245 }; 246 247 static void btrfsic_block_init(struct btrfsic_block *b); 248 static struct btrfsic_block *btrfsic_block_alloc(void); 249 static void btrfsic_block_free(struct btrfsic_block *b); 250 static void btrfsic_block_link_init(struct btrfsic_block_link *n); 251 static struct btrfsic_block_link *btrfsic_block_link_alloc(void); 252 static void btrfsic_block_link_free(struct btrfsic_block_link *n); 253 static void btrfsic_dev_state_init(struct btrfsic_dev_state *ds); 254 static struct btrfsic_dev_state *btrfsic_dev_state_alloc(void); 255 static void btrfsic_dev_state_free(struct btrfsic_dev_state *ds); 256 static void btrfsic_block_hashtable_init(struct btrfsic_block_hashtable *h); 257 static void btrfsic_block_hashtable_add(struct btrfsic_block *b, 258 struct btrfsic_block_hashtable *h); 259 static void btrfsic_block_hashtable_remove(struct btrfsic_block *b); 260 static struct btrfsic_block *btrfsic_block_hashtable_lookup( 261 struct block_device *bdev, 262 u64 dev_bytenr, 263 struct btrfsic_block_hashtable *h); 264 static void btrfsic_block_link_hashtable_init( 265 struct btrfsic_block_link_hashtable *h); 266 static void btrfsic_block_link_hashtable_add( 267 struct btrfsic_block_link *l, 268 struct btrfsic_block_link_hashtable *h); 269 static void btrfsic_block_link_hashtable_remove(struct btrfsic_block_link *l); 270 static struct btrfsic_block_link *btrfsic_block_link_hashtable_lookup( 271 struct block_device *bdev_ref_to, 272 u64 dev_bytenr_ref_to, 273 struct block_device *bdev_ref_from, 274 u64 dev_bytenr_ref_from, 275 struct btrfsic_block_link_hashtable *h); 276 static void btrfsic_dev_state_hashtable_init( 277 struct btrfsic_dev_state_hashtable *h); 278 static void btrfsic_dev_state_hashtable_add( 279 struct btrfsic_dev_state *ds, 280 struct btrfsic_dev_state_hashtable *h); 281 static void btrfsic_dev_state_hashtable_remove(struct btrfsic_dev_state *ds); 282 static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup(dev_t dev, 283 struct btrfsic_dev_state_hashtable *h); 284 static struct btrfsic_stack_frame *btrfsic_stack_frame_alloc(void); 285 static void btrfsic_stack_frame_free(struct btrfsic_stack_frame *sf); 286 static int btrfsic_process_superblock(struct btrfsic_state *state, 287 struct btrfs_fs_devices *fs_devices); 288 static int btrfsic_process_metablock(struct btrfsic_state *state, 289 struct btrfsic_block *block, 290 struct btrfsic_block_data_ctx *block_ctx, 291 int limit_nesting, int force_iodone_flag); 292 static void btrfsic_read_from_block_data( 293 struct btrfsic_block_data_ctx *block_ctx, 294 void *dst, u32 offset, size_t len); 295 static int btrfsic_create_link_to_next_block( 296 struct btrfsic_state *state, 297 struct btrfsic_block *block, 298 struct btrfsic_block_data_ctx 299 *block_ctx, u64 next_bytenr, 300 int limit_nesting, 301 struct btrfsic_block_data_ctx *next_block_ctx, 302 struct btrfsic_block **next_blockp, 303 int force_iodone_flag, 304 int *num_copiesp, int *mirror_nump, 305 struct btrfs_disk_key *disk_key, 306 u64 parent_generation); 307 static int btrfsic_handle_extent_data(struct btrfsic_state *state, 308 struct btrfsic_block *block, 309 struct btrfsic_block_data_ctx *block_ctx, 310 u32 item_offset, int force_iodone_flag); 311 static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len, 312 struct btrfsic_block_data_ctx *block_ctx_out, 313 int mirror_num); 314 static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx); 315 static int btrfsic_read_block(struct btrfsic_state *state, 316 struct btrfsic_block_data_ctx *block_ctx); 317 static void btrfsic_dump_database(struct btrfsic_state *state); 318 static int btrfsic_test_for_metadata(struct btrfsic_state *state, 319 char **datav, unsigned int num_pages); 320 static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, 321 u64 dev_bytenr, char **mapped_datav, 322 unsigned int num_pages, 323 struct bio *bio, int *bio_is_patched, 324 int submit_bio_bh_rw); 325 static int btrfsic_process_written_superblock( 326 struct btrfsic_state *state, 327 struct btrfsic_block *const block, 328 struct btrfs_super_block *const super_hdr); 329 static void btrfsic_bio_end_io(struct bio *bp); 330 static int btrfsic_is_block_ref_by_superblock(const struct btrfsic_state *state, 331 const struct btrfsic_block *block, 332 int recursion_level); 333 static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state, 334 struct btrfsic_block *const block, 335 int recursion_level); 336 static void btrfsic_print_add_link(const struct btrfsic_state *state, 337 const struct btrfsic_block_link *l); 338 static void btrfsic_print_rem_link(const struct btrfsic_state *state, 339 const struct btrfsic_block_link *l); 340 static char btrfsic_get_block_type(const struct btrfsic_state *state, 341 const struct btrfsic_block *block); 342 static void btrfsic_dump_tree(const struct btrfsic_state *state); 343 static void btrfsic_dump_tree_sub(const struct btrfsic_state *state, 344 const struct btrfsic_block *block, 345 int indent_level); 346 static struct btrfsic_block_link *btrfsic_block_link_lookup_or_add( 347 struct btrfsic_state *state, 348 struct btrfsic_block_data_ctx *next_block_ctx, 349 struct btrfsic_block *next_block, 350 struct btrfsic_block *from_block, 351 u64 parent_generation); 352 static struct btrfsic_block *btrfsic_block_lookup_or_add( 353 struct btrfsic_state *state, 354 struct btrfsic_block_data_ctx *block_ctx, 355 const char *additional_string, 356 int is_metadata, 357 int is_iodone, 358 int never_written, 359 int mirror_num, 360 int *was_created); 361 static int btrfsic_process_superblock_dev_mirror( 362 struct btrfsic_state *state, 363 struct btrfsic_dev_state *dev_state, 364 struct btrfs_device *device, 365 int superblock_mirror_num, 366 struct btrfsic_dev_state **selected_dev_state, 367 struct btrfs_super_block *selected_super); 368 static struct btrfsic_dev_state *btrfsic_dev_state_lookup(dev_t dev); 369 static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state, 370 u64 bytenr, 371 struct btrfsic_dev_state *dev_state, 372 u64 dev_bytenr); 373 374 static struct mutex btrfsic_mutex; 375 static int btrfsic_is_initialized; 376 static struct btrfsic_dev_state_hashtable btrfsic_dev_state_hashtable; 377 378 379 static void btrfsic_block_init(struct btrfsic_block *b) 380 { 381 b->magic_num = BTRFSIC_BLOCK_MAGIC_NUMBER; 382 b->dev_state = NULL; 383 b->dev_bytenr = 0; 384 b->logical_bytenr = 0; 385 b->generation = BTRFSIC_GENERATION_UNKNOWN; 386 b->disk_key.objectid = 0; 387 b->disk_key.type = 0; 388 b->disk_key.offset = 0; 389 b->is_metadata = 0; 390 b->is_superblock = 0; 391 b->is_iodone = 0; 392 b->iodone_w_error = 0; 393 b->never_written = 0; 394 b->mirror_num = 0; 395 b->next_in_same_bio = NULL; 396 b->orig_bio_private = NULL; 397 b->orig_bio_end_io = NULL; 398 INIT_LIST_HEAD(&b->collision_resolving_node); 399 INIT_LIST_HEAD(&b->all_blocks_node); 400 INIT_LIST_HEAD(&b->ref_to_list); 401 INIT_LIST_HEAD(&b->ref_from_list); 402 b->submit_bio_bh_rw = 0; 403 b->flush_gen = 0; 404 } 405 406 static struct btrfsic_block *btrfsic_block_alloc(void) 407 { 408 struct btrfsic_block *b; 409 410 b = kzalloc(sizeof(*b), GFP_NOFS); 411 if (NULL != b) 412 btrfsic_block_init(b); 413 414 return b; 415 } 416 417 static void btrfsic_block_free(struct btrfsic_block *b) 418 { 419 BUG_ON(!(NULL == b || BTRFSIC_BLOCK_MAGIC_NUMBER == b->magic_num)); 420 kfree(b); 421 } 422 423 static void btrfsic_block_link_init(struct btrfsic_block_link *l) 424 { 425 l->magic_num = BTRFSIC_BLOCK_LINK_MAGIC_NUMBER; 426 l->ref_cnt = 1; 427 INIT_LIST_HEAD(&l->node_ref_to); 428 INIT_LIST_HEAD(&l->node_ref_from); 429 INIT_LIST_HEAD(&l->collision_resolving_node); 430 l->block_ref_to = NULL; 431 l->block_ref_from = NULL; 432 } 433 434 static struct btrfsic_block_link *btrfsic_block_link_alloc(void) 435 { 436 struct btrfsic_block_link *l; 437 438 l = kzalloc(sizeof(*l), GFP_NOFS); 439 if (NULL != l) 440 btrfsic_block_link_init(l); 441 442 return l; 443 } 444 445 static void btrfsic_block_link_free(struct btrfsic_block_link *l) 446 { 447 BUG_ON(!(NULL == l || BTRFSIC_BLOCK_LINK_MAGIC_NUMBER == l->magic_num)); 448 kfree(l); 449 } 450 451 static void btrfsic_dev_state_init(struct btrfsic_dev_state *ds) 452 { 453 ds->magic_num = BTRFSIC_DEV2STATE_MAGIC_NUMBER; 454 ds->bdev = NULL; 455 ds->state = NULL; 456 ds->name[0] = '\0'; 457 INIT_LIST_HEAD(&ds->collision_resolving_node); 458 ds->last_flush_gen = 0; 459 btrfsic_block_init(&ds->dummy_block_for_bio_bh_flush); 460 ds->dummy_block_for_bio_bh_flush.is_iodone = 1; 461 ds->dummy_block_for_bio_bh_flush.dev_state = ds; 462 } 463 464 static struct btrfsic_dev_state *btrfsic_dev_state_alloc(void) 465 { 466 struct btrfsic_dev_state *ds; 467 468 ds = kzalloc(sizeof(*ds), GFP_NOFS); 469 if (NULL != ds) 470 btrfsic_dev_state_init(ds); 471 472 return ds; 473 } 474 475 static void btrfsic_dev_state_free(struct btrfsic_dev_state *ds) 476 { 477 BUG_ON(!(NULL == ds || 478 BTRFSIC_DEV2STATE_MAGIC_NUMBER == ds->magic_num)); 479 kfree(ds); 480 } 481 482 static void btrfsic_block_hashtable_init(struct btrfsic_block_hashtable *h) 483 { 484 int i; 485 486 for (i = 0; i < BTRFSIC_BLOCK_HASHTABLE_SIZE; i++) 487 INIT_LIST_HEAD(h->table + i); 488 } 489 490 static void btrfsic_block_hashtable_add(struct btrfsic_block *b, 491 struct btrfsic_block_hashtable *h) 492 { 493 const unsigned int hashval = 494 (((unsigned int)(b->dev_bytenr >> 16)) ^ 495 ((unsigned int)((uintptr_t)b->dev_state->bdev))) & 496 (BTRFSIC_BLOCK_HASHTABLE_SIZE - 1); 497 498 list_add(&b->collision_resolving_node, h->table + hashval); 499 } 500 501 static void btrfsic_block_hashtable_remove(struct btrfsic_block *b) 502 { 503 list_del(&b->collision_resolving_node); 504 } 505 506 static struct btrfsic_block *btrfsic_block_hashtable_lookup( 507 struct block_device *bdev, 508 u64 dev_bytenr, 509 struct btrfsic_block_hashtable *h) 510 { 511 const unsigned int hashval = 512 (((unsigned int)(dev_bytenr >> 16)) ^ 513 ((unsigned int)((uintptr_t)bdev))) & 514 (BTRFSIC_BLOCK_HASHTABLE_SIZE - 1); 515 struct btrfsic_block *b; 516 517 list_for_each_entry(b, h->table + hashval, collision_resolving_node) { 518 if (b->dev_state->bdev == bdev && b->dev_bytenr == dev_bytenr) 519 return b; 520 } 521 522 return NULL; 523 } 524 525 static void btrfsic_block_link_hashtable_init( 526 struct btrfsic_block_link_hashtable *h) 527 { 528 int i; 529 530 for (i = 0; i < BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE; i++) 531 INIT_LIST_HEAD(h->table + i); 532 } 533 534 static void btrfsic_block_link_hashtable_add( 535 struct btrfsic_block_link *l, 536 struct btrfsic_block_link_hashtable *h) 537 { 538 const unsigned int hashval = 539 (((unsigned int)(l->block_ref_to->dev_bytenr >> 16)) ^ 540 ((unsigned int)(l->block_ref_from->dev_bytenr >> 16)) ^ 541 ((unsigned int)((uintptr_t)l->block_ref_to->dev_state->bdev)) ^ 542 ((unsigned int)((uintptr_t)l->block_ref_from->dev_state->bdev))) 543 & (BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE - 1); 544 545 BUG_ON(NULL == l->block_ref_to); 546 BUG_ON(NULL == l->block_ref_from); 547 list_add(&l->collision_resolving_node, h->table + hashval); 548 } 549 550 static void btrfsic_block_link_hashtable_remove(struct btrfsic_block_link *l) 551 { 552 list_del(&l->collision_resolving_node); 553 } 554 555 static struct btrfsic_block_link *btrfsic_block_link_hashtable_lookup( 556 struct block_device *bdev_ref_to, 557 u64 dev_bytenr_ref_to, 558 struct block_device *bdev_ref_from, 559 u64 dev_bytenr_ref_from, 560 struct btrfsic_block_link_hashtable *h) 561 { 562 const unsigned int hashval = 563 (((unsigned int)(dev_bytenr_ref_to >> 16)) ^ 564 ((unsigned int)(dev_bytenr_ref_from >> 16)) ^ 565 ((unsigned int)((uintptr_t)bdev_ref_to)) ^ 566 ((unsigned int)((uintptr_t)bdev_ref_from))) & 567 (BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE - 1); 568 struct btrfsic_block_link *l; 569 570 list_for_each_entry(l, h->table + hashval, collision_resolving_node) { 571 BUG_ON(NULL == l->block_ref_to); 572 BUG_ON(NULL == l->block_ref_from); 573 if (l->block_ref_to->dev_state->bdev == bdev_ref_to && 574 l->block_ref_to->dev_bytenr == dev_bytenr_ref_to && 575 l->block_ref_from->dev_state->bdev == bdev_ref_from && 576 l->block_ref_from->dev_bytenr == dev_bytenr_ref_from) 577 return l; 578 } 579 580 return NULL; 581 } 582 583 static void btrfsic_dev_state_hashtable_init( 584 struct btrfsic_dev_state_hashtable *h) 585 { 586 int i; 587 588 for (i = 0; i < BTRFSIC_DEV2STATE_HASHTABLE_SIZE; i++) 589 INIT_LIST_HEAD(h->table + i); 590 } 591 592 static void btrfsic_dev_state_hashtable_add( 593 struct btrfsic_dev_state *ds, 594 struct btrfsic_dev_state_hashtable *h) 595 { 596 const unsigned int hashval = 597 (((unsigned int)((uintptr_t)ds->bdev->bd_dev)) & 598 (BTRFSIC_DEV2STATE_HASHTABLE_SIZE - 1)); 599 600 list_add(&ds->collision_resolving_node, h->table + hashval); 601 } 602 603 static void btrfsic_dev_state_hashtable_remove(struct btrfsic_dev_state *ds) 604 { 605 list_del(&ds->collision_resolving_node); 606 } 607 608 static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup(dev_t dev, 609 struct btrfsic_dev_state_hashtable *h) 610 { 611 const unsigned int hashval = 612 dev & (BTRFSIC_DEV2STATE_HASHTABLE_SIZE - 1); 613 struct btrfsic_dev_state *ds; 614 615 list_for_each_entry(ds, h->table + hashval, collision_resolving_node) { 616 if (ds->bdev->bd_dev == dev) 617 return ds; 618 } 619 620 return NULL; 621 } 622 623 static int btrfsic_process_superblock(struct btrfsic_state *state, 624 struct btrfs_fs_devices *fs_devices) 625 { 626 struct btrfs_super_block *selected_super; 627 struct list_head *dev_head = &fs_devices->devices; 628 struct btrfs_device *device; 629 struct btrfsic_dev_state *selected_dev_state = NULL; 630 int ret = 0; 631 int pass; 632 633 selected_super = kzalloc(sizeof(*selected_super), GFP_NOFS); 634 if (!selected_super) 635 return -ENOMEM; 636 637 list_for_each_entry(device, dev_head, dev_list) { 638 int i; 639 struct btrfsic_dev_state *dev_state; 640 641 if (!device->bdev || !device->name) 642 continue; 643 644 dev_state = btrfsic_dev_state_lookup(device->bdev->bd_dev); 645 BUG_ON(NULL == dev_state); 646 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { 647 ret = btrfsic_process_superblock_dev_mirror( 648 state, dev_state, device, i, 649 &selected_dev_state, selected_super); 650 if (0 != ret && 0 == i) { 651 kfree(selected_super); 652 return ret; 653 } 654 } 655 } 656 657 if (NULL == state->latest_superblock) { 658 pr_info("btrfsic: no superblock found!\n"); 659 kfree(selected_super); 660 return -1; 661 } 662 663 state->csum_size = btrfs_super_csum_size(selected_super); 664 665 for (pass = 0; pass < 3; pass++) { 666 int num_copies; 667 int mirror_num; 668 u64 next_bytenr; 669 670 switch (pass) { 671 case 0: 672 next_bytenr = btrfs_super_root(selected_super); 673 if (state->print_mask & 674 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) 675 pr_info("root@%llu\n", next_bytenr); 676 break; 677 case 1: 678 next_bytenr = btrfs_super_chunk_root(selected_super); 679 if (state->print_mask & 680 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) 681 pr_info("chunk@%llu\n", next_bytenr); 682 break; 683 case 2: 684 next_bytenr = btrfs_super_log_root(selected_super); 685 if (0 == next_bytenr) 686 continue; 687 if (state->print_mask & 688 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) 689 pr_info("log@%llu\n", next_bytenr); 690 break; 691 } 692 693 num_copies = btrfs_num_copies(state->fs_info, next_bytenr, 694 state->metablock_size); 695 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) 696 pr_info("num_copies(log_bytenr=%llu) = %d\n", 697 next_bytenr, num_copies); 698 699 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { 700 struct btrfsic_block *next_block; 701 struct btrfsic_block_data_ctx tmp_next_block_ctx; 702 struct btrfsic_block_link *l; 703 704 ret = btrfsic_map_block(state, next_bytenr, 705 state->metablock_size, 706 &tmp_next_block_ctx, 707 mirror_num); 708 if (ret) { 709 pr_info("btrfsic: btrfsic_map_block(root @%llu, mirror %d) failed!\n", 710 next_bytenr, mirror_num); 711 kfree(selected_super); 712 return -1; 713 } 714 715 next_block = btrfsic_block_hashtable_lookup( 716 tmp_next_block_ctx.dev->bdev, 717 tmp_next_block_ctx.dev_bytenr, 718 &state->block_hashtable); 719 BUG_ON(NULL == next_block); 720 721 l = btrfsic_block_link_hashtable_lookup( 722 tmp_next_block_ctx.dev->bdev, 723 tmp_next_block_ctx.dev_bytenr, 724 state->latest_superblock->dev_state-> 725 bdev, 726 state->latest_superblock->dev_bytenr, 727 &state->block_link_hashtable); 728 BUG_ON(NULL == l); 729 730 ret = btrfsic_read_block(state, &tmp_next_block_ctx); 731 if (ret < (int)PAGE_SIZE) { 732 pr_info("btrfsic: read @logical %llu failed!\n", 733 tmp_next_block_ctx.start); 734 btrfsic_release_block_ctx(&tmp_next_block_ctx); 735 kfree(selected_super); 736 return -1; 737 } 738 739 ret = btrfsic_process_metablock(state, 740 next_block, 741 &tmp_next_block_ctx, 742 BTRFS_MAX_LEVEL + 3, 1); 743 btrfsic_release_block_ctx(&tmp_next_block_ctx); 744 } 745 } 746 747 kfree(selected_super); 748 return ret; 749 } 750 751 static int btrfsic_process_superblock_dev_mirror( 752 struct btrfsic_state *state, 753 struct btrfsic_dev_state *dev_state, 754 struct btrfs_device *device, 755 int superblock_mirror_num, 756 struct btrfsic_dev_state **selected_dev_state, 757 struct btrfs_super_block *selected_super) 758 { 759 struct btrfs_fs_info *fs_info = state->fs_info; 760 struct btrfs_super_block *super_tmp; 761 u64 dev_bytenr; 762 struct btrfsic_block *superblock_tmp; 763 int pass; 764 struct block_device *const superblock_bdev = device->bdev; 765 struct page *page; 766 struct address_space *mapping = superblock_bdev->bd_inode->i_mapping; 767 int ret = 0; 768 769 /* super block bytenr is always the unmapped device bytenr */ 770 dev_bytenr = btrfs_sb_offset(superblock_mirror_num); 771 if (dev_bytenr + BTRFS_SUPER_INFO_SIZE > device->commit_total_bytes) 772 return -1; 773 774 page = read_cache_page_gfp(mapping, dev_bytenr >> PAGE_SHIFT, GFP_NOFS); 775 if (IS_ERR(page)) 776 return -1; 777 778 super_tmp = page_address(page); 779 780 if (btrfs_super_bytenr(super_tmp) != dev_bytenr || 781 btrfs_super_magic(super_tmp) != BTRFS_MAGIC || 782 memcmp(device->uuid, super_tmp->dev_item.uuid, BTRFS_UUID_SIZE) || 783 btrfs_super_nodesize(super_tmp) != state->metablock_size || 784 btrfs_super_sectorsize(super_tmp) != state->datablock_size) { 785 ret = 0; 786 goto out; 787 } 788 789 superblock_tmp = 790 btrfsic_block_hashtable_lookup(superblock_bdev, 791 dev_bytenr, 792 &state->block_hashtable); 793 if (NULL == superblock_tmp) { 794 superblock_tmp = btrfsic_block_alloc(); 795 if (NULL == superblock_tmp) { 796 ret = -1; 797 goto out; 798 } 799 /* for superblock, only the dev_bytenr makes sense */ 800 superblock_tmp->dev_bytenr = dev_bytenr; 801 superblock_tmp->dev_state = dev_state; 802 superblock_tmp->logical_bytenr = dev_bytenr; 803 superblock_tmp->generation = btrfs_super_generation(super_tmp); 804 superblock_tmp->is_metadata = 1; 805 superblock_tmp->is_superblock = 1; 806 superblock_tmp->is_iodone = 1; 807 superblock_tmp->never_written = 0; 808 superblock_tmp->mirror_num = 1 + superblock_mirror_num; 809 if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE) 810 btrfs_info_in_rcu(fs_info, 811 "new initial S-block (bdev %p, %s) @%llu (%s/%llu/%d)", 812 superblock_bdev, 813 rcu_str_deref(device->name), dev_bytenr, 814 dev_state->name, dev_bytenr, 815 superblock_mirror_num); 816 list_add(&superblock_tmp->all_blocks_node, 817 &state->all_blocks_list); 818 btrfsic_block_hashtable_add(superblock_tmp, 819 &state->block_hashtable); 820 } 821 822 /* select the one with the highest generation field */ 823 if (btrfs_super_generation(super_tmp) > 824 state->max_superblock_generation || 825 0 == state->max_superblock_generation) { 826 memcpy(selected_super, super_tmp, sizeof(*selected_super)); 827 *selected_dev_state = dev_state; 828 state->max_superblock_generation = 829 btrfs_super_generation(super_tmp); 830 state->latest_superblock = superblock_tmp; 831 } 832 833 for (pass = 0; pass < 3; pass++) { 834 u64 next_bytenr; 835 int num_copies; 836 int mirror_num; 837 const char *additional_string = NULL; 838 struct btrfs_disk_key tmp_disk_key; 839 840 tmp_disk_key.type = BTRFS_ROOT_ITEM_KEY; 841 tmp_disk_key.offset = 0; 842 switch (pass) { 843 case 0: 844 btrfs_set_disk_key_objectid(&tmp_disk_key, 845 BTRFS_ROOT_TREE_OBJECTID); 846 additional_string = "initial root "; 847 next_bytenr = btrfs_super_root(super_tmp); 848 break; 849 case 1: 850 btrfs_set_disk_key_objectid(&tmp_disk_key, 851 BTRFS_CHUNK_TREE_OBJECTID); 852 additional_string = "initial chunk "; 853 next_bytenr = btrfs_super_chunk_root(super_tmp); 854 break; 855 case 2: 856 btrfs_set_disk_key_objectid(&tmp_disk_key, 857 BTRFS_TREE_LOG_OBJECTID); 858 additional_string = "initial log "; 859 next_bytenr = btrfs_super_log_root(super_tmp); 860 if (0 == next_bytenr) 861 continue; 862 break; 863 } 864 865 num_copies = btrfs_num_copies(fs_info, next_bytenr, 866 state->metablock_size); 867 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) 868 pr_info("num_copies(log_bytenr=%llu) = %d\n", 869 next_bytenr, num_copies); 870 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { 871 struct btrfsic_block *next_block; 872 struct btrfsic_block_data_ctx tmp_next_block_ctx; 873 struct btrfsic_block_link *l; 874 875 if (btrfsic_map_block(state, next_bytenr, 876 state->metablock_size, 877 &tmp_next_block_ctx, 878 mirror_num)) { 879 pr_info("btrfsic: btrfsic_map_block(bytenr @%llu, mirror %d) failed!\n", 880 next_bytenr, mirror_num); 881 ret = -1; 882 goto out; 883 } 884 885 next_block = btrfsic_block_lookup_or_add( 886 state, &tmp_next_block_ctx, 887 additional_string, 1, 1, 0, 888 mirror_num, NULL); 889 if (NULL == next_block) { 890 btrfsic_release_block_ctx(&tmp_next_block_ctx); 891 ret = -1; 892 goto out; 893 } 894 895 next_block->disk_key = tmp_disk_key; 896 next_block->generation = BTRFSIC_GENERATION_UNKNOWN; 897 l = btrfsic_block_link_lookup_or_add( 898 state, &tmp_next_block_ctx, 899 next_block, superblock_tmp, 900 BTRFSIC_GENERATION_UNKNOWN); 901 btrfsic_release_block_ctx(&tmp_next_block_ctx); 902 if (NULL == l) { 903 ret = -1; 904 goto out; 905 } 906 } 907 } 908 if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_ALL_TREES) 909 btrfsic_dump_tree_sub(state, superblock_tmp, 0); 910 911 out: 912 put_page(page); 913 return ret; 914 } 915 916 static struct btrfsic_stack_frame *btrfsic_stack_frame_alloc(void) 917 { 918 struct btrfsic_stack_frame *sf; 919 920 sf = kzalloc(sizeof(*sf), GFP_NOFS); 921 if (sf) 922 sf->magic = BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER; 923 return sf; 924 } 925 926 static void btrfsic_stack_frame_free(struct btrfsic_stack_frame *sf) 927 { 928 BUG_ON(!(NULL == sf || 929 BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER == sf->magic)); 930 kfree(sf); 931 } 932 933 static noinline_for_stack int btrfsic_process_metablock( 934 struct btrfsic_state *state, 935 struct btrfsic_block *const first_block, 936 struct btrfsic_block_data_ctx *const first_block_ctx, 937 int first_limit_nesting, int force_iodone_flag) 938 { 939 struct btrfsic_stack_frame initial_stack_frame = { 0 }; 940 struct btrfsic_stack_frame *sf; 941 struct btrfsic_stack_frame *next_stack; 942 struct btrfs_header *const first_hdr = 943 (struct btrfs_header *)first_block_ctx->datav[0]; 944 945 BUG_ON(!first_hdr); 946 sf = &initial_stack_frame; 947 sf->error = 0; 948 sf->i = -1; 949 sf->limit_nesting = first_limit_nesting; 950 sf->block = first_block; 951 sf->block_ctx = first_block_ctx; 952 sf->next_block = NULL; 953 sf->hdr = first_hdr; 954 sf->prev = NULL; 955 956 continue_with_new_stack_frame: 957 sf->block->generation = le64_to_cpu(sf->hdr->generation); 958 if (0 == sf->hdr->level) { 959 struct btrfs_leaf *const leafhdr = 960 (struct btrfs_leaf *)sf->hdr; 961 962 if (-1 == sf->i) { 963 sf->nr = btrfs_stack_header_nritems(&leafhdr->header); 964 965 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 966 pr_info("leaf %llu items %d generation %llu owner %llu\n", 967 sf->block_ctx->start, sf->nr, 968 btrfs_stack_header_generation( 969 &leafhdr->header), 970 btrfs_stack_header_owner( 971 &leafhdr->header)); 972 } 973 974 continue_with_current_leaf_stack_frame: 975 if (0 == sf->num_copies || sf->mirror_num > sf->num_copies) { 976 sf->i++; 977 sf->num_copies = 0; 978 } 979 980 if (sf->i < sf->nr) { 981 struct btrfs_item disk_item; 982 u32 disk_item_offset = 983 (uintptr_t)(leafhdr->items + sf->i) - 984 (uintptr_t)leafhdr; 985 struct btrfs_disk_key *disk_key; 986 u8 type; 987 u32 item_offset; 988 u32 item_size; 989 990 if (disk_item_offset + sizeof(struct btrfs_item) > 991 sf->block_ctx->len) { 992 leaf_item_out_of_bounce_error: 993 pr_info("btrfsic: leaf item out of bounce at logical %llu, dev %s\n", 994 sf->block_ctx->start, 995 sf->block_ctx->dev->name); 996 goto one_stack_frame_backwards; 997 } 998 btrfsic_read_from_block_data(sf->block_ctx, 999 &disk_item, 1000 disk_item_offset, 1001 sizeof(struct btrfs_item)); 1002 item_offset = btrfs_stack_item_offset(&disk_item); 1003 item_size = btrfs_stack_item_size(&disk_item); 1004 disk_key = &disk_item.key; 1005 type = btrfs_disk_key_type(disk_key); 1006 1007 if (BTRFS_ROOT_ITEM_KEY == type) { 1008 struct btrfs_root_item root_item; 1009 u32 root_item_offset; 1010 u64 next_bytenr; 1011 1012 root_item_offset = item_offset + 1013 offsetof(struct btrfs_leaf, items); 1014 if (root_item_offset + item_size > 1015 sf->block_ctx->len) 1016 goto leaf_item_out_of_bounce_error; 1017 btrfsic_read_from_block_data( 1018 sf->block_ctx, &root_item, 1019 root_item_offset, 1020 item_size); 1021 next_bytenr = btrfs_root_bytenr(&root_item); 1022 1023 sf->error = 1024 btrfsic_create_link_to_next_block( 1025 state, 1026 sf->block, 1027 sf->block_ctx, 1028 next_bytenr, 1029 sf->limit_nesting, 1030 &sf->next_block_ctx, 1031 &sf->next_block, 1032 force_iodone_flag, 1033 &sf->num_copies, 1034 &sf->mirror_num, 1035 disk_key, 1036 btrfs_root_generation( 1037 &root_item)); 1038 if (sf->error) 1039 goto one_stack_frame_backwards; 1040 1041 if (NULL != sf->next_block) { 1042 struct btrfs_header *const next_hdr = 1043 (struct btrfs_header *) 1044 sf->next_block_ctx.datav[0]; 1045 1046 next_stack = 1047 btrfsic_stack_frame_alloc(); 1048 if (NULL == next_stack) { 1049 sf->error = -1; 1050 btrfsic_release_block_ctx( 1051 &sf-> 1052 next_block_ctx); 1053 goto one_stack_frame_backwards; 1054 } 1055 1056 next_stack->i = -1; 1057 next_stack->block = sf->next_block; 1058 next_stack->block_ctx = 1059 &sf->next_block_ctx; 1060 next_stack->next_block = NULL; 1061 next_stack->hdr = next_hdr; 1062 next_stack->limit_nesting = 1063 sf->limit_nesting - 1; 1064 next_stack->prev = sf; 1065 sf = next_stack; 1066 goto continue_with_new_stack_frame; 1067 } 1068 } else if (BTRFS_EXTENT_DATA_KEY == type && 1069 state->include_extent_data) { 1070 sf->error = btrfsic_handle_extent_data( 1071 state, 1072 sf->block, 1073 sf->block_ctx, 1074 item_offset, 1075 force_iodone_flag); 1076 if (sf->error) 1077 goto one_stack_frame_backwards; 1078 } 1079 1080 goto continue_with_current_leaf_stack_frame; 1081 } 1082 } else { 1083 struct btrfs_node *const nodehdr = (struct btrfs_node *)sf->hdr; 1084 1085 if (-1 == sf->i) { 1086 sf->nr = btrfs_stack_header_nritems(&nodehdr->header); 1087 1088 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 1089 pr_info("node %llu level %d items %d generation %llu owner %llu\n", 1090 sf->block_ctx->start, 1091 nodehdr->header.level, sf->nr, 1092 btrfs_stack_header_generation( 1093 &nodehdr->header), 1094 btrfs_stack_header_owner( 1095 &nodehdr->header)); 1096 } 1097 1098 continue_with_current_node_stack_frame: 1099 if (0 == sf->num_copies || sf->mirror_num > sf->num_copies) { 1100 sf->i++; 1101 sf->num_copies = 0; 1102 } 1103 1104 if (sf->i < sf->nr) { 1105 struct btrfs_key_ptr key_ptr; 1106 u32 key_ptr_offset; 1107 u64 next_bytenr; 1108 1109 key_ptr_offset = (uintptr_t)(nodehdr->ptrs + sf->i) - 1110 (uintptr_t)nodehdr; 1111 if (key_ptr_offset + sizeof(struct btrfs_key_ptr) > 1112 sf->block_ctx->len) { 1113 pr_info("btrfsic: node item out of bounce at logical %llu, dev %s\n", 1114 sf->block_ctx->start, 1115 sf->block_ctx->dev->name); 1116 goto one_stack_frame_backwards; 1117 } 1118 btrfsic_read_from_block_data( 1119 sf->block_ctx, &key_ptr, key_ptr_offset, 1120 sizeof(struct btrfs_key_ptr)); 1121 next_bytenr = btrfs_stack_key_blockptr(&key_ptr); 1122 1123 sf->error = btrfsic_create_link_to_next_block( 1124 state, 1125 sf->block, 1126 sf->block_ctx, 1127 next_bytenr, 1128 sf->limit_nesting, 1129 &sf->next_block_ctx, 1130 &sf->next_block, 1131 force_iodone_flag, 1132 &sf->num_copies, 1133 &sf->mirror_num, 1134 &key_ptr.key, 1135 btrfs_stack_key_generation(&key_ptr)); 1136 if (sf->error) 1137 goto one_stack_frame_backwards; 1138 1139 if (NULL != sf->next_block) { 1140 struct btrfs_header *const next_hdr = 1141 (struct btrfs_header *) 1142 sf->next_block_ctx.datav[0]; 1143 1144 next_stack = btrfsic_stack_frame_alloc(); 1145 if (NULL == next_stack) { 1146 sf->error = -1; 1147 goto one_stack_frame_backwards; 1148 } 1149 1150 next_stack->i = -1; 1151 next_stack->block = sf->next_block; 1152 next_stack->block_ctx = &sf->next_block_ctx; 1153 next_stack->next_block = NULL; 1154 next_stack->hdr = next_hdr; 1155 next_stack->limit_nesting = 1156 sf->limit_nesting - 1; 1157 next_stack->prev = sf; 1158 sf = next_stack; 1159 goto continue_with_new_stack_frame; 1160 } 1161 1162 goto continue_with_current_node_stack_frame; 1163 } 1164 } 1165 1166 one_stack_frame_backwards: 1167 if (NULL != sf->prev) { 1168 struct btrfsic_stack_frame *const prev = sf->prev; 1169 1170 /* the one for the initial block is freed in the caller */ 1171 btrfsic_release_block_ctx(sf->block_ctx); 1172 1173 if (sf->error) { 1174 prev->error = sf->error; 1175 btrfsic_stack_frame_free(sf); 1176 sf = prev; 1177 goto one_stack_frame_backwards; 1178 } 1179 1180 btrfsic_stack_frame_free(sf); 1181 sf = prev; 1182 goto continue_with_new_stack_frame; 1183 } else { 1184 BUG_ON(&initial_stack_frame != sf); 1185 } 1186 1187 return sf->error; 1188 } 1189 1190 static void btrfsic_read_from_block_data( 1191 struct btrfsic_block_data_ctx *block_ctx, 1192 void *dstv, u32 offset, size_t len) 1193 { 1194 size_t cur; 1195 size_t pgoff; 1196 char *kaddr; 1197 char *dst = (char *)dstv; 1198 size_t start_offset = offset_in_page(block_ctx->start); 1199 unsigned long i = (start_offset + offset) >> PAGE_SHIFT; 1200 1201 WARN_ON(offset + len > block_ctx->len); 1202 pgoff = offset_in_page(start_offset + offset); 1203 1204 while (len > 0) { 1205 cur = min(len, ((size_t)PAGE_SIZE - pgoff)); 1206 BUG_ON(i >= DIV_ROUND_UP(block_ctx->len, PAGE_SIZE)); 1207 kaddr = block_ctx->datav[i]; 1208 memcpy(dst, kaddr + pgoff, cur); 1209 1210 dst += cur; 1211 len -= cur; 1212 pgoff = 0; 1213 i++; 1214 } 1215 } 1216 1217 static int btrfsic_create_link_to_next_block( 1218 struct btrfsic_state *state, 1219 struct btrfsic_block *block, 1220 struct btrfsic_block_data_ctx *block_ctx, 1221 u64 next_bytenr, 1222 int limit_nesting, 1223 struct btrfsic_block_data_ctx *next_block_ctx, 1224 struct btrfsic_block **next_blockp, 1225 int force_iodone_flag, 1226 int *num_copiesp, int *mirror_nump, 1227 struct btrfs_disk_key *disk_key, 1228 u64 parent_generation) 1229 { 1230 struct btrfs_fs_info *fs_info = state->fs_info; 1231 struct btrfsic_block *next_block = NULL; 1232 int ret; 1233 struct btrfsic_block_link *l; 1234 int did_alloc_block_link; 1235 int block_was_created; 1236 1237 *next_blockp = NULL; 1238 if (0 == *num_copiesp) { 1239 *num_copiesp = btrfs_num_copies(fs_info, next_bytenr, 1240 state->metablock_size); 1241 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) 1242 pr_info("num_copies(log_bytenr=%llu) = %d\n", 1243 next_bytenr, *num_copiesp); 1244 *mirror_nump = 1; 1245 } 1246 1247 if (*mirror_nump > *num_copiesp) 1248 return 0; 1249 1250 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 1251 pr_info("btrfsic_create_link_to_next_block(mirror_num=%d)\n", 1252 *mirror_nump); 1253 ret = btrfsic_map_block(state, next_bytenr, 1254 state->metablock_size, 1255 next_block_ctx, *mirror_nump); 1256 if (ret) { 1257 pr_info("btrfsic: btrfsic_map_block(@%llu, mirror=%d) failed!\n", 1258 next_bytenr, *mirror_nump); 1259 btrfsic_release_block_ctx(next_block_ctx); 1260 *next_blockp = NULL; 1261 return -1; 1262 } 1263 1264 next_block = btrfsic_block_lookup_or_add(state, 1265 next_block_ctx, "referenced ", 1266 1, force_iodone_flag, 1267 !force_iodone_flag, 1268 *mirror_nump, 1269 &block_was_created); 1270 if (NULL == next_block) { 1271 btrfsic_release_block_ctx(next_block_ctx); 1272 *next_blockp = NULL; 1273 return -1; 1274 } 1275 if (block_was_created) { 1276 l = NULL; 1277 next_block->generation = BTRFSIC_GENERATION_UNKNOWN; 1278 } else { 1279 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) { 1280 if (next_block->logical_bytenr != next_bytenr && 1281 !(!next_block->is_metadata && 1282 0 == next_block->logical_bytenr)) 1283 pr_info("Referenced block @%llu (%s/%llu/%d) found in hash table, %c, bytenr mismatch (!= stored %llu).\n", 1284 next_bytenr, next_block_ctx->dev->name, 1285 next_block_ctx->dev_bytenr, *mirror_nump, 1286 btrfsic_get_block_type(state, 1287 next_block), 1288 next_block->logical_bytenr); 1289 else 1290 pr_info("Referenced block @%llu (%s/%llu/%d) found in hash table, %c.\n", 1291 next_bytenr, next_block_ctx->dev->name, 1292 next_block_ctx->dev_bytenr, *mirror_nump, 1293 btrfsic_get_block_type(state, 1294 next_block)); 1295 } 1296 next_block->logical_bytenr = next_bytenr; 1297 1298 next_block->mirror_num = *mirror_nump; 1299 l = btrfsic_block_link_hashtable_lookup( 1300 next_block_ctx->dev->bdev, 1301 next_block_ctx->dev_bytenr, 1302 block_ctx->dev->bdev, 1303 block_ctx->dev_bytenr, 1304 &state->block_link_hashtable); 1305 } 1306 1307 next_block->disk_key = *disk_key; 1308 if (NULL == l) { 1309 l = btrfsic_block_link_alloc(); 1310 if (NULL == l) { 1311 btrfsic_release_block_ctx(next_block_ctx); 1312 *next_blockp = NULL; 1313 return -1; 1314 } 1315 1316 did_alloc_block_link = 1; 1317 l->block_ref_to = next_block; 1318 l->block_ref_from = block; 1319 l->ref_cnt = 1; 1320 l->parent_generation = parent_generation; 1321 1322 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 1323 btrfsic_print_add_link(state, l); 1324 1325 list_add(&l->node_ref_to, &block->ref_to_list); 1326 list_add(&l->node_ref_from, &next_block->ref_from_list); 1327 1328 btrfsic_block_link_hashtable_add(l, 1329 &state->block_link_hashtable); 1330 } else { 1331 did_alloc_block_link = 0; 1332 if (0 == limit_nesting) { 1333 l->ref_cnt++; 1334 l->parent_generation = parent_generation; 1335 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 1336 btrfsic_print_add_link(state, l); 1337 } 1338 } 1339 1340 if (limit_nesting > 0 && did_alloc_block_link) { 1341 ret = btrfsic_read_block(state, next_block_ctx); 1342 if (ret < (int)next_block_ctx->len) { 1343 pr_info("btrfsic: read block @logical %llu failed!\n", 1344 next_bytenr); 1345 btrfsic_release_block_ctx(next_block_ctx); 1346 *next_blockp = NULL; 1347 return -1; 1348 } 1349 1350 *next_blockp = next_block; 1351 } else { 1352 *next_blockp = NULL; 1353 } 1354 (*mirror_nump)++; 1355 1356 return 0; 1357 } 1358 1359 static int btrfsic_handle_extent_data( 1360 struct btrfsic_state *state, 1361 struct btrfsic_block *block, 1362 struct btrfsic_block_data_ctx *block_ctx, 1363 u32 item_offset, int force_iodone_flag) 1364 { 1365 struct btrfs_fs_info *fs_info = state->fs_info; 1366 struct btrfs_file_extent_item file_extent_item; 1367 u64 file_extent_item_offset; 1368 u64 next_bytenr; 1369 u64 num_bytes; 1370 u64 generation; 1371 struct btrfsic_block_link *l; 1372 int ret; 1373 1374 file_extent_item_offset = offsetof(struct btrfs_leaf, items) + 1375 item_offset; 1376 if (file_extent_item_offset + 1377 offsetof(struct btrfs_file_extent_item, disk_num_bytes) > 1378 block_ctx->len) { 1379 pr_info("btrfsic: file item out of bounce at logical %llu, dev %s\n", 1380 block_ctx->start, block_ctx->dev->name); 1381 return -1; 1382 } 1383 1384 btrfsic_read_from_block_data(block_ctx, &file_extent_item, 1385 file_extent_item_offset, 1386 offsetof(struct btrfs_file_extent_item, disk_num_bytes)); 1387 if (BTRFS_FILE_EXTENT_REG != file_extent_item.type || 1388 btrfs_stack_file_extent_disk_bytenr(&file_extent_item) == 0) { 1389 if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE) 1390 pr_info("extent_data: type %u, disk_bytenr = %llu\n", 1391 file_extent_item.type, 1392 btrfs_stack_file_extent_disk_bytenr( 1393 &file_extent_item)); 1394 return 0; 1395 } 1396 1397 if (file_extent_item_offset + sizeof(struct btrfs_file_extent_item) > 1398 block_ctx->len) { 1399 pr_info("btrfsic: file item out of bounce at logical %llu, dev %s\n", 1400 block_ctx->start, block_ctx->dev->name); 1401 return -1; 1402 } 1403 btrfsic_read_from_block_data(block_ctx, &file_extent_item, 1404 file_extent_item_offset, 1405 sizeof(struct btrfs_file_extent_item)); 1406 next_bytenr = btrfs_stack_file_extent_disk_bytenr(&file_extent_item); 1407 if (btrfs_stack_file_extent_compression(&file_extent_item) == 1408 BTRFS_COMPRESS_NONE) { 1409 next_bytenr += btrfs_stack_file_extent_offset(&file_extent_item); 1410 num_bytes = btrfs_stack_file_extent_num_bytes(&file_extent_item); 1411 } else { 1412 num_bytes = btrfs_stack_file_extent_disk_num_bytes(&file_extent_item); 1413 } 1414 generation = btrfs_stack_file_extent_generation(&file_extent_item); 1415 1416 if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE) 1417 pr_info("extent_data: type %u, disk_bytenr = %llu, offset = %llu, num_bytes = %llu\n", 1418 file_extent_item.type, 1419 btrfs_stack_file_extent_disk_bytenr(&file_extent_item), 1420 btrfs_stack_file_extent_offset(&file_extent_item), 1421 num_bytes); 1422 while (num_bytes > 0) { 1423 u32 chunk_len; 1424 int num_copies; 1425 int mirror_num; 1426 1427 if (num_bytes > state->datablock_size) 1428 chunk_len = state->datablock_size; 1429 else 1430 chunk_len = num_bytes; 1431 1432 num_copies = btrfs_num_copies(fs_info, next_bytenr, 1433 state->datablock_size); 1434 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) 1435 pr_info("num_copies(log_bytenr=%llu) = %d\n", 1436 next_bytenr, num_copies); 1437 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { 1438 struct btrfsic_block_data_ctx next_block_ctx; 1439 struct btrfsic_block *next_block; 1440 int block_was_created; 1441 1442 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 1443 pr_info("btrfsic_handle_extent_data(mirror_num=%d)\n", 1444 mirror_num); 1445 if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE) 1446 pr_info("\tdisk_bytenr = %llu, num_bytes %u\n", 1447 next_bytenr, chunk_len); 1448 ret = btrfsic_map_block(state, next_bytenr, 1449 chunk_len, &next_block_ctx, 1450 mirror_num); 1451 if (ret) { 1452 pr_info("btrfsic: btrfsic_map_block(@%llu, mirror=%d) failed!\n", 1453 next_bytenr, mirror_num); 1454 return -1; 1455 } 1456 1457 next_block = btrfsic_block_lookup_or_add( 1458 state, 1459 &next_block_ctx, 1460 "referenced ", 1461 0, 1462 force_iodone_flag, 1463 !force_iodone_flag, 1464 mirror_num, 1465 &block_was_created); 1466 if (NULL == next_block) { 1467 btrfsic_release_block_ctx(&next_block_ctx); 1468 return -1; 1469 } 1470 if (!block_was_created) { 1471 if ((state->print_mask & 1472 BTRFSIC_PRINT_MASK_VERBOSE) && 1473 next_block->logical_bytenr != next_bytenr && 1474 !(!next_block->is_metadata && 1475 0 == next_block->logical_bytenr)) { 1476 pr_info("Referenced block @%llu (%s/%llu/%d) found in hash table, D, bytenr mismatch (!= stored %llu).\n", 1477 next_bytenr, 1478 next_block_ctx.dev->name, 1479 next_block_ctx.dev_bytenr, 1480 mirror_num, 1481 next_block->logical_bytenr); 1482 } 1483 next_block->logical_bytenr = next_bytenr; 1484 next_block->mirror_num = mirror_num; 1485 } 1486 1487 l = btrfsic_block_link_lookup_or_add(state, 1488 &next_block_ctx, 1489 next_block, block, 1490 generation); 1491 btrfsic_release_block_ctx(&next_block_ctx); 1492 if (NULL == l) 1493 return -1; 1494 } 1495 1496 next_bytenr += chunk_len; 1497 num_bytes -= chunk_len; 1498 } 1499 1500 return 0; 1501 } 1502 1503 static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len, 1504 struct btrfsic_block_data_ctx *block_ctx_out, 1505 int mirror_num) 1506 { 1507 struct btrfs_fs_info *fs_info = state->fs_info; 1508 int ret; 1509 u64 length; 1510 struct btrfs_bio *multi = NULL; 1511 struct btrfs_device *device; 1512 1513 length = len; 1514 ret = btrfs_map_block(fs_info, BTRFS_MAP_READ, 1515 bytenr, &length, &multi, mirror_num); 1516 1517 if (ret) { 1518 block_ctx_out->start = 0; 1519 block_ctx_out->dev_bytenr = 0; 1520 block_ctx_out->len = 0; 1521 block_ctx_out->dev = NULL; 1522 block_ctx_out->datav = NULL; 1523 block_ctx_out->pagev = NULL; 1524 block_ctx_out->mem_to_free = NULL; 1525 1526 return ret; 1527 } 1528 1529 device = multi->stripes[0].dev; 1530 if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state) || 1531 !device->bdev || !device->name) 1532 block_ctx_out->dev = NULL; 1533 else 1534 block_ctx_out->dev = btrfsic_dev_state_lookup( 1535 device->bdev->bd_dev); 1536 block_ctx_out->dev_bytenr = multi->stripes[0].physical; 1537 block_ctx_out->start = bytenr; 1538 block_ctx_out->len = len; 1539 block_ctx_out->datav = NULL; 1540 block_ctx_out->pagev = NULL; 1541 block_ctx_out->mem_to_free = NULL; 1542 1543 kfree(multi); 1544 if (NULL == block_ctx_out->dev) { 1545 ret = -ENXIO; 1546 pr_info("btrfsic: error, cannot lookup dev (#1)!\n"); 1547 } 1548 1549 return ret; 1550 } 1551 1552 static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx) 1553 { 1554 if (block_ctx->mem_to_free) { 1555 unsigned int num_pages; 1556 1557 BUG_ON(!block_ctx->datav); 1558 BUG_ON(!block_ctx->pagev); 1559 num_pages = (block_ctx->len + (u64)PAGE_SIZE - 1) >> 1560 PAGE_SHIFT; 1561 while (num_pages > 0) { 1562 num_pages--; 1563 if (block_ctx->datav[num_pages]) { 1564 kunmap(block_ctx->pagev[num_pages]); 1565 block_ctx->datav[num_pages] = NULL; 1566 } 1567 if (block_ctx->pagev[num_pages]) { 1568 __free_page(block_ctx->pagev[num_pages]); 1569 block_ctx->pagev[num_pages] = NULL; 1570 } 1571 } 1572 1573 kfree(block_ctx->mem_to_free); 1574 block_ctx->mem_to_free = NULL; 1575 block_ctx->pagev = NULL; 1576 block_ctx->datav = NULL; 1577 } 1578 } 1579 1580 static int btrfsic_read_block(struct btrfsic_state *state, 1581 struct btrfsic_block_data_ctx *block_ctx) 1582 { 1583 unsigned int num_pages; 1584 unsigned int i; 1585 size_t size; 1586 u64 dev_bytenr; 1587 int ret; 1588 1589 BUG_ON(block_ctx->datav); 1590 BUG_ON(block_ctx->pagev); 1591 BUG_ON(block_ctx->mem_to_free); 1592 if (!PAGE_ALIGNED(block_ctx->dev_bytenr)) { 1593 pr_info("btrfsic: read_block() with unaligned bytenr %llu\n", 1594 block_ctx->dev_bytenr); 1595 return -1; 1596 } 1597 1598 num_pages = (block_ctx->len + (u64)PAGE_SIZE - 1) >> 1599 PAGE_SHIFT; 1600 size = sizeof(*block_ctx->datav) + sizeof(*block_ctx->pagev); 1601 block_ctx->mem_to_free = kcalloc(num_pages, size, GFP_NOFS); 1602 if (!block_ctx->mem_to_free) 1603 return -ENOMEM; 1604 block_ctx->datav = block_ctx->mem_to_free; 1605 block_ctx->pagev = (struct page **)(block_ctx->datav + num_pages); 1606 for (i = 0; i < num_pages; i++) { 1607 block_ctx->pagev[i] = alloc_page(GFP_NOFS); 1608 if (!block_ctx->pagev[i]) 1609 return -1; 1610 } 1611 1612 dev_bytenr = block_ctx->dev_bytenr; 1613 for (i = 0; i < num_pages;) { 1614 struct bio *bio; 1615 unsigned int j; 1616 1617 bio = btrfs_io_bio_alloc(num_pages - i); 1618 bio_set_dev(bio, block_ctx->dev->bdev); 1619 bio->bi_iter.bi_sector = dev_bytenr >> 9; 1620 bio->bi_opf = REQ_OP_READ; 1621 1622 for (j = i; j < num_pages; j++) { 1623 ret = bio_add_page(bio, block_ctx->pagev[j], 1624 PAGE_SIZE, 0); 1625 if (PAGE_SIZE != ret) 1626 break; 1627 } 1628 if (j == i) { 1629 pr_info("btrfsic: error, failed to add a single page!\n"); 1630 return -1; 1631 } 1632 if (submit_bio_wait(bio)) { 1633 pr_info("btrfsic: read error at logical %llu dev %s!\n", 1634 block_ctx->start, block_ctx->dev->name); 1635 bio_put(bio); 1636 return -1; 1637 } 1638 bio_put(bio); 1639 dev_bytenr += (j - i) * PAGE_SIZE; 1640 i = j; 1641 } 1642 for (i = 0; i < num_pages; i++) 1643 block_ctx->datav[i] = kmap(block_ctx->pagev[i]); 1644 1645 return block_ctx->len; 1646 } 1647 1648 static void btrfsic_dump_database(struct btrfsic_state *state) 1649 { 1650 const struct btrfsic_block *b_all; 1651 1652 BUG_ON(NULL == state); 1653 1654 pr_info("all_blocks_list:\n"); 1655 list_for_each_entry(b_all, &state->all_blocks_list, all_blocks_node) { 1656 const struct btrfsic_block_link *l; 1657 1658 pr_info("%c-block @%llu (%s/%llu/%d)\n", 1659 btrfsic_get_block_type(state, b_all), 1660 b_all->logical_bytenr, b_all->dev_state->name, 1661 b_all->dev_bytenr, b_all->mirror_num); 1662 1663 list_for_each_entry(l, &b_all->ref_to_list, node_ref_to) { 1664 pr_info(" %c @%llu (%s/%llu/%d) refers %u* to %c @%llu (%s/%llu/%d)\n", 1665 btrfsic_get_block_type(state, b_all), 1666 b_all->logical_bytenr, b_all->dev_state->name, 1667 b_all->dev_bytenr, b_all->mirror_num, 1668 l->ref_cnt, 1669 btrfsic_get_block_type(state, l->block_ref_to), 1670 l->block_ref_to->logical_bytenr, 1671 l->block_ref_to->dev_state->name, 1672 l->block_ref_to->dev_bytenr, 1673 l->block_ref_to->mirror_num); 1674 } 1675 1676 list_for_each_entry(l, &b_all->ref_from_list, node_ref_from) { 1677 pr_info(" %c @%llu (%s/%llu/%d) is ref %u* from %c @%llu (%s/%llu/%d)\n", 1678 btrfsic_get_block_type(state, b_all), 1679 b_all->logical_bytenr, b_all->dev_state->name, 1680 b_all->dev_bytenr, b_all->mirror_num, 1681 l->ref_cnt, 1682 btrfsic_get_block_type(state, l->block_ref_from), 1683 l->block_ref_from->logical_bytenr, 1684 l->block_ref_from->dev_state->name, 1685 l->block_ref_from->dev_bytenr, 1686 l->block_ref_from->mirror_num); 1687 } 1688 1689 pr_info("\n"); 1690 } 1691 } 1692 1693 /* 1694 * Test whether the disk block contains a tree block (leaf or node) 1695 * (note that this test fails for the super block) 1696 */ 1697 static noinline_for_stack int btrfsic_test_for_metadata( 1698 struct btrfsic_state *state, 1699 char **datav, unsigned int num_pages) 1700 { 1701 struct btrfs_fs_info *fs_info = state->fs_info; 1702 SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); 1703 struct btrfs_header *h; 1704 u8 csum[BTRFS_CSUM_SIZE]; 1705 unsigned int i; 1706 1707 if (num_pages * PAGE_SIZE < state->metablock_size) 1708 return 1; /* not metadata */ 1709 num_pages = state->metablock_size >> PAGE_SHIFT; 1710 h = (struct btrfs_header *)datav[0]; 1711 1712 if (memcmp(h->fsid, fs_info->fs_devices->fsid, BTRFS_FSID_SIZE)) 1713 return 1; 1714 1715 shash->tfm = fs_info->csum_shash; 1716 crypto_shash_init(shash); 1717 1718 for (i = 0; i < num_pages; i++) { 1719 u8 *data = i ? datav[i] : (datav[i] + BTRFS_CSUM_SIZE); 1720 size_t sublen = i ? PAGE_SIZE : 1721 (PAGE_SIZE - BTRFS_CSUM_SIZE); 1722 1723 crypto_shash_update(shash, data, sublen); 1724 } 1725 crypto_shash_final(shash, csum); 1726 if (memcmp(csum, h->csum, state->csum_size)) 1727 return 1; 1728 1729 return 0; /* is metadata */ 1730 } 1731 1732 static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, 1733 u64 dev_bytenr, char **mapped_datav, 1734 unsigned int num_pages, 1735 struct bio *bio, int *bio_is_patched, 1736 int submit_bio_bh_rw) 1737 { 1738 int is_metadata; 1739 struct btrfsic_block *block; 1740 struct btrfsic_block_data_ctx block_ctx; 1741 int ret; 1742 struct btrfsic_state *state = dev_state->state; 1743 struct block_device *bdev = dev_state->bdev; 1744 unsigned int processed_len; 1745 1746 if (NULL != bio_is_patched) 1747 *bio_is_patched = 0; 1748 1749 again: 1750 if (num_pages == 0) 1751 return; 1752 1753 processed_len = 0; 1754 is_metadata = (0 == btrfsic_test_for_metadata(state, mapped_datav, 1755 num_pages)); 1756 1757 block = btrfsic_block_hashtable_lookup(bdev, dev_bytenr, 1758 &state->block_hashtable); 1759 if (NULL != block) { 1760 u64 bytenr = 0; 1761 struct btrfsic_block_link *l, *tmp; 1762 1763 if (block->is_superblock) { 1764 bytenr = btrfs_super_bytenr((struct btrfs_super_block *) 1765 mapped_datav[0]); 1766 if (num_pages * PAGE_SIZE < 1767 BTRFS_SUPER_INFO_SIZE) { 1768 pr_info("btrfsic: cannot work with too short bios!\n"); 1769 return; 1770 } 1771 is_metadata = 1; 1772 BUG_ON(!PAGE_ALIGNED(BTRFS_SUPER_INFO_SIZE)); 1773 processed_len = BTRFS_SUPER_INFO_SIZE; 1774 if (state->print_mask & 1775 BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE) { 1776 pr_info("[before new superblock is written]:\n"); 1777 btrfsic_dump_tree_sub(state, block, 0); 1778 } 1779 } 1780 if (is_metadata) { 1781 if (!block->is_superblock) { 1782 if (num_pages * PAGE_SIZE < 1783 state->metablock_size) { 1784 pr_info("btrfsic: cannot work with too short bios!\n"); 1785 return; 1786 } 1787 processed_len = state->metablock_size; 1788 bytenr = btrfs_stack_header_bytenr( 1789 (struct btrfs_header *) 1790 mapped_datav[0]); 1791 btrfsic_cmp_log_and_dev_bytenr(state, bytenr, 1792 dev_state, 1793 dev_bytenr); 1794 } 1795 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) { 1796 if (block->logical_bytenr != bytenr && 1797 !(!block->is_metadata && 1798 block->logical_bytenr == 0)) 1799 pr_info("Written block @%llu (%s/%llu/%d) found in hash table, %c, bytenr mismatch (!= stored %llu).\n", 1800 bytenr, dev_state->name, 1801 dev_bytenr, 1802 block->mirror_num, 1803 btrfsic_get_block_type(state, 1804 block), 1805 block->logical_bytenr); 1806 else 1807 pr_info("Written block @%llu (%s/%llu/%d) found in hash table, %c.\n", 1808 bytenr, dev_state->name, 1809 dev_bytenr, block->mirror_num, 1810 btrfsic_get_block_type(state, 1811 block)); 1812 } 1813 block->logical_bytenr = bytenr; 1814 } else { 1815 if (num_pages * PAGE_SIZE < 1816 state->datablock_size) { 1817 pr_info("btrfsic: cannot work with too short bios!\n"); 1818 return; 1819 } 1820 processed_len = state->datablock_size; 1821 bytenr = block->logical_bytenr; 1822 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 1823 pr_info("Written block @%llu (%s/%llu/%d) found in hash table, %c.\n", 1824 bytenr, dev_state->name, dev_bytenr, 1825 block->mirror_num, 1826 btrfsic_get_block_type(state, block)); 1827 } 1828 1829 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 1830 pr_info("ref_to_list: %cE, ref_from_list: %cE\n", 1831 list_empty(&block->ref_to_list) ? ' ' : '!', 1832 list_empty(&block->ref_from_list) ? ' ' : '!'); 1833 if (btrfsic_is_block_ref_by_superblock(state, block, 0)) { 1834 pr_info("btrfs: attempt to overwrite %c-block @%llu (%s/%llu/%d), old(gen=%llu, objectid=%llu, type=%d, offset=%llu), new(gen=%llu), which is referenced by most recent superblock (superblockgen=%llu)!\n", 1835 btrfsic_get_block_type(state, block), bytenr, 1836 dev_state->name, dev_bytenr, block->mirror_num, 1837 block->generation, 1838 btrfs_disk_key_objectid(&block->disk_key), 1839 block->disk_key.type, 1840 btrfs_disk_key_offset(&block->disk_key), 1841 btrfs_stack_header_generation( 1842 (struct btrfs_header *) mapped_datav[0]), 1843 state->max_superblock_generation); 1844 btrfsic_dump_tree(state); 1845 } 1846 1847 if (!block->is_iodone && !block->never_written) { 1848 pr_info("btrfs: attempt to overwrite %c-block @%llu (%s/%llu/%d), oldgen=%llu, newgen=%llu, which is not yet iodone!\n", 1849 btrfsic_get_block_type(state, block), bytenr, 1850 dev_state->name, dev_bytenr, block->mirror_num, 1851 block->generation, 1852 btrfs_stack_header_generation( 1853 (struct btrfs_header *) 1854 mapped_datav[0])); 1855 /* it would not be safe to go on */ 1856 btrfsic_dump_tree(state); 1857 goto continue_loop; 1858 } 1859 1860 /* 1861 * Clear all references of this block. Do not free 1862 * the block itself even if is not referenced anymore 1863 * because it still carries valuable information 1864 * like whether it was ever written and IO completed. 1865 */ 1866 list_for_each_entry_safe(l, tmp, &block->ref_to_list, 1867 node_ref_to) { 1868 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 1869 btrfsic_print_rem_link(state, l); 1870 l->ref_cnt--; 1871 if (0 == l->ref_cnt) { 1872 list_del(&l->node_ref_to); 1873 list_del(&l->node_ref_from); 1874 btrfsic_block_link_hashtable_remove(l); 1875 btrfsic_block_link_free(l); 1876 } 1877 } 1878 1879 block_ctx.dev = dev_state; 1880 block_ctx.dev_bytenr = dev_bytenr; 1881 block_ctx.start = bytenr; 1882 block_ctx.len = processed_len; 1883 block_ctx.pagev = NULL; 1884 block_ctx.mem_to_free = NULL; 1885 block_ctx.datav = mapped_datav; 1886 1887 if (is_metadata || state->include_extent_data) { 1888 block->never_written = 0; 1889 block->iodone_w_error = 0; 1890 if (NULL != bio) { 1891 block->is_iodone = 0; 1892 BUG_ON(NULL == bio_is_patched); 1893 if (!*bio_is_patched) { 1894 block->orig_bio_private = 1895 bio->bi_private; 1896 block->orig_bio_end_io = 1897 bio->bi_end_io; 1898 block->next_in_same_bio = NULL; 1899 bio->bi_private = block; 1900 bio->bi_end_io = btrfsic_bio_end_io; 1901 *bio_is_patched = 1; 1902 } else { 1903 struct btrfsic_block *chained_block = 1904 (struct btrfsic_block *) 1905 bio->bi_private; 1906 1907 BUG_ON(NULL == chained_block); 1908 block->orig_bio_private = 1909 chained_block->orig_bio_private; 1910 block->orig_bio_end_io = 1911 chained_block->orig_bio_end_io; 1912 block->next_in_same_bio = chained_block; 1913 bio->bi_private = block; 1914 } 1915 } else { 1916 block->is_iodone = 1; 1917 block->orig_bio_private = NULL; 1918 block->orig_bio_end_io = NULL; 1919 block->next_in_same_bio = NULL; 1920 } 1921 } 1922 1923 block->flush_gen = dev_state->last_flush_gen + 1; 1924 block->submit_bio_bh_rw = submit_bio_bh_rw; 1925 if (is_metadata) { 1926 block->logical_bytenr = bytenr; 1927 block->is_metadata = 1; 1928 if (block->is_superblock) { 1929 BUG_ON(PAGE_SIZE != 1930 BTRFS_SUPER_INFO_SIZE); 1931 ret = btrfsic_process_written_superblock( 1932 state, 1933 block, 1934 (struct btrfs_super_block *) 1935 mapped_datav[0]); 1936 if (state->print_mask & 1937 BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE) { 1938 pr_info("[after new superblock is written]:\n"); 1939 btrfsic_dump_tree_sub(state, block, 0); 1940 } 1941 } else { 1942 block->mirror_num = 0; /* unknown */ 1943 ret = btrfsic_process_metablock( 1944 state, 1945 block, 1946 &block_ctx, 1947 0, 0); 1948 } 1949 if (ret) 1950 pr_info("btrfsic: btrfsic_process_metablock(root @%llu) failed!\n", 1951 dev_bytenr); 1952 } else { 1953 block->is_metadata = 0; 1954 block->mirror_num = 0; /* unknown */ 1955 block->generation = BTRFSIC_GENERATION_UNKNOWN; 1956 if (!state->include_extent_data 1957 && list_empty(&block->ref_from_list)) { 1958 /* 1959 * disk block is overwritten with extent 1960 * data (not meta data) and we are configured 1961 * to not include extent data: take the 1962 * chance and free the block's memory 1963 */ 1964 btrfsic_block_hashtable_remove(block); 1965 list_del(&block->all_blocks_node); 1966 btrfsic_block_free(block); 1967 } 1968 } 1969 btrfsic_release_block_ctx(&block_ctx); 1970 } else { 1971 /* block has not been found in hash table */ 1972 u64 bytenr; 1973 1974 if (!is_metadata) { 1975 processed_len = state->datablock_size; 1976 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 1977 pr_info("Written block (%s/%llu/?) !found in hash table, D.\n", 1978 dev_state->name, dev_bytenr); 1979 if (!state->include_extent_data) { 1980 /* ignore that written D block */ 1981 goto continue_loop; 1982 } 1983 1984 /* this is getting ugly for the 1985 * include_extent_data case... */ 1986 bytenr = 0; /* unknown */ 1987 } else { 1988 processed_len = state->metablock_size; 1989 bytenr = btrfs_stack_header_bytenr( 1990 (struct btrfs_header *) 1991 mapped_datav[0]); 1992 btrfsic_cmp_log_and_dev_bytenr(state, bytenr, dev_state, 1993 dev_bytenr); 1994 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 1995 pr_info("Written block @%llu (%s/%llu/?) !found in hash table, M.\n", 1996 bytenr, dev_state->name, dev_bytenr); 1997 } 1998 1999 block_ctx.dev = dev_state; 2000 block_ctx.dev_bytenr = dev_bytenr; 2001 block_ctx.start = bytenr; 2002 block_ctx.len = processed_len; 2003 block_ctx.pagev = NULL; 2004 block_ctx.mem_to_free = NULL; 2005 block_ctx.datav = mapped_datav; 2006 2007 block = btrfsic_block_alloc(); 2008 if (NULL == block) { 2009 btrfsic_release_block_ctx(&block_ctx); 2010 goto continue_loop; 2011 } 2012 block->dev_state = dev_state; 2013 block->dev_bytenr = dev_bytenr; 2014 block->logical_bytenr = bytenr; 2015 block->is_metadata = is_metadata; 2016 block->never_written = 0; 2017 block->iodone_w_error = 0; 2018 block->mirror_num = 0; /* unknown */ 2019 block->flush_gen = dev_state->last_flush_gen + 1; 2020 block->submit_bio_bh_rw = submit_bio_bh_rw; 2021 if (NULL != bio) { 2022 block->is_iodone = 0; 2023 BUG_ON(NULL == bio_is_patched); 2024 if (!*bio_is_patched) { 2025 block->orig_bio_private = bio->bi_private; 2026 block->orig_bio_end_io = bio->bi_end_io; 2027 block->next_in_same_bio = NULL; 2028 bio->bi_private = block; 2029 bio->bi_end_io = btrfsic_bio_end_io; 2030 *bio_is_patched = 1; 2031 } else { 2032 struct btrfsic_block *chained_block = 2033 (struct btrfsic_block *) 2034 bio->bi_private; 2035 2036 BUG_ON(NULL == chained_block); 2037 block->orig_bio_private = 2038 chained_block->orig_bio_private; 2039 block->orig_bio_end_io = 2040 chained_block->orig_bio_end_io; 2041 block->next_in_same_bio = chained_block; 2042 bio->bi_private = block; 2043 } 2044 } else { 2045 block->is_iodone = 1; 2046 block->orig_bio_private = NULL; 2047 block->orig_bio_end_io = NULL; 2048 block->next_in_same_bio = NULL; 2049 } 2050 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2051 pr_info("New written %c-block @%llu (%s/%llu/%d)\n", 2052 is_metadata ? 'M' : 'D', 2053 block->logical_bytenr, block->dev_state->name, 2054 block->dev_bytenr, block->mirror_num); 2055 list_add(&block->all_blocks_node, &state->all_blocks_list); 2056 btrfsic_block_hashtable_add(block, &state->block_hashtable); 2057 2058 if (is_metadata) { 2059 ret = btrfsic_process_metablock(state, block, 2060 &block_ctx, 0, 0); 2061 if (ret) 2062 pr_info("btrfsic: process_metablock(root @%llu) failed!\n", 2063 dev_bytenr); 2064 } 2065 btrfsic_release_block_ctx(&block_ctx); 2066 } 2067 2068 continue_loop: 2069 BUG_ON(!processed_len); 2070 dev_bytenr += processed_len; 2071 mapped_datav += processed_len >> PAGE_SHIFT; 2072 num_pages -= processed_len >> PAGE_SHIFT; 2073 goto again; 2074 } 2075 2076 static void btrfsic_bio_end_io(struct bio *bp) 2077 { 2078 struct btrfsic_block *block = (struct btrfsic_block *)bp->bi_private; 2079 int iodone_w_error; 2080 2081 /* mutex is not held! This is not save if IO is not yet completed 2082 * on umount */ 2083 iodone_w_error = 0; 2084 if (bp->bi_status) 2085 iodone_w_error = 1; 2086 2087 BUG_ON(NULL == block); 2088 bp->bi_private = block->orig_bio_private; 2089 bp->bi_end_io = block->orig_bio_end_io; 2090 2091 do { 2092 struct btrfsic_block *next_block; 2093 struct btrfsic_dev_state *const dev_state = block->dev_state; 2094 2095 if ((dev_state->state->print_mask & 2096 BTRFSIC_PRINT_MASK_END_IO_BIO_BH)) 2097 pr_info("bio_end_io(err=%d) for %c @%llu (%s/%llu/%d)\n", 2098 bp->bi_status, 2099 btrfsic_get_block_type(dev_state->state, block), 2100 block->logical_bytenr, dev_state->name, 2101 block->dev_bytenr, block->mirror_num); 2102 next_block = block->next_in_same_bio; 2103 block->iodone_w_error = iodone_w_error; 2104 if (block->submit_bio_bh_rw & REQ_PREFLUSH) { 2105 dev_state->last_flush_gen++; 2106 if ((dev_state->state->print_mask & 2107 BTRFSIC_PRINT_MASK_END_IO_BIO_BH)) 2108 pr_info("bio_end_io() new %s flush_gen=%llu\n", 2109 dev_state->name, 2110 dev_state->last_flush_gen); 2111 } 2112 if (block->submit_bio_bh_rw & REQ_FUA) 2113 block->flush_gen = 0; /* FUA completed means block is 2114 * on disk */ 2115 block->is_iodone = 1; /* for FLUSH, this releases the block */ 2116 block = next_block; 2117 } while (NULL != block); 2118 2119 bp->bi_end_io(bp); 2120 } 2121 2122 static int btrfsic_process_written_superblock( 2123 struct btrfsic_state *state, 2124 struct btrfsic_block *const superblock, 2125 struct btrfs_super_block *const super_hdr) 2126 { 2127 struct btrfs_fs_info *fs_info = state->fs_info; 2128 int pass; 2129 2130 superblock->generation = btrfs_super_generation(super_hdr); 2131 if (!(superblock->generation > state->max_superblock_generation || 2132 0 == state->max_superblock_generation)) { 2133 if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE) 2134 pr_info("btrfsic: superblock @%llu (%s/%llu/%d) with old gen %llu <= %llu\n", 2135 superblock->logical_bytenr, 2136 superblock->dev_state->name, 2137 superblock->dev_bytenr, superblock->mirror_num, 2138 btrfs_super_generation(super_hdr), 2139 state->max_superblock_generation); 2140 } else { 2141 if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE) 2142 pr_info("btrfsic: got new superblock @%llu (%s/%llu/%d) with new gen %llu > %llu\n", 2143 superblock->logical_bytenr, 2144 superblock->dev_state->name, 2145 superblock->dev_bytenr, superblock->mirror_num, 2146 btrfs_super_generation(super_hdr), 2147 state->max_superblock_generation); 2148 2149 state->max_superblock_generation = 2150 btrfs_super_generation(super_hdr); 2151 state->latest_superblock = superblock; 2152 } 2153 2154 for (pass = 0; pass < 3; pass++) { 2155 int ret; 2156 u64 next_bytenr; 2157 struct btrfsic_block *next_block; 2158 struct btrfsic_block_data_ctx tmp_next_block_ctx; 2159 struct btrfsic_block_link *l; 2160 int num_copies; 2161 int mirror_num; 2162 const char *additional_string = NULL; 2163 struct btrfs_disk_key tmp_disk_key = {0}; 2164 2165 btrfs_set_disk_key_objectid(&tmp_disk_key, 2166 BTRFS_ROOT_ITEM_KEY); 2167 btrfs_set_disk_key_objectid(&tmp_disk_key, 0); 2168 2169 switch (pass) { 2170 case 0: 2171 btrfs_set_disk_key_objectid(&tmp_disk_key, 2172 BTRFS_ROOT_TREE_OBJECTID); 2173 additional_string = "root "; 2174 next_bytenr = btrfs_super_root(super_hdr); 2175 if (state->print_mask & 2176 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) 2177 pr_info("root@%llu\n", next_bytenr); 2178 break; 2179 case 1: 2180 btrfs_set_disk_key_objectid(&tmp_disk_key, 2181 BTRFS_CHUNK_TREE_OBJECTID); 2182 additional_string = "chunk "; 2183 next_bytenr = btrfs_super_chunk_root(super_hdr); 2184 if (state->print_mask & 2185 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) 2186 pr_info("chunk@%llu\n", next_bytenr); 2187 break; 2188 case 2: 2189 btrfs_set_disk_key_objectid(&tmp_disk_key, 2190 BTRFS_TREE_LOG_OBJECTID); 2191 additional_string = "log "; 2192 next_bytenr = btrfs_super_log_root(super_hdr); 2193 if (0 == next_bytenr) 2194 continue; 2195 if (state->print_mask & 2196 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) 2197 pr_info("log@%llu\n", next_bytenr); 2198 break; 2199 } 2200 2201 num_copies = btrfs_num_copies(fs_info, next_bytenr, 2202 BTRFS_SUPER_INFO_SIZE); 2203 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) 2204 pr_info("num_copies(log_bytenr=%llu) = %d\n", 2205 next_bytenr, num_copies); 2206 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { 2207 int was_created; 2208 2209 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2210 pr_info("btrfsic_process_written_superblock(mirror_num=%d)\n", mirror_num); 2211 ret = btrfsic_map_block(state, next_bytenr, 2212 BTRFS_SUPER_INFO_SIZE, 2213 &tmp_next_block_ctx, 2214 mirror_num); 2215 if (ret) { 2216 pr_info("btrfsic: btrfsic_map_block(@%llu, mirror=%d) failed!\n", 2217 next_bytenr, mirror_num); 2218 return -1; 2219 } 2220 2221 next_block = btrfsic_block_lookup_or_add( 2222 state, 2223 &tmp_next_block_ctx, 2224 additional_string, 2225 1, 0, 1, 2226 mirror_num, 2227 &was_created); 2228 if (NULL == next_block) { 2229 btrfsic_release_block_ctx(&tmp_next_block_ctx); 2230 return -1; 2231 } 2232 2233 next_block->disk_key = tmp_disk_key; 2234 if (was_created) 2235 next_block->generation = 2236 BTRFSIC_GENERATION_UNKNOWN; 2237 l = btrfsic_block_link_lookup_or_add( 2238 state, 2239 &tmp_next_block_ctx, 2240 next_block, 2241 superblock, 2242 BTRFSIC_GENERATION_UNKNOWN); 2243 btrfsic_release_block_ctx(&tmp_next_block_ctx); 2244 if (NULL == l) 2245 return -1; 2246 } 2247 } 2248 2249 if (WARN_ON(-1 == btrfsic_check_all_ref_blocks(state, superblock, 0))) 2250 btrfsic_dump_tree(state); 2251 2252 return 0; 2253 } 2254 2255 static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state, 2256 struct btrfsic_block *const block, 2257 int recursion_level) 2258 { 2259 const struct btrfsic_block_link *l; 2260 int ret = 0; 2261 2262 if (recursion_level >= 3 + BTRFS_MAX_LEVEL) { 2263 /* 2264 * Note that this situation can happen and does not 2265 * indicate an error in regular cases. It happens 2266 * when disk blocks are freed and later reused. 2267 * The check-integrity module is not aware of any 2268 * block free operations, it just recognizes block 2269 * write operations. Therefore it keeps the linkage 2270 * information for a block until a block is 2271 * rewritten. This can temporarily cause incorrect 2272 * and even circular linkage information. This 2273 * causes no harm unless such blocks are referenced 2274 * by the most recent super block. 2275 */ 2276 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2277 pr_info("btrfsic: abort cyclic linkage (case 1).\n"); 2278 2279 return ret; 2280 } 2281 2282 /* 2283 * This algorithm is recursive because the amount of used stack 2284 * space is very small and the max recursion depth is limited. 2285 */ 2286 list_for_each_entry(l, &block->ref_to_list, node_ref_to) { 2287 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2288 pr_info("rl=%d, %c @%llu (%s/%llu/%d) %u* refers to %c @%llu (%s/%llu/%d)\n", 2289 recursion_level, 2290 btrfsic_get_block_type(state, block), 2291 block->logical_bytenr, block->dev_state->name, 2292 block->dev_bytenr, block->mirror_num, 2293 l->ref_cnt, 2294 btrfsic_get_block_type(state, l->block_ref_to), 2295 l->block_ref_to->logical_bytenr, 2296 l->block_ref_to->dev_state->name, 2297 l->block_ref_to->dev_bytenr, 2298 l->block_ref_to->mirror_num); 2299 if (l->block_ref_to->never_written) { 2300 pr_info("btrfs: attempt to write superblock which references block %c @%llu (%s/%llu/%d) which is never written!\n", 2301 btrfsic_get_block_type(state, l->block_ref_to), 2302 l->block_ref_to->logical_bytenr, 2303 l->block_ref_to->dev_state->name, 2304 l->block_ref_to->dev_bytenr, 2305 l->block_ref_to->mirror_num); 2306 ret = -1; 2307 } else if (!l->block_ref_to->is_iodone) { 2308 pr_info("btrfs: attempt to write superblock which references block %c @%llu (%s/%llu/%d) which is not yet iodone!\n", 2309 btrfsic_get_block_type(state, l->block_ref_to), 2310 l->block_ref_to->logical_bytenr, 2311 l->block_ref_to->dev_state->name, 2312 l->block_ref_to->dev_bytenr, 2313 l->block_ref_to->mirror_num); 2314 ret = -1; 2315 } else if (l->block_ref_to->iodone_w_error) { 2316 pr_info("btrfs: attempt to write superblock which references block %c @%llu (%s/%llu/%d) which has write error!\n", 2317 btrfsic_get_block_type(state, l->block_ref_to), 2318 l->block_ref_to->logical_bytenr, 2319 l->block_ref_to->dev_state->name, 2320 l->block_ref_to->dev_bytenr, 2321 l->block_ref_to->mirror_num); 2322 ret = -1; 2323 } else if (l->parent_generation != 2324 l->block_ref_to->generation && 2325 BTRFSIC_GENERATION_UNKNOWN != 2326 l->parent_generation && 2327 BTRFSIC_GENERATION_UNKNOWN != 2328 l->block_ref_to->generation) { 2329 pr_info("btrfs: attempt to write superblock which references block %c @%llu (%s/%llu/%d) with generation %llu != parent generation %llu!\n", 2330 btrfsic_get_block_type(state, l->block_ref_to), 2331 l->block_ref_to->logical_bytenr, 2332 l->block_ref_to->dev_state->name, 2333 l->block_ref_to->dev_bytenr, 2334 l->block_ref_to->mirror_num, 2335 l->block_ref_to->generation, 2336 l->parent_generation); 2337 ret = -1; 2338 } else if (l->block_ref_to->flush_gen > 2339 l->block_ref_to->dev_state->last_flush_gen) { 2340 pr_info("btrfs: attempt to write superblock which references block %c @%llu (%s/%llu/%d) which is not flushed out of disk's write cache (block flush_gen=%llu, dev->flush_gen=%llu)!\n", 2341 btrfsic_get_block_type(state, l->block_ref_to), 2342 l->block_ref_to->logical_bytenr, 2343 l->block_ref_to->dev_state->name, 2344 l->block_ref_to->dev_bytenr, 2345 l->block_ref_to->mirror_num, block->flush_gen, 2346 l->block_ref_to->dev_state->last_flush_gen); 2347 ret = -1; 2348 } else if (-1 == btrfsic_check_all_ref_blocks(state, 2349 l->block_ref_to, 2350 recursion_level + 2351 1)) { 2352 ret = -1; 2353 } 2354 } 2355 2356 return ret; 2357 } 2358 2359 static int btrfsic_is_block_ref_by_superblock( 2360 const struct btrfsic_state *state, 2361 const struct btrfsic_block *block, 2362 int recursion_level) 2363 { 2364 const struct btrfsic_block_link *l; 2365 2366 if (recursion_level >= 3 + BTRFS_MAX_LEVEL) { 2367 /* refer to comment at "abort cyclic linkage (case 1)" */ 2368 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2369 pr_info("btrfsic: abort cyclic linkage (case 2).\n"); 2370 2371 return 0; 2372 } 2373 2374 /* 2375 * This algorithm is recursive because the amount of used stack space 2376 * is very small and the max recursion depth is limited. 2377 */ 2378 list_for_each_entry(l, &block->ref_from_list, node_ref_from) { 2379 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2380 pr_info("rl=%d, %c @%llu (%s/%llu/%d) is ref %u* from %c @%llu (%s/%llu/%d)\n", 2381 recursion_level, 2382 btrfsic_get_block_type(state, block), 2383 block->logical_bytenr, block->dev_state->name, 2384 block->dev_bytenr, block->mirror_num, 2385 l->ref_cnt, 2386 btrfsic_get_block_type(state, l->block_ref_from), 2387 l->block_ref_from->logical_bytenr, 2388 l->block_ref_from->dev_state->name, 2389 l->block_ref_from->dev_bytenr, 2390 l->block_ref_from->mirror_num); 2391 if (l->block_ref_from->is_superblock && 2392 state->latest_superblock->dev_bytenr == 2393 l->block_ref_from->dev_bytenr && 2394 state->latest_superblock->dev_state->bdev == 2395 l->block_ref_from->dev_state->bdev) 2396 return 1; 2397 else if (btrfsic_is_block_ref_by_superblock(state, 2398 l->block_ref_from, 2399 recursion_level + 2400 1)) 2401 return 1; 2402 } 2403 2404 return 0; 2405 } 2406 2407 static void btrfsic_print_add_link(const struct btrfsic_state *state, 2408 const struct btrfsic_block_link *l) 2409 { 2410 pr_info("Add %u* link from %c @%llu (%s/%llu/%d) to %c @%llu (%s/%llu/%d).\n", 2411 l->ref_cnt, 2412 btrfsic_get_block_type(state, l->block_ref_from), 2413 l->block_ref_from->logical_bytenr, 2414 l->block_ref_from->dev_state->name, 2415 l->block_ref_from->dev_bytenr, l->block_ref_from->mirror_num, 2416 btrfsic_get_block_type(state, l->block_ref_to), 2417 l->block_ref_to->logical_bytenr, 2418 l->block_ref_to->dev_state->name, l->block_ref_to->dev_bytenr, 2419 l->block_ref_to->mirror_num); 2420 } 2421 2422 static void btrfsic_print_rem_link(const struct btrfsic_state *state, 2423 const struct btrfsic_block_link *l) 2424 { 2425 pr_info("Rem %u* link from %c @%llu (%s/%llu/%d) to %c @%llu (%s/%llu/%d).\n", 2426 l->ref_cnt, 2427 btrfsic_get_block_type(state, l->block_ref_from), 2428 l->block_ref_from->logical_bytenr, 2429 l->block_ref_from->dev_state->name, 2430 l->block_ref_from->dev_bytenr, l->block_ref_from->mirror_num, 2431 btrfsic_get_block_type(state, l->block_ref_to), 2432 l->block_ref_to->logical_bytenr, 2433 l->block_ref_to->dev_state->name, l->block_ref_to->dev_bytenr, 2434 l->block_ref_to->mirror_num); 2435 } 2436 2437 static char btrfsic_get_block_type(const struct btrfsic_state *state, 2438 const struct btrfsic_block *block) 2439 { 2440 if (block->is_superblock && 2441 state->latest_superblock->dev_bytenr == block->dev_bytenr && 2442 state->latest_superblock->dev_state->bdev == block->dev_state->bdev) 2443 return 'S'; 2444 else if (block->is_superblock) 2445 return 's'; 2446 else if (block->is_metadata) 2447 return 'M'; 2448 else 2449 return 'D'; 2450 } 2451 2452 static void btrfsic_dump_tree(const struct btrfsic_state *state) 2453 { 2454 btrfsic_dump_tree_sub(state, state->latest_superblock, 0); 2455 } 2456 2457 static void btrfsic_dump_tree_sub(const struct btrfsic_state *state, 2458 const struct btrfsic_block *block, 2459 int indent_level) 2460 { 2461 const struct btrfsic_block_link *l; 2462 int indent_add; 2463 static char buf[80]; 2464 int cursor_position; 2465 2466 /* 2467 * Should better fill an on-stack buffer with a complete line and 2468 * dump it at once when it is time to print a newline character. 2469 */ 2470 2471 /* 2472 * This algorithm is recursive because the amount of used stack space 2473 * is very small and the max recursion depth is limited. 2474 */ 2475 indent_add = sprintf(buf, "%c-%llu(%s/%llu/%u)", 2476 btrfsic_get_block_type(state, block), 2477 block->logical_bytenr, block->dev_state->name, 2478 block->dev_bytenr, block->mirror_num); 2479 if (indent_level + indent_add > BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL) { 2480 printk("[...]\n"); 2481 return; 2482 } 2483 printk(buf); 2484 indent_level += indent_add; 2485 if (list_empty(&block->ref_to_list)) { 2486 printk("\n"); 2487 return; 2488 } 2489 if (block->mirror_num > 1 && 2490 !(state->print_mask & BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS)) { 2491 printk(" [...]\n"); 2492 return; 2493 } 2494 2495 cursor_position = indent_level; 2496 list_for_each_entry(l, &block->ref_to_list, node_ref_to) { 2497 while (cursor_position < indent_level) { 2498 printk(" "); 2499 cursor_position++; 2500 } 2501 if (l->ref_cnt > 1) 2502 indent_add = sprintf(buf, " %d*--> ", l->ref_cnt); 2503 else 2504 indent_add = sprintf(buf, " --> "); 2505 if (indent_level + indent_add > 2506 BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL) { 2507 printk("[...]\n"); 2508 cursor_position = 0; 2509 continue; 2510 } 2511 2512 printk(buf); 2513 2514 btrfsic_dump_tree_sub(state, l->block_ref_to, 2515 indent_level + indent_add); 2516 cursor_position = 0; 2517 } 2518 } 2519 2520 static struct btrfsic_block_link *btrfsic_block_link_lookup_or_add( 2521 struct btrfsic_state *state, 2522 struct btrfsic_block_data_ctx *next_block_ctx, 2523 struct btrfsic_block *next_block, 2524 struct btrfsic_block *from_block, 2525 u64 parent_generation) 2526 { 2527 struct btrfsic_block_link *l; 2528 2529 l = btrfsic_block_link_hashtable_lookup(next_block_ctx->dev->bdev, 2530 next_block_ctx->dev_bytenr, 2531 from_block->dev_state->bdev, 2532 from_block->dev_bytenr, 2533 &state->block_link_hashtable); 2534 if (NULL == l) { 2535 l = btrfsic_block_link_alloc(); 2536 if (!l) 2537 return NULL; 2538 2539 l->block_ref_to = next_block; 2540 l->block_ref_from = from_block; 2541 l->ref_cnt = 1; 2542 l->parent_generation = parent_generation; 2543 2544 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2545 btrfsic_print_add_link(state, l); 2546 2547 list_add(&l->node_ref_to, &from_block->ref_to_list); 2548 list_add(&l->node_ref_from, &next_block->ref_from_list); 2549 2550 btrfsic_block_link_hashtable_add(l, 2551 &state->block_link_hashtable); 2552 } else { 2553 l->ref_cnt++; 2554 l->parent_generation = parent_generation; 2555 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2556 btrfsic_print_add_link(state, l); 2557 } 2558 2559 return l; 2560 } 2561 2562 static struct btrfsic_block *btrfsic_block_lookup_or_add( 2563 struct btrfsic_state *state, 2564 struct btrfsic_block_data_ctx *block_ctx, 2565 const char *additional_string, 2566 int is_metadata, 2567 int is_iodone, 2568 int never_written, 2569 int mirror_num, 2570 int *was_created) 2571 { 2572 struct btrfsic_block *block; 2573 2574 block = btrfsic_block_hashtable_lookup(block_ctx->dev->bdev, 2575 block_ctx->dev_bytenr, 2576 &state->block_hashtable); 2577 if (NULL == block) { 2578 struct btrfsic_dev_state *dev_state; 2579 2580 block = btrfsic_block_alloc(); 2581 if (!block) 2582 return NULL; 2583 2584 dev_state = btrfsic_dev_state_lookup(block_ctx->dev->bdev->bd_dev); 2585 if (NULL == dev_state) { 2586 pr_info("btrfsic: error, lookup dev_state failed!\n"); 2587 btrfsic_block_free(block); 2588 return NULL; 2589 } 2590 block->dev_state = dev_state; 2591 block->dev_bytenr = block_ctx->dev_bytenr; 2592 block->logical_bytenr = block_ctx->start; 2593 block->is_metadata = is_metadata; 2594 block->is_iodone = is_iodone; 2595 block->never_written = never_written; 2596 block->mirror_num = mirror_num; 2597 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2598 pr_info("New %s%c-block @%llu (%s/%llu/%d)\n", 2599 additional_string, 2600 btrfsic_get_block_type(state, block), 2601 block->logical_bytenr, dev_state->name, 2602 block->dev_bytenr, mirror_num); 2603 list_add(&block->all_blocks_node, &state->all_blocks_list); 2604 btrfsic_block_hashtable_add(block, &state->block_hashtable); 2605 if (NULL != was_created) 2606 *was_created = 1; 2607 } else { 2608 if (NULL != was_created) 2609 *was_created = 0; 2610 } 2611 2612 return block; 2613 } 2614 2615 static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state, 2616 u64 bytenr, 2617 struct btrfsic_dev_state *dev_state, 2618 u64 dev_bytenr) 2619 { 2620 struct btrfs_fs_info *fs_info = state->fs_info; 2621 struct btrfsic_block_data_ctx block_ctx; 2622 int num_copies; 2623 int mirror_num; 2624 int match = 0; 2625 int ret; 2626 2627 num_copies = btrfs_num_copies(fs_info, bytenr, state->metablock_size); 2628 2629 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { 2630 ret = btrfsic_map_block(state, bytenr, state->metablock_size, 2631 &block_ctx, mirror_num); 2632 if (ret) { 2633 pr_info("btrfsic: btrfsic_map_block(logical @%llu, mirror %d) failed!\n", 2634 bytenr, mirror_num); 2635 continue; 2636 } 2637 2638 if (dev_state->bdev == block_ctx.dev->bdev && 2639 dev_bytenr == block_ctx.dev_bytenr) { 2640 match++; 2641 btrfsic_release_block_ctx(&block_ctx); 2642 break; 2643 } 2644 btrfsic_release_block_ctx(&block_ctx); 2645 } 2646 2647 if (WARN_ON(!match)) { 2648 pr_info("btrfs: attempt to write M-block which contains logical bytenr that doesn't map to dev+physical bytenr of submit_bio, buffer->log_bytenr=%llu, submit_bio(bdev=%s, phys_bytenr=%llu)!\n", 2649 bytenr, dev_state->name, dev_bytenr); 2650 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { 2651 ret = btrfsic_map_block(state, bytenr, 2652 state->metablock_size, 2653 &block_ctx, mirror_num); 2654 if (ret) 2655 continue; 2656 2657 pr_info("Read logical bytenr @%llu maps to (%s/%llu/%d)\n", 2658 bytenr, block_ctx.dev->name, 2659 block_ctx.dev_bytenr, mirror_num); 2660 } 2661 } 2662 } 2663 2664 static struct btrfsic_dev_state *btrfsic_dev_state_lookup(dev_t dev) 2665 { 2666 return btrfsic_dev_state_hashtable_lookup(dev, 2667 &btrfsic_dev_state_hashtable); 2668 } 2669 2670 static void __btrfsic_submit_bio(struct bio *bio) 2671 { 2672 struct btrfsic_dev_state *dev_state; 2673 2674 if (!btrfsic_is_initialized) 2675 return; 2676 2677 mutex_lock(&btrfsic_mutex); 2678 /* since btrfsic_submit_bio() is also called before 2679 * btrfsic_mount(), this might return NULL */ 2680 dev_state = btrfsic_dev_state_lookup(bio_dev(bio) + bio->bi_partno); 2681 if (NULL != dev_state && 2682 (bio_op(bio) == REQ_OP_WRITE) && bio_has_data(bio)) { 2683 unsigned int i = 0; 2684 u64 dev_bytenr; 2685 u64 cur_bytenr; 2686 struct bio_vec bvec; 2687 struct bvec_iter iter; 2688 int bio_is_patched; 2689 char **mapped_datav; 2690 unsigned int segs = bio_segments(bio); 2691 2692 dev_bytenr = 512 * bio->bi_iter.bi_sector; 2693 bio_is_patched = 0; 2694 if (dev_state->state->print_mask & 2695 BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH) 2696 pr_info("submit_bio(rw=%d,0x%x, bi_vcnt=%u, bi_sector=%llu (bytenr %llu), bi_disk=%p)\n", 2697 bio_op(bio), bio->bi_opf, segs, 2698 (unsigned long long)bio->bi_iter.bi_sector, 2699 dev_bytenr, bio->bi_disk); 2700 2701 mapped_datav = kmalloc_array(segs, 2702 sizeof(*mapped_datav), GFP_NOFS); 2703 if (!mapped_datav) 2704 goto leave; 2705 cur_bytenr = dev_bytenr; 2706 2707 bio_for_each_segment(bvec, bio, iter) { 2708 BUG_ON(bvec.bv_len != PAGE_SIZE); 2709 mapped_datav[i] = kmap(bvec.bv_page); 2710 i++; 2711 2712 if (dev_state->state->print_mask & 2713 BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH_VERBOSE) 2714 pr_info("#%u: bytenr=%llu, len=%u, offset=%u\n", 2715 i, cur_bytenr, bvec.bv_len, bvec.bv_offset); 2716 cur_bytenr += bvec.bv_len; 2717 } 2718 btrfsic_process_written_block(dev_state, dev_bytenr, 2719 mapped_datav, segs, 2720 bio, &bio_is_patched, 2721 bio->bi_opf); 2722 bio_for_each_segment(bvec, bio, iter) 2723 kunmap(bvec.bv_page); 2724 kfree(mapped_datav); 2725 } else if (NULL != dev_state && (bio->bi_opf & REQ_PREFLUSH)) { 2726 if (dev_state->state->print_mask & 2727 BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH) 2728 pr_info("submit_bio(rw=%d,0x%x FLUSH, disk=%p)\n", 2729 bio_op(bio), bio->bi_opf, bio->bi_disk); 2730 if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) { 2731 if ((dev_state->state->print_mask & 2732 (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH | 2733 BTRFSIC_PRINT_MASK_VERBOSE))) 2734 pr_info("btrfsic_submit_bio(%s) with FLUSH but dummy block already in use (ignored)!\n", 2735 dev_state->name); 2736 } else { 2737 struct btrfsic_block *const block = 2738 &dev_state->dummy_block_for_bio_bh_flush; 2739 2740 block->is_iodone = 0; 2741 block->never_written = 0; 2742 block->iodone_w_error = 0; 2743 block->flush_gen = dev_state->last_flush_gen + 1; 2744 block->submit_bio_bh_rw = bio->bi_opf; 2745 block->orig_bio_private = bio->bi_private; 2746 block->orig_bio_end_io = bio->bi_end_io; 2747 block->next_in_same_bio = NULL; 2748 bio->bi_private = block; 2749 bio->bi_end_io = btrfsic_bio_end_io; 2750 } 2751 } 2752 leave: 2753 mutex_unlock(&btrfsic_mutex); 2754 } 2755 2756 void btrfsic_submit_bio(struct bio *bio) 2757 { 2758 __btrfsic_submit_bio(bio); 2759 submit_bio(bio); 2760 } 2761 2762 int btrfsic_submit_bio_wait(struct bio *bio) 2763 { 2764 __btrfsic_submit_bio(bio); 2765 return submit_bio_wait(bio); 2766 } 2767 2768 int btrfsic_mount(struct btrfs_fs_info *fs_info, 2769 struct btrfs_fs_devices *fs_devices, 2770 int including_extent_data, u32 print_mask) 2771 { 2772 int ret; 2773 struct btrfsic_state *state; 2774 struct list_head *dev_head = &fs_devices->devices; 2775 struct btrfs_device *device; 2776 2777 if (!PAGE_ALIGNED(fs_info->nodesize)) { 2778 pr_info("btrfsic: cannot handle nodesize %d not being a multiple of PAGE_SIZE %ld!\n", 2779 fs_info->nodesize, PAGE_SIZE); 2780 return -1; 2781 } 2782 if (!PAGE_ALIGNED(fs_info->sectorsize)) { 2783 pr_info("btrfsic: cannot handle sectorsize %d not being a multiple of PAGE_SIZE %ld!\n", 2784 fs_info->sectorsize, PAGE_SIZE); 2785 return -1; 2786 } 2787 state = kvzalloc(sizeof(*state), GFP_KERNEL); 2788 if (!state) 2789 return -ENOMEM; 2790 2791 if (!btrfsic_is_initialized) { 2792 mutex_init(&btrfsic_mutex); 2793 btrfsic_dev_state_hashtable_init(&btrfsic_dev_state_hashtable); 2794 btrfsic_is_initialized = 1; 2795 } 2796 mutex_lock(&btrfsic_mutex); 2797 state->fs_info = fs_info; 2798 state->print_mask = print_mask; 2799 state->include_extent_data = including_extent_data; 2800 state->csum_size = 0; 2801 state->metablock_size = fs_info->nodesize; 2802 state->datablock_size = fs_info->sectorsize; 2803 INIT_LIST_HEAD(&state->all_blocks_list); 2804 btrfsic_block_hashtable_init(&state->block_hashtable); 2805 btrfsic_block_link_hashtable_init(&state->block_link_hashtable); 2806 state->max_superblock_generation = 0; 2807 state->latest_superblock = NULL; 2808 2809 list_for_each_entry(device, dev_head, dev_list) { 2810 struct btrfsic_dev_state *ds; 2811 const char *p; 2812 2813 if (!device->bdev || !device->name) 2814 continue; 2815 2816 ds = btrfsic_dev_state_alloc(); 2817 if (NULL == ds) { 2818 mutex_unlock(&btrfsic_mutex); 2819 return -ENOMEM; 2820 } 2821 ds->bdev = device->bdev; 2822 ds->state = state; 2823 bdevname(ds->bdev, ds->name); 2824 ds->name[BDEVNAME_SIZE - 1] = '\0'; 2825 p = kbasename(ds->name); 2826 strlcpy(ds->name, p, sizeof(ds->name)); 2827 btrfsic_dev_state_hashtable_add(ds, 2828 &btrfsic_dev_state_hashtable); 2829 } 2830 2831 ret = btrfsic_process_superblock(state, fs_devices); 2832 if (0 != ret) { 2833 mutex_unlock(&btrfsic_mutex); 2834 btrfsic_unmount(fs_devices); 2835 return ret; 2836 } 2837 2838 if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_DATABASE) 2839 btrfsic_dump_database(state); 2840 if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_TREE) 2841 btrfsic_dump_tree(state); 2842 2843 mutex_unlock(&btrfsic_mutex); 2844 return 0; 2845 } 2846 2847 void btrfsic_unmount(struct btrfs_fs_devices *fs_devices) 2848 { 2849 struct btrfsic_block *b_all, *tmp_all; 2850 struct btrfsic_state *state; 2851 struct list_head *dev_head = &fs_devices->devices; 2852 struct btrfs_device *device; 2853 2854 if (!btrfsic_is_initialized) 2855 return; 2856 2857 mutex_lock(&btrfsic_mutex); 2858 2859 state = NULL; 2860 list_for_each_entry(device, dev_head, dev_list) { 2861 struct btrfsic_dev_state *ds; 2862 2863 if (!device->bdev || !device->name) 2864 continue; 2865 2866 ds = btrfsic_dev_state_hashtable_lookup( 2867 device->bdev->bd_dev, 2868 &btrfsic_dev_state_hashtable); 2869 if (NULL != ds) { 2870 state = ds->state; 2871 btrfsic_dev_state_hashtable_remove(ds); 2872 btrfsic_dev_state_free(ds); 2873 } 2874 } 2875 2876 if (NULL == state) { 2877 pr_info("btrfsic: error, cannot find state information on umount!\n"); 2878 mutex_unlock(&btrfsic_mutex); 2879 return; 2880 } 2881 2882 /* 2883 * Don't care about keeping the lists' state up to date, 2884 * just free all memory that was allocated dynamically. 2885 * Free the blocks and the block_links. 2886 */ 2887 list_for_each_entry_safe(b_all, tmp_all, &state->all_blocks_list, 2888 all_blocks_node) { 2889 struct btrfsic_block_link *l, *tmp; 2890 2891 list_for_each_entry_safe(l, tmp, &b_all->ref_to_list, 2892 node_ref_to) { 2893 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2894 btrfsic_print_rem_link(state, l); 2895 2896 l->ref_cnt--; 2897 if (0 == l->ref_cnt) 2898 btrfsic_block_link_free(l); 2899 } 2900 2901 if (b_all->is_iodone || b_all->never_written) 2902 btrfsic_block_free(b_all); 2903 else 2904 pr_info("btrfs: attempt to free %c-block @%llu (%s/%llu/%d) on umount which is not yet iodone!\n", 2905 btrfsic_get_block_type(state, b_all), 2906 b_all->logical_bytenr, b_all->dev_state->name, 2907 b_all->dev_bytenr, b_all->mirror_num); 2908 } 2909 2910 mutex_unlock(&btrfsic_mutex); 2911 2912 kvfree(state); 2913 } 2914