/*
 * Copyright (C) STRATO AG 2011. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

/*
 * This module can be used to catch cases when the btrfs kernel
 * code executes write requests to the disk that bring the file
 * system into an inconsistent state. In such a state, a power-loss
 * or kernel panic event would cause the data on disk to be
 * lost or at least damaged.
 *
 * Code is added that examines all block write requests during
 * runtime (including writes of the super block). Three rules
 * are verified and an error is printed on violation of the
 * rules:
 * 1. It is not allowed to write a disk block which is
 *    currently referenced by the super block (either directly
 *    or indirectly).
 * 2. When a super block is written, it is verified that all
 *    referenced (directly or indirectly) blocks fulfill the
 *    following requirements:
 *    2a. All referenced blocks have either been present when
 *        the file system was mounted, (i.e., they have been
 *        referenced by the super block) or they have been
 *        written since then and the write completion callback
 *        was called and no write error was indicated and a
 *        FLUSH request to the device where these blocks are
 *        located was received and completed.
 *    2b. All referenced blocks need to have a generation
 *        number which is equal to the parent's number.
 *
 * One issue that was found using this module was that the log
 * tree on disk became temporarily corrupted because disk blocks
 * that had been in use for the log tree had been freed and
 * reused too early, while being referenced by the written super
 * block.
 *
 * The search term in the kernel log that can be used to filter
 * on the existence of detected integrity issues is
 * "btrfs: attempt".
 *
 * The integrity check is enabled via mount options. These
 * mount options are only supported if the integrity check
 * tool is compiled by defining BTRFS_FS_CHECK_INTEGRITY.
 *
 * Example #1, apply integrity checks to all metadata:
 * mount /dev/sdb1 /mnt -o check_int
 *
 * Example #2, apply integrity checks to all metadata and
 * to data extents:
 * mount /dev/sdb1 /mnt -o check_int_data
 *
 * Example #3, apply integrity checks to all metadata and dump
 * the tree that the super block references to kernel messages
 * each time after a super block was written:
 * mount /dev/sdb1 /mnt -o check_int,check_int_print_mask=263
 *
 * If the integrity check tool is included and activated in
 * the mount options, plenty of kernel memory is used, and
 * plenty of additional CPU cycles are spent. Enabling this
 * functionality is not intended for normal use. In most
 * cases, unless you are a btrfs developer who needs to verify
 * the integrity of (super)-block write requests, do not
 * enable the config option BTRFS_FS_CHECK_INTEGRITY to
 * include and compile the integrity check tool.
*/

#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/buffer_head.h>
#include <linux/mutex.h>
#include <linux/crc32c.h>
#include <linux/genhd.h>
#include <linux/blkdev.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "extent_io.h"
#include "volumes.h"
#include "print-tree.h"
#include "locking.h"
#include "check-integrity.h"
#include "rcu-string.h"

/*
 * Number of buckets of the three hashtables. The hash functions in this
 * file reduce their value with "& (SIZE - 1)", which only addresses every
 * bucket when SIZE is a power of two.
 */
#define BTRFSIC_BLOCK_HASHTABLE_SIZE 0x10000
#define BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE 0x10000
#define BTRFSIC_DEV2STATE_HASHTABLE_SIZE 0x100

/*
 * Magic values stored in the magic_num/magic member of the objects defined
 * below; the corresponding *_free() helpers BUG() when the value does not
 * match.
 */
#define BTRFSIC_BLOCK_MAGIC_NUMBER 0x14491051
#define BTRFSIC_BLOCK_LINK_MAGIC_NUMBER 0x11070807
#define BTRFSIC_DEV2STATE_MAGIC_NUMBER 0x20111530
#define BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER 20111300
#define BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL (200 - 6)	/* in characters,
							 * excluding " [...]" */
/* generation value of a block whose generation has not been determined */
#define BTRFSIC_GENERATION_UNKNOWN ((u64)-1)

/*
 * The definition of the bitmask fields for the print_mask.
 * They are specified with the mount option check_int_print_mask.
 */
#define BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE			0x00000001
#define BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION		0x00000002
#define BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE			0x00000004
#define BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE			0x00000008
#define BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH			0x00000010
#define BTRFSIC_PRINT_MASK_END_IO_BIO_BH			0x00000020
#define BTRFSIC_PRINT_MASK_VERBOSE				0x00000040
#define BTRFSIC_PRINT_MASK_VERY_VERBOSE				0x00000080
#define BTRFSIC_PRINT_MASK_INITIAL_TREE				0x00000100
#define BTRFSIC_PRINT_MASK_INITIAL_ALL_TREES			0x00000200
#define BTRFSIC_PRINT_MASK_INITIAL_DATABASE			0x00000400
#define BTRFSIC_PRINT_MASK_NUM_COPIES				0x00000800
#define BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS		0x00001000

struct btrfsic_dev_state;
struct btrfsic_state;

/*
 * One tracked disk block. Blocks are found in the block hashtable by the
 * pair (dev_state->bdev, dev_bytenr) and are additionally chained on the
 * all_blocks_list of the owning btrfsic_state.
 */
struct btrfsic_block {
	u32 magic_num;		/* only used for debug purposes */
	unsigned int is_metadata:1;	/* if it is meta-data, not data-data */
	unsigned int is_superblock:1;	/* if it is one of the superblocks */
	unsigned int is_iodone:1;	/* if is done by lower subsystem */
	unsigned int iodone_w_error:1;	/* error was indicated to endio */
	unsigned int never_written:1;	/* block was added because it was
					 * referenced, not because it was
					 * written */
	unsigned int mirror_num;	/* large enough to hold
					 * BTRFS_SUPER_MIRROR_MAX */
	struct btrfsic_dev_state *dev_state;
	u64 dev_bytenr;		/* key, physical byte num on disk */
	u64 logical_bytenr;	/* logical byte num on disk */
	u64 generation;
	struct btrfs_disk_key disk_key;	/* extra info to print in case of
					 * issues, will not always be correct */
	struct list_head collision_resolving_node;	/* list node */
	struct list_head all_blocks_node;	/* list node */

	/* the following two lists contain block_link items */
	struct list_head ref_to_list;	/* list */
	struct list_head ref_from_list;	/* list */
	struct btrfsic_block *next_in_same_bio;
	void *orig_bio_bh_private;
	union {
		bio_end_io_t *bio;
		bh_end_io_t *bh;
	} orig_bio_bh_end_io;
	int submit_bio_bh_rw;
	u64 flush_gen; /* only valid if !never_written */
};

/*
 * Elements of this type are allocated dynamically and required because
 * each block object can refer to and can be ref from multiple blocks.
 * The key to look them up in the hashtable is the dev_bytenr of
 * the block ref to plus the one from the block referred from.
 * The fact that they are searchable via a hashtable and that a
 * ref_cnt is maintained is not required for the btrfs integrity
 * check algorithm itself, it is only used to make the output more
 * beautiful in case that an error is detected (an error is defined
 * as a write operation to a block while that block is still referenced).
 */
struct btrfsic_block_link {
	u32 magic_num;		/* only used for debug purposes */
	u32 ref_cnt;
	struct list_head node_ref_to;	/* list node */
	struct list_head node_ref_from;	/* list node */
	struct list_head collision_resolving_node;	/* list node */
	struct btrfsic_block *block_ref_to;
	struct btrfsic_block *block_ref_from;
	u64 parent_generation;
};

/*
 * Per block device state. Instances are found via the global device
 * hashtable, keyed by the bdev pointer.
 */
struct btrfsic_dev_state {
	u32 magic_num;		/* only used for debug purposes */
	struct block_device *bdev;
	struct btrfsic_state *state;
	struct list_head collision_resolving_node;	/* list node */
	struct btrfsic_block dummy_block_for_bio_bh_flush;
	u64 last_flush_gen;
	char name[BDEVNAME_SIZE];
};

/* Buckets of btrfsic_block, chained via collision_resolving_node */
struct btrfsic_block_hashtable {
	struct list_head table[BTRFSIC_BLOCK_HASHTABLE_SIZE];
};

/* Buckets of btrfsic_block_link, chained via collision_resolving_node */
struct btrfsic_block_link_hashtable {
	struct list_head table[BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE];
};

/* Buckets of btrfsic_dev_state, chained via collision_resolving_node */
struct btrfsic_dev_state_hashtable {
	struct list_head table[BTRFSIC_DEV2STATE_HASHTABLE_SIZE];
};

/*
 * Location (and, once read, contents) of one block. It is filled in
 * through the block_ctx_out parameter of btrfsic_map_block() and
 * btrfsic_map_superblock() and handed to btrfsic_release_block_ctx()
 * when no longer needed.
 */
struct btrfsic_block_data_ctx {
	u64 start;		/* virtual bytenr */
	u64 dev_bytenr;		/* physical bytenr on device */
	u32 len;
	struct btrfsic_dev_state *dev;
	char **datav;
	struct page **pagev;
	void *mem_to_free;
};

/* This structure is used to implement recursion without occupying
 * any stack space, refer to btrfsic_process_metablock() */
struct btrfsic_stack_frame {
	u32 magic;
	u32 nr;
	int error;
	int i;
	int limit_nesting;
	int num_copies;
	int mirror_num;
	struct btrfsic_block *block;
	struct btrfsic_block_data_ctx *block_ctx;
	struct btrfsic_block *next_block;
	struct btrfsic_block_data_ctx next_block_ctx;
	struct btrfs_header *hdr;
	struct btrfsic_stack_frame *prev;
};

/* Some state per mounted filesystem */
struct btrfsic_state {
	u32 print_mask;		/* BTRFSIC_PRINT_MASK_* bits */
	int include_extent_data;
	int csum_size;
	struct list_head all_blocks_list;
	struct btrfsic_block_hashtable block_hashtable;
	struct btrfsic_block_link_hashtable block_link_hashtable;
	struct btrfs_root *root;
	u64 max_superblock_generation;
	struct btrfsic_block *latest_superblock;
	u32 metablock_size;
	u32 datablock_size;
};

static void btrfsic_block_init(struct btrfsic_block *b);
static struct btrfsic_block *btrfsic_block_alloc(void);
static void btrfsic_block_free(struct btrfsic_block *b);
static void btrfsic_block_link_init(struct btrfsic_block_link *n);
static struct btrfsic_block_link *btrfsic_block_link_alloc(void);
static void btrfsic_block_link_free(struct btrfsic_block_link *n);
static void btrfsic_dev_state_init(struct btrfsic_dev_state *ds);
static struct btrfsic_dev_state *btrfsic_dev_state_alloc(void);
static void btrfsic_dev_state_free(struct btrfsic_dev_state *ds);
static void btrfsic_block_hashtable_init(struct btrfsic_block_hashtable *h);
static void btrfsic_block_hashtable_add(struct btrfsic_block *b,
					struct btrfsic_block_hashtable *h);
static void
btrfsic_block_hashtable_remove(struct btrfsic_block *b); 264 static struct btrfsic_block *btrfsic_block_hashtable_lookup( 265 struct block_device *bdev, 266 u64 dev_bytenr, 267 struct btrfsic_block_hashtable *h); 268 static void btrfsic_block_link_hashtable_init( 269 struct btrfsic_block_link_hashtable *h); 270 static void btrfsic_block_link_hashtable_add( 271 struct btrfsic_block_link *l, 272 struct btrfsic_block_link_hashtable *h); 273 static void btrfsic_block_link_hashtable_remove(struct btrfsic_block_link *l); 274 static struct btrfsic_block_link *btrfsic_block_link_hashtable_lookup( 275 struct block_device *bdev_ref_to, 276 u64 dev_bytenr_ref_to, 277 struct block_device *bdev_ref_from, 278 u64 dev_bytenr_ref_from, 279 struct btrfsic_block_link_hashtable *h); 280 static void btrfsic_dev_state_hashtable_init( 281 struct btrfsic_dev_state_hashtable *h); 282 static void btrfsic_dev_state_hashtable_add( 283 struct btrfsic_dev_state *ds, 284 struct btrfsic_dev_state_hashtable *h); 285 static void btrfsic_dev_state_hashtable_remove(struct btrfsic_dev_state *ds); 286 static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup( 287 struct block_device *bdev, 288 struct btrfsic_dev_state_hashtable *h); 289 static struct btrfsic_stack_frame *btrfsic_stack_frame_alloc(void); 290 static void btrfsic_stack_frame_free(struct btrfsic_stack_frame *sf); 291 static int btrfsic_process_superblock(struct btrfsic_state *state, 292 struct btrfs_fs_devices *fs_devices); 293 static int btrfsic_process_metablock(struct btrfsic_state *state, 294 struct btrfsic_block *block, 295 struct btrfsic_block_data_ctx *block_ctx, 296 int limit_nesting, int force_iodone_flag); 297 static void btrfsic_read_from_block_data( 298 struct btrfsic_block_data_ctx *block_ctx, 299 void *dst, u32 offset, size_t len); 300 static int btrfsic_create_link_to_next_block( 301 struct btrfsic_state *state, 302 struct btrfsic_block *block, 303 struct btrfsic_block_data_ctx 304 *block_ctx, u64 next_bytenr, 305 
int limit_nesting, 306 struct btrfsic_block_data_ctx *next_block_ctx, 307 struct btrfsic_block **next_blockp, 308 int force_iodone_flag, 309 int *num_copiesp, int *mirror_nump, 310 struct btrfs_disk_key *disk_key, 311 u64 parent_generation); 312 static int btrfsic_handle_extent_data(struct btrfsic_state *state, 313 struct btrfsic_block *block, 314 struct btrfsic_block_data_ctx *block_ctx, 315 u32 item_offset, int force_iodone_flag); 316 static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len, 317 struct btrfsic_block_data_ctx *block_ctx_out, 318 int mirror_num); 319 static int btrfsic_map_superblock(struct btrfsic_state *state, u64 bytenr, 320 u32 len, struct block_device *bdev, 321 struct btrfsic_block_data_ctx *block_ctx_out); 322 static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx); 323 static int btrfsic_read_block(struct btrfsic_state *state, 324 struct btrfsic_block_data_ctx *block_ctx); 325 static void btrfsic_dump_database(struct btrfsic_state *state); 326 static void btrfsic_complete_bio_end_io(struct bio *bio, int err); 327 static int btrfsic_test_for_metadata(struct btrfsic_state *state, 328 char **datav, unsigned int num_pages); 329 static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, 330 u64 dev_bytenr, char **mapped_datav, 331 unsigned int num_pages, 332 struct bio *bio, int *bio_is_patched, 333 struct buffer_head *bh, 334 int submit_bio_bh_rw); 335 static int btrfsic_process_written_superblock( 336 struct btrfsic_state *state, 337 struct btrfsic_block *const block, 338 struct btrfs_super_block *const super_hdr); 339 static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status); 340 static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate); 341 static int btrfsic_is_block_ref_by_superblock(const struct btrfsic_state *state, 342 const struct btrfsic_block *block, 343 int recursion_level); 344 static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state, 
345 struct btrfsic_block *const block, 346 int recursion_level); 347 static void btrfsic_print_add_link(const struct btrfsic_state *state, 348 const struct btrfsic_block_link *l); 349 static void btrfsic_print_rem_link(const struct btrfsic_state *state, 350 const struct btrfsic_block_link *l); 351 static char btrfsic_get_block_type(const struct btrfsic_state *state, 352 const struct btrfsic_block *block); 353 static void btrfsic_dump_tree(const struct btrfsic_state *state); 354 static void btrfsic_dump_tree_sub(const struct btrfsic_state *state, 355 const struct btrfsic_block *block, 356 int indent_level); 357 static struct btrfsic_block_link *btrfsic_block_link_lookup_or_add( 358 struct btrfsic_state *state, 359 struct btrfsic_block_data_ctx *next_block_ctx, 360 struct btrfsic_block *next_block, 361 struct btrfsic_block *from_block, 362 u64 parent_generation); 363 static struct btrfsic_block *btrfsic_block_lookup_or_add( 364 struct btrfsic_state *state, 365 struct btrfsic_block_data_ctx *block_ctx, 366 const char *additional_string, 367 int is_metadata, 368 int is_iodone, 369 int never_written, 370 int mirror_num, 371 int *was_created); 372 static int btrfsic_process_superblock_dev_mirror( 373 struct btrfsic_state *state, 374 struct btrfsic_dev_state *dev_state, 375 struct btrfs_device *device, 376 int superblock_mirror_num, 377 struct btrfsic_dev_state **selected_dev_state, 378 struct btrfs_super_block *selected_super); 379 static struct btrfsic_dev_state *btrfsic_dev_state_lookup( 380 struct block_device *bdev); 381 static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state, 382 u64 bytenr, 383 struct btrfsic_dev_state *dev_state, 384 u64 dev_bytenr); 385 386 static struct mutex btrfsic_mutex; 387 static int btrfsic_is_initialized; 388 static struct btrfsic_dev_state_hashtable btrfsic_dev_state_hashtable; 389 390 391 static void btrfsic_block_init(struct btrfsic_block *b) 392 { 393 b->magic_num = BTRFSIC_BLOCK_MAGIC_NUMBER; 394 b->dev_state = NULL; 
395 b->dev_bytenr = 0; 396 b->logical_bytenr = 0; 397 b->generation = BTRFSIC_GENERATION_UNKNOWN; 398 b->disk_key.objectid = 0; 399 b->disk_key.type = 0; 400 b->disk_key.offset = 0; 401 b->is_metadata = 0; 402 b->is_superblock = 0; 403 b->is_iodone = 0; 404 b->iodone_w_error = 0; 405 b->never_written = 0; 406 b->mirror_num = 0; 407 b->next_in_same_bio = NULL; 408 b->orig_bio_bh_private = NULL; 409 b->orig_bio_bh_end_io.bio = NULL; 410 INIT_LIST_HEAD(&b->collision_resolving_node); 411 INIT_LIST_HEAD(&b->all_blocks_node); 412 INIT_LIST_HEAD(&b->ref_to_list); 413 INIT_LIST_HEAD(&b->ref_from_list); 414 b->submit_bio_bh_rw = 0; 415 b->flush_gen = 0; 416 } 417 418 static struct btrfsic_block *btrfsic_block_alloc(void) 419 { 420 struct btrfsic_block *b; 421 422 b = kzalloc(sizeof(*b), GFP_NOFS); 423 if (NULL != b) 424 btrfsic_block_init(b); 425 426 return b; 427 } 428 429 static void btrfsic_block_free(struct btrfsic_block *b) 430 { 431 BUG_ON(!(NULL == b || BTRFSIC_BLOCK_MAGIC_NUMBER == b->magic_num)); 432 kfree(b); 433 } 434 435 static void btrfsic_block_link_init(struct btrfsic_block_link *l) 436 { 437 l->magic_num = BTRFSIC_BLOCK_LINK_MAGIC_NUMBER; 438 l->ref_cnt = 1; 439 INIT_LIST_HEAD(&l->node_ref_to); 440 INIT_LIST_HEAD(&l->node_ref_from); 441 INIT_LIST_HEAD(&l->collision_resolving_node); 442 l->block_ref_to = NULL; 443 l->block_ref_from = NULL; 444 } 445 446 static struct btrfsic_block_link *btrfsic_block_link_alloc(void) 447 { 448 struct btrfsic_block_link *l; 449 450 l = kzalloc(sizeof(*l), GFP_NOFS); 451 if (NULL != l) 452 btrfsic_block_link_init(l); 453 454 return l; 455 } 456 457 static void btrfsic_block_link_free(struct btrfsic_block_link *l) 458 { 459 BUG_ON(!(NULL == l || BTRFSIC_BLOCK_LINK_MAGIC_NUMBER == l->magic_num)); 460 kfree(l); 461 } 462 463 static void btrfsic_dev_state_init(struct btrfsic_dev_state *ds) 464 { 465 ds->magic_num = BTRFSIC_DEV2STATE_MAGIC_NUMBER; 466 ds->bdev = NULL; 467 ds->state = NULL; 468 ds->name[0] = '\0'; 469 
INIT_LIST_HEAD(&ds->collision_resolving_node); 470 ds->last_flush_gen = 0; 471 btrfsic_block_init(&ds->dummy_block_for_bio_bh_flush); 472 ds->dummy_block_for_bio_bh_flush.is_iodone = 1; 473 ds->dummy_block_for_bio_bh_flush.dev_state = ds; 474 } 475 476 static struct btrfsic_dev_state *btrfsic_dev_state_alloc(void) 477 { 478 struct btrfsic_dev_state *ds; 479 480 ds = kzalloc(sizeof(*ds), GFP_NOFS); 481 if (NULL != ds) 482 btrfsic_dev_state_init(ds); 483 484 return ds; 485 } 486 487 static void btrfsic_dev_state_free(struct btrfsic_dev_state *ds) 488 { 489 BUG_ON(!(NULL == ds || 490 BTRFSIC_DEV2STATE_MAGIC_NUMBER == ds->magic_num)); 491 kfree(ds); 492 } 493 494 static void btrfsic_block_hashtable_init(struct btrfsic_block_hashtable *h) 495 { 496 int i; 497 498 for (i = 0; i < BTRFSIC_BLOCK_HASHTABLE_SIZE; i++) 499 INIT_LIST_HEAD(h->table + i); 500 } 501 502 static void btrfsic_block_hashtable_add(struct btrfsic_block *b, 503 struct btrfsic_block_hashtable *h) 504 { 505 const unsigned int hashval = 506 (((unsigned int)(b->dev_bytenr >> 16)) ^ 507 ((unsigned int)((uintptr_t)b->dev_state->bdev))) & 508 (BTRFSIC_BLOCK_HASHTABLE_SIZE - 1); 509 510 list_add(&b->collision_resolving_node, h->table + hashval); 511 } 512 513 static void btrfsic_block_hashtable_remove(struct btrfsic_block *b) 514 { 515 list_del(&b->collision_resolving_node); 516 } 517 518 static struct btrfsic_block *btrfsic_block_hashtable_lookup( 519 struct block_device *bdev, 520 u64 dev_bytenr, 521 struct btrfsic_block_hashtable *h) 522 { 523 const unsigned int hashval = 524 (((unsigned int)(dev_bytenr >> 16)) ^ 525 ((unsigned int)((uintptr_t)bdev))) & 526 (BTRFSIC_BLOCK_HASHTABLE_SIZE - 1); 527 struct list_head *elem; 528 529 list_for_each(elem, h->table + hashval) { 530 struct btrfsic_block *const b = 531 list_entry(elem, struct btrfsic_block, 532 collision_resolving_node); 533 534 if (b->dev_state->bdev == bdev && b->dev_bytenr == dev_bytenr) 535 return b; 536 } 537 538 return NULL; 539 } 540 541 static 
void btrfsic_block_link_hashtable_init( 542 struct btrfsic_block_link_hashtable *h) 543 { 544 int i; 545 546 for (i = 0; i < BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE; i++) 547 INIT_LIST_HEAD(h->table + i); 548 } 549 550 static void btrfsic_block_link_hashtable_add( 551 struct btrfsic_block_link *l, 552 struct btrfsic_block_link_hashtable *h) 553 { 554 const unsigned int hashval = 555 (((unsigned int)(l->block_ref_to->dev_bytenr >> 16)) ^ 556 ((unsigned int)(l->block_ref_from->dev_bytenr >> 16)) ^ 557 ((unsigned int)((uintptr_t)l->block_ref_to->dev_state->bdev)) ^ 558 ((unsigned int)((uintptr_t)l->block_ref_from->dev_state->bdev))) 559 & (BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE - 1); 560 561 BUG_ON(NULL == l->block_ref_to); 562 BUG_ON(NULL == l->block_ref_from); 563 list_add(&l->collision_resolving_node, h->table + hashval); 564 } 565 566 static void btrfsic_block_link_hashtable_remove(struct btrfsic_block_link *l) 567 { 568 list_del(&l->collision_resolving_node); 569 } 570 571 static struct btrfsic_block_link *btrfsic_block_link_hashtable_lookup( 572 struct block_device *bdev_ref_to, 573 u64 dev_bytenr_ref_to, 574 struct block_device *bdev_ref_from, 575 u64 dev_bytenr_ref_from, 576 struct btrfsic_block_link_hashtable *h) 577 { 578 const unsigned int hashval = 579 (((unsigned int)(dev_bytenr_ref_to >> 16)) ^ 580 ((unsigned int)(dev_bytenr_ref_from >> 16)) ^ 581 ((unsigned int)((uintptr_t)bdev_ref_to)) ^ 582 ((unsigned int)((uintptr_t)bdev_ref_from))) & 583 (BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE - 1); 584 struct list_head *elem; 585 586 list_for_each(elem, h->table + hashval) { 587 struct btrfsic_block_link *const l = 588 list_entry(elem, struct btrfsic_block_link, 589 collision_resolving_node); 590 591 BUG_ON(NULL == l->block_ref_to); 592 BUG_ON(NULL == l->block_ref_from); 593 if (l->block_ref_to->dev_state->bdev == bdev_ref_to && 594 l->block_ref_to->dev_bytenr == dev_bytenr_ref_to && 595 l->block_ref_from->dev_state->bdev == bdev_ref_from && 596 l->block_ref_from->dev_bytenr == 
dev_bytenr_ref_from) 597 return l; 598 } 599 600 return NULL; 601 } 602 603 static void btrfsic_dev_state_hashtable_init( 604 struct btrfsic_dev_state_hashtable *h) 605 { 606 int i; 607 608 for (i = 0; i < BTRFSIC_DEV2STATE_HASHTABLE_SIZE; i++) 609 INIT_LIST_HEAD(h->table + i); 610 } 611 612 static void btrfsic_dev_state_hashtable_add( 613 struct btrfsic_dev_state *ds, 614 struct btrfsic_dev_state_hashtable *h) 615 { 616 const unsigned int hashval = 617 (((unsigned int)((uintptr_t)ds->bdev)) & 618 (BTRFSIC_DEV2STATE_HASHTABLE_SIZE - 1)); 619 620 list_add(&ds->collision_resolving_node, h->table + hashval); 621 } 622 623 static void btrfsic_dev_state_hashtable_remove(struct btrfsic_dev_state *ds) 624 { 625 list_del(&ds->collision_resolving_node); 626 } 627 628 static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup( 629 struct block_device *bdev, 630 struct btrfsic_dev_state_hashtable *h) 631 { 632 const unsigned int hashval = 633 (((unsigned int)((uintptr_t)bdev)) & 634 (BTRFSIC_DEV2STATE_HASHTABLE_SIZE - 1)); 635 struct list_head *elem; 636 637 list_for_each(elem, h->table + hashval) { 638 struct btrfsic_dev_state *const ds = 639 list_entry(elem, struct btrfsic_dev_state, 640 collision_resolving_node); 641 642 if (ds->bdev == bdev) 643 return ds; 644 } 645 646 return NULL; 647 } 648 649 static int btrfsic_process_superblock(struct btrfsic_state *state, 650 struct btrfs_fs_devices *fs_devices) 651 { 652 int ret = 0; 653 struct btrfs_super_block *selected_super; 654 struct list_head *dev_head = &fs_devices->devices; 655 struct btrfs_device *device; 656 struct btrfsic_dev_state *selected_dev_state = NULL; 657 int pass; 658 659 BUG_ON(NULL == state); 660 selected_super = kzalloc(sizeof(*selected_super), GFP_NOFS); 661 if (NULL == selected_super) { 662 printk(KERN_INFO "btrfsic: error, kmalloc failed!\n"); 663 return -1; 664 } 665 666 list_for_each_entry(device, dev_head, dev_list) { 667 int i; 668 struct btrfsic_dev_state *dev_state; 669 670 if 
(!device->bdev || !device->name) 671 continue; 672 673 dev_state = btrfsic_dev_state_lookup(device->bdev); 674 BUG_ON(NULL == dev_state); 675 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { 676 ret = btrfsic_process_superblock_dev_mirror( 677 state, dev_state, device, i, 678 &selected_dev_state, selected_super); 679 if (0 != ret && 0 == i) { 680 kfree(selected_super); 681 return ret; 682 } 683 } 684 } 685 686 if (NULL == state->latest_superblock) { 687 printk(KERN_INFO "btrfsic: no superblock found!\n"); 688 kfree(selected_super); 689 return -1; 690 } 691 692 state->csum_size = btrfs_super_csum_size(selected_super); 693 694 for (pass = 0; pass < 3; pass++) { 695 int num_copies; 696 int mirror_num; 697 u64 next_bytenr; 698 699 switch (pass) { 700 case 0: 701 next_bytenr = btrfs_super_root(selected_super); 702 if (state->print_mask & 703 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) 704 printk(KERN_INFO "root@%llu\n", 705 (unsigned long long)next_bytenr); 706 break; 707 case 1: 708 next_bytenr = btrfs_super_chunk_root(selected_super); 709 if (state->print_mask & 710 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) 711 printk(KERN_INFO "chunk@%llu\n", 712 (unsigned long long)next_bytenr); 713 break; 714 case 2: 715 next_bytenr = btrfs_super_log_root(selected_super); 716 if (0 == next_bytenr) 717 continue; 718 if (state->print_mask & 719 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) 720 printk(KERN_INFO "log@%llu\n", 721 (unsigned long long)next_bytenr); 722 break; 723 } 724 725 num_copies = 726 btrfs_num_copies(state->root->fs_info, 727 next_bytenr, state->metablock_size); 728 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) 729 printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", 730 (unsigned long long)next_bytenr, num_copies); 731 732 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { 733 struct btrfsic_block *next_block; 734 struct btrfsic_block_data_ctx tmp_next_block_ctx; 735 struct btrfsic_block_link *l; 736 737 ret = 
btrfsic_map_block(state, next_bytenr, 738 state->metablock_size, 739 &tmp_next_block_ctx, 740 mirror_num); 741 if (ret) { 742 printk(KERN_INFO "btrfsic:" 743 " btrfsic_map_block(root @%llu," 744 " mirror %d) failed!\n", 745 (unsigned long long)next_bytenr, 746 mirror_num); 747 kfree(selected_super); 748 return -1; 749 } 750 751 next_block = btrfsic_block_hashtable_lookup( 752 tmp_next_block_ctx.dev->bdev, 753 tmp_next_block_ctx.dev_bytenr, 754 &state->block_hashtable); 755 BUG_ON(NULL == next_block); 756 757 l = btrfsic_block_link_hashtable_lookup( 758 tmp_next_block_ctx.dev->bdev, 759 tmp_next_block_ctx.dev_bytenr, 760 state->latest_superblock->dev_state-> 761 bdev, 762 state->latest_superblock->dev_bytenr, 763 &state->block_link_hashtable); 764 BUG_ON(NULL == l); 765 766 ret = btrfsic_read_block(state, &tmp_next_block_ctx); 767 if (ret < (int)PAGE_CACHE_SIZE) { 768 printk(KERN_INFO 769 "btrfsic: read @logical %llu failed!\n", 770 (unsigned long long) 771 tmp_next_block_ctx.start); 772 btrfsic_release_block_ctx(&tmp_next_block_ctx); 773 kfree(selected_super); 774 return -1; 775 } 776 777 ret = btrfsic_process_metablock(state, 778 next_block, 779 &tmp_next_block_ctx, 780 BTRFS_MAX_LEVEL + 3, 1); 781 btrfsic_release_block_ctx(&tmp_next_block_ctx); 782 } 783 } 784 785 kfree(selected_super); 786 return ret; 787 } 788 789 static int btrfsic_process_superblock_dev_mirror( 790 struct btrfsic_state *state, 791 struct btrfsic_dev_state *dev_state, 792 struct btrfs_device *device, 793 int superblock_mirror_num, 794 struct btrfsic_dev_state **selected_dev_state, 795 struct btrfs_super_block *selected_super) 796 { 797 struct btrfs_super_block *super_tmp; 798 u64 dev_bytenr; 799 struct buffer_head *bh; 800 struct btrfsic_block *superblock_tmp; 801 int pass; 802 struct block_device *const superblock_bdev = device->bdev; 803 804 /* super block bytenr is always the unmapped device bytenr */ 805 dev_bytenr = btrfs_sb_offset(superblock_mirror_num); 806 if (dev_bytenr + 
BTRFS_SUPER_INFO_SIZE > device->total_bytes) 807 return -1; 808 bh = __bread(superblock_bdev, dev_bytenr / 4096, 809 BTRFS_SUPER_INFO_SIZE); 810 if (NULL == bh) 811 return -1; 812 super_tmp = (struct btrfs_super_block *) 813 (bh->b_data + (dev_bytenr & 4095)); 814 815 if (btrfs_super_bytenr(super_tmp) != dev_bytenr || 816 btrfs_super_magic(super_tmp) != BTRFS_MAGIC || 817 memcmp(device->uuid, super_tmp->dev_item.uuid, BTRFS_UUID_SIZE) || 818 btrfs_super_nodesize(super_tmp) != state->metablock_size || 819 btrfs_super_leafsize(super_tmp) != state->metablock_size || 820 btrfs_super_sectorsize(super_tmp) != state->datablock_size) { 821 brelse(bh); 822 return 0; 823 } 824 825 superblock_tmp = 826 btrfsic_block_hashtable_lookup(superblock_bdev, 827 dev_bytenr, 828 &state->block_hashtable); 829 if (NULL == superblock_tmp) { 830 superblock_tmp = btrfsic_block_alloc(); 831 if (NULL == superblock_tmp) { 832 printk(KERN_INFO "btrfsic: error, kmalloc failed!\n"); 833 brelse(bh); 834 return -1; 835 } 836 /* for superblock, only the dev_bytenr makes sense */ 837 superblock_tmp->dev_bytenr = dev_bytenr; 838 superblock_tmp->dev_state = dev_state; 839 superblock_tmp->logical_bytenr = dev_bytenr; 840 superblock_tmp->generation = btrfs_super_generation(super_tmp); 841 superblock_tmp->is_metadata = 1; 842 superblock_tmp->is_superblock = 1; 843 superblock_tmp->is_iodone = 1; 844 superblock_tmp->never_written = 0; 845 superblock_tmp->mirror_num = 1 + superblock_mirror_num; 846 if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE) 847 printk_in_rcu(KERN_INFO "New initial S-block (bdev %p, %s)" 848 " @%llu (%s/%llu/%d)\n", 849 superblock_bdev, 850 rcu_str_deref(device->name), 851 (unsigned long long)dev_bytenr, 852 dev_state->name, 853 (unsigned long long)dev_bytenr, 854 superblock_mirror_num); 855 list_add(&superblock_tmp->all_blocks_node, 856 &state->all_blocks_list); 857 btrfsic_block_hashtable_add(superblock_tmp, 858 &state->block_hashtable); 859 } 860 861 /* select the one 
with the highest generation field */ 862 if (btrfs_super_generation(super_tmp) > 863 state->max_superblock_generation || 864 0 == state->max_superblock_generation) { 865 memcpy(selected_super, super_tmp, sizeof(*selected_super)); 866 *selected_dev_state = dev_state; 867 state->max_superblock_generation = 868 btrfs_super_generation(super_tmp); 869 state->latest_superblock = superblock_tmp; 870 } 871 872 for (pass = 0; pass < 3; pass++) { 873 u64 next_bytenr; 874 int num_copies; 875 int mirror_num; 876 const char *additional_string = NULL; 877 struct btrfs_disk_key tmp_disk_key; 878 879 tmp_disk_key.type = BTRFS_ROOT_ITEM_KEY; 880 tmp_disk_key.offset = 0; 881 switch (pass) { 882 case 0: 883 btrfs_set_disk_key_objectid(&tmp_disk_key, 884 BTRFS_ROOT_TREE_OBJECTID); 885 additional_string = "initial root "; 886 next_bytenr = btrfs_super_root(super_tmp); 887 break; 888 case 1: 889 btrfs_set_disk_key_objectid(&tmp_disk_key, 890 BTRFS_CHUNK_TREE_OBJECTID); 891 additional_string = "initial chunk "; 892 next_bytenr = btrfs_super_chunk_root(super_tmp); 893 break; 894 case 2: 895 btrfs_set_disk_key_objectid(&tmp_disk_key, 896 BTRFS_TREE_LOG_OBJECTID); 897 additional_string = "initial log "; 898 next_bytenr = btrfs_super_log_root(super_tmp); 899 if (0 == next_bytenr) 900 continue; 901 break; 902 } 903 904 num_copies = 905 btrfs_num_copies(state->root->fs_info, 906 next_bytenr, state->metablock_size); 907 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) 908 printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", 909 (unsigned long long)next_bytenr, num_copies); 910 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { 911 struct btrfsic_block *next_block; 912 struct btrfsic_block_data_ctx tmp_next_block_ctx; 913 struct btrfsic_block_link *l; 914 915 if (btrfsic_map_block(state, next_bytenr, 916 state->metablock_size, 917 &tmp_next_block_ctx, 918 mirror_num)) { 919 printk(KERN_INFO "btrfsic: btrfsic_map_block(" 920 "bytenr @%llu, mirror %d) failed!\n", 921 (unsigned 
long long)next_bytenr, 922 mirror_num); 923 brelse(bh); 924 return -1; 925 } 926 927 next_block = btrfsic_block_lookup_or_add( 928 state, &tmp_next_block_ctx, 929 additional_string, 1, 1, 0, 930 mirror_num, NULL); 931 if (NULL == next_block) { 932 btrfsic_release_block_ctx(&tmp_next_block_ctx); 933 brelse(bh); 934 return -1; 935 } 936 937 next_block->disk_key = tmp_disk_key; 938 next_block->generation = BTRFSIC_GENERATION_UNKNOWN; 939 l = btrfsic_block_link_lookup_or_add( 940 state, &tmp_next_block_ctx, 941 next_block, superblock_tmp, 942 BTRFSIC_GENERATION_UNKNOWN); 943 btrfsic_release_block_ctx(&tmp_next_block_ctx); 944 if (NULL == l) { 945 brelse(bh); 946 return -1; 947 } 948 } 949 } 950 if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_ALL_TREES) 951 btrfsic_dump_tree_sub(state, superblock_tmp, 0); 952 953 brelse(bh); 954 return 0; 955 }

/*
 * Allocate a zero-initialized stack frame for the iterative tree walk in
 * btrfsic_process_metablock() and stamp it with the magic number that
 * btrfsic_stack_frame_free() verifies.
 *
 * Returns the new frame, or NULL (after logging) if the allocation failed.
 */
static struct btrfsic_stack_frame *btrfsic_stack_frame_alloc(void)
{
	struct btrfsic_stack_frame *sf;

	sf = kzalloc(sizeof(*sf), GFP_NOFS);
	if (NULL == sf)
		printk(KERN_INFO "btrfsic: alloc memory failed!\n");
	else
		sf->magic = BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER;
	return sf;
}

/*
 * Free a stack frame obtained from btrfsic_stack_frame_alloc().
 * NULL is accepted; any other pointer must carry the frame magic number.
 */
static void btrfsic_stack_frame_free(struct btrfsic_stack_frame *sf)
{
	BUG_ON(!(NULL == sf ||
		 BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER == sf->magic));
	kfree(sf);
}

/*
 * Walk the tree block held in @first_block_ctx and the blocks it refers to,
 * recording a link for every reference that is found.
 *
 * The walk is iterative: every level of descent uses one
 * struct btrfsic_stack_frame. The frame for @first_block lives on the C
 * stack, deeper frames are heap allocated and freed again when the walk
 * steps back.
 *
 *  - leaf (header level 0): each ROOT_ITEM is followed via
 *    btrfsic_create_link_to_next_block(); each EXTENT_DATA item is handed
 *    to btrfsic_handle_extent_data() when state->include_extent_data is set.
 *  - node: each key pointer is followed via
 *    btrfsic_create_link_to_next_block().
 *
 * Whenever btrfsic_create_link_to_next_block() hands back a block
 * (sf->next_block != NULL) a new frame is pushed for it with
 * limit_nesting reduced by one.
 *
 * @state:               integrity checker state
 * @first_block:         block descriptor of the block to start with
 * @first_block_ctx:     data context of that block; datav[0] must be mapped.
 *                       It is not released here, the caller owns it.
 * @first_limit_nesting: nesting limit stored in the initial frame
 * @force_iodone_flag:   passed through to the link/extent helpers
 *
 * Returns the error value of the initial frame: 0 on success, non-zero if a
 * helper failed or a stack frame could not be allocated.
 */
static int btrfsic_process_metablock(
		struct btrfsic_state *state,
		struct btrfsic_block *const first_block,
		struct btrfsic_block_data_ctx *const first_block_ctx,
		int first_limit_nesting, int force_iodone_flag)
{
	struct btrfsic_stack_frame initial_stack_frame = { 0 };
	struct btrfsic_stack_frame *sf;
	struct btrfsic_stack_frame *next_stack;
	struct btrfs_header *const first_hdr =
		(struct btrfs_header *)first_block_ctx->datav[0];

	BUG_ON(!first_hdr);
	sf = &initial_stack_frame;
	sf->error = 0;
	sf->i = -1;
	sf->limit_nesting = first_limit_nesting;
	sf->block = first_block;
	sf->block_ctx = first_block_ctx;
	sf->next_block = NULL;
	sf->hdr = first_hdr;
	sf->prev = NULL;

continue_with_new_stack_frame:
	sf->block->generation = le64_to_cpu(sf->hdr->generation);
	if (0 == sf->hdr->level) {
		struct btrfs_leaf *const leafhdr =
			(struct btrfs_leaf *)sf->hdr;

		/* i == -1 marks a frame that is visited for the first time */
		if (-1 == sf->i) {
			sf->nr = btrfs_stack_header_nritems(&leafhdr->header);

			if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
				printk(KERN_INFO
				       "leaf %llu items %d generation %llu"
				       " owner %llu\n",
				       (unsigned long long)
				       sf->block_ctx->start,
				       sf->nr,
				       (unsigned long long)
				       btrfs_stack_header_generation(
					       &leafhdr->header),
				       (unsigned long long)
				       btrfs_stack_header_owner(
					       &leafhdr->header));
		}

continue_with_current_leaf_stack_frame:
		/* advance to the next item once all mirrors were handled */
		if (0 == sf->num_copies || sf->mirror_num > sf->num_copies) {
			sf->i++;
			sf->num_copies = 0;
		}

		if (sf->i < sf->nr) {
			struct btrfs_item disk_item;
			u32 disk_item_offset =
				(uintptr_t)(leafhdr->items + sf->i) -
				(uintptr_t)leafhdr;
			struct btrfs_disk_key *disk_key;
			u8 type;
			u32 item_offset;
			u32 item_size;

			if (disk_item_offset + sizeof(struct btrfs_item) >
			    sf->block_ctx->len) {
leaf_item_out_of_bounce_error:
				printk(KERN_INFO
				       "btrfsic: leaf item out of bounce at logical %llu, dev %s\n",
				       (unsigned long long)sf->block_ctx->start,
				       sf->block_ctx->dev->name);
				goto one_stack_frame_backwards;
			}
			btrfsic_read_from_block_data(sf->block_ctx,
						     &disk_item,
						     disk_item_offset,
						     sizeof(struct btrfs_item));
			item_offset = btrfs_stack_item_offset(&disk_item);
			/* the size field, not the offset field, is the length */
			item_size = btrfs_stack_item_size(&disk_item);
			disk_key = &disk_item.key;
			type = btrfs_disk_key_type(disk_key);

			if (BTRFS_ROOT_ITEM_KEY == type) {
				struct btrfs_root_item root_item;
				u32 root_item_offset;
				u32 root_item_len;
				u64 next_bytenr;

				root_item_offset = item_offset +
					offsetof(struct btrfs_leaf, items);
				if (root_item_offset + item_size >
				    sf->block_ctx->len)
					goto leaf_item_out_of_bounce_error;
				/*
				 * The on-disk item may be shorter or longer
				 * than the in-memory struct: start from a
				 * zeroed struct and never copy more than it
				 * can hold.
				 */
				memset(&root_item, 0, sizeof(root_item));
				root_item_len = item_size;
				if (root_item_len > sizeof(root_item))
					root_item_len = sizeof(root_item);
				btrfsic_read_from_block_data(
					sf->block_ctx, &root_item,
					root_item_offset,
					root_item_len);
				next_bytenr = btrfs_root_bytenr(&root_item);

				sf->error =
				    btrfsic_create_link_to_next_block(
						state,
						sf->block,
						sf->block_ctx,
						next_bytenr,
						sf->limit_nesting,
						&sf->next_block_ctx,
						&sf->next_block,
						force_iodone_flag,
						&sf->num_copies,
						&sf->mirror_num,
						disk_key,
						btrfs_root_generation(
							&root_item));
				if (sf->error)
					goto one_stack_frame_backwards;

				if (NULL != sf->next_block) {
					struct btrfs_header *const next_hdr =
					    (struct btrfs_header *)
					    sf->next_block_ctx.datav[0];

					next_stack =
					    btrfsic_stack_frame_alloc();
					if (NULL == next_stack) {
						/* report the failure upwards */
						sf->error = -1;
						btrfsic_release_block_ctx(
							&sf->next_block_ctx);
						goto one_stack_frame_backwards;
					}

					next_stack->i = -1;
					next_stack->block = sf->next_block;
					next_stack->block_ctx =
					    &sf->next_block_ctx;
					next_stack->next_block = NULL;
					next_stack->hdr = next_hdr;
					next_stack->limit_nesting =
					    sf->limit_nesting - 1;
					next_stack->prev = sf;
					sf = next_stack;
					goto continue_with_new_stack_frame;
				}
			} else if (BTRFS_EXTENT_DATA_KEY == type &&
				   state->include_extent_data) {
				sf->error = btrfsic_handle_extent_data(
						state,
						sf->block,
						sf->block_ctx,
						item_offset,
						force_iodone_flag);
				if (sf->error)
					goto one_stack_frame_backwards;
			}

			goto continue_with_current_leaf_stack_frame;
		}
	} else {
		struct btrfs_node *const nodehdr = (struct btrfs_node *)sf->hdr;

		/* i == -1 marks a frame that is visited for the first time */
		if (-1 == sf->i) {
			sf->nr = btrfs_stack_header_nritems(&nodehdr->header);

			if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
				printk(KERN_INFO "node %llu level %d items %d"
				       " generation %llu owner %llu\n",
				       (unsigned long long)
				       sf->block_ctx->start,
				       nodehdr->header.level, sf->nr,
				       (unsigned long long)
				       btrfs_stack_header_generation(
					       &nodehdr->header),
				       (unsigned long long)
				       btrfs_stack_header_owner(
					       &nodehdr->header));
		}

continue_with_current_node_stack_frame:
		/* advance to the next key pointer once all mirrors were handled */
		if (0 == sf->num_copies || sf->mirror_num > sf->num_copies) {
			sf->i++;
			sf->num_copies = 0;
		}

		if (sf->i < sf->nr) {
			struct btrfs_key_ptr key_ptr;
			u32 key_ptr_offset;
			u64 next_bytenr;

			key_ptr_offset = (uintptr_t)(nodehdr->ptrs + sf->i) -
					  (uintptr_t)nodehdr;
			if (key_ptr_offset + sizeof(struct btrfs_key_ptr) >
			    sf->block_ctx->len) {
				printk(KERN_INFO
				       "btrfsic: node item out of bounce at logical %llu, dev %s\n",
				       (unsigned long long)sf->block_ctx->start,
				       sf->block_ctx->dev->name);
				goto one_stack_frame_backwards;
			}
			btrfsic_read_from_block_data(
				sf->block_ctx, &key_ptr, key_ptr_offset,
				sizeof(struct btrfs_key_ptr));
			next_bytenr = btrfs_stack_key_blockptr(&key_ptr);

			sf->error = btrfsic_create_link_to_next_block(
					state,
					sf->block,
					sf->block_ctx,
					next_bytenr,
					sf->limit_nesting,
					&sf->next_block_ctx,
					&sf->next_block,
					force_iodone_flag,
					&sf->num_copies,
					&sf->mirror_num,
					&key_ptr.key,
					btrfs_stack_key_generation(&key_ptr));
			if (sf->error)
				goto one_stack_frame_backwards;

			if (NULL != sf->next_block) {
				struct btrfs_header *const next_hdr =
				    (struct btrfs_header *)
				    sf->next_block_ctx.datav[0];

				next_stack = btrfsic_stack_frame_alloc();
				if (NULL == next_stack) {
					/*
					 * same cleanup as in the leaf case:
					 * drop the block that was just read
					 * and report the failure upwards
					 */
					sf->error = -1;
					btrfsic_release_block_ctx(
						&sf->next_block_ctx);
					goto one_stack_frame_backwards;
				}

				next_stack->i = -1;
				next_stack->block = sf->next_block;
				next_stack->block_ctx = &sf->next_block_ctx;
				next_stack->next_block = NULL;
				next_stack->hdr = next_hdr;
				next_stack->limit_nesting =
				    sf->limit_nesting - 1;
				next_stack->prev = sf;
				sf = next_stack;
				goto continue_with_new_stack_frame;
			}

			goto continue_with_current_node_stack_frame;
		}
	}

one_stack_frame_backwards:
	if (NULL != sf->prev) {
		struct btrfsic_stack_frame *const prev = sf->prev;

		/* the one for the initial block is freed in the caller */
		btrfsic_release_block_ctx(sf->block_ctx);

		if (sf->error) {
			/* unwind all the way up, carrying the error along */
			prev->error = sf->error;
			btrfsic_stack_frame_free(sf);
			sf = prev;
			goto one_stack_frame_backwards;
		}

		/* resume the parent frame where it left off */
		btrfsic_stack_frame_free(sf);
		sf = prev;
		goto continue_with_new_stack_frame;
	} else {
		BUG_ON(&initial_stack_frame != sf);
	}

	return sf->error;
}

/*
 * Copy @len bytes starting at byte @offset of the block described by
 * @block_ctx into @dstv. The block data is spread over the per-page
 * mappings in block_ctx->datav[], so the copy is done page by page.
 *
 * Warns if the requested range exceeds block_ctx->len and BUGs if the copy
 * would need a page index beyond the pages covered by block_ctx->len.
 */
static void btrfsic_read_from_block_data(
	struct btrfsic_block_data_ctx *block_ctx,
	void *dstv, u32 offset, size_t len)
{
	size_t cur;
	size_t offset_in_page;
	char *kaddr;
	char *dst = (char *)dstv;
	size_t start_offset = block_ctx->start & ((u64)PAGE_CACHE_SIZE - 1);
	unsigned long i = (start_offset + offset) >> PAGE_CACHE_SHIFT;

	WARN_ON(offset + len > block_ctx->len);
	offset_in_page = (start_offset + offset) &
			 ((unsigned long)PAGE_CACHE_SIZE - 1);

	while (len > 0) {
		cur = min(len, ((size_t)PAGE_CACHE_SIZE - offset_in_page));
		BUG_ON(i >= (block_ctx->len + PAGE_CACHE_SIZE - 1) >>
			    PAGE_CACHE_SHIFT);
		kaddr = block_ctx->datav[i];
		memcpy(dst, kaddr + offset_in_page, cur);

		dst += cur;
		len -= cur;
		/* every page after the first one is read from its start */
		offset_in_page = 0;
		i++;
	}
}

/*
 * Record that @block refers to the metadata block at logical address
 * @next_bytenr, handling one mirror of that block per call.
 *
 * *@num_copiesp == 0 on entry means "first call for this reference": the
 * number of copies is looked up and *@mirror_nump is reset to 1. Each
 * successful call then processes mirror *@mirror_nump and increments it;
 * once *@mirror_nump exceeds *@num_copiesp the call returns 0 without doing
 * anything, which tells the caller to move on to the next reference.
 *
 * The referenced block is looked up (or added) in the block hash table and
 * a link from @block to it is created or, when @limit_nesting is 0, its
 * reference count is incremented. If a new link was allocated and
 * @limit_nesting is greater than 0, the block is read from disk and returned
 * in *@next_blockp with its data in @next_block_ctx so that the caller can
 * descend into it; otherwise *@next_blockp is set to NULL.
 *
 * Returns 0 on success, -1 on failure (with *@next_blockp == NULL).
 */
static int btrfsic_create_link_to_next_block(
		struct btrfsic_state *state,
		struct btrfsic_block *block,
		struct btrfsic_block_data_ctx *block_ctx,
		u64 next_bytenr,
		int limit_nesting,
		struct btrfsic_block_data_ctx *next_block_ctx,
		struct btrfsic_block **next_blockp,
		int force_iodone_flag,
		int *num_copiesp, int *mirror_nump,
		struct btrfs_disk_key *disk_key,
		u64 parent_generation)
{
	struct btrfsic_block *next_block = NULL;
	int ret;
	struct btrfsic_block_link *l;
	int did_alloc_block_link;
	int block_was_created;

	*next_blockp = NULL;
	if (0 == *num_copiesp) {
		*num_copiesp =
		    btrfs_num_copies(state->root->fs_info,
				     next_bytenr, state->metablock_size);
		if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
			printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
			       (unsigned long long)next_bytenr, *num_copiesp);
		*mirror_nump = 1;
	}

	/* all mirrors of this reference have been handled already */
	if (*mirror_nump > *num_copiesp)
		return 0;

	if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
		printk(KERN_INFO
		       "btrfsic_create_link_to_next_block(mirror_num=%d)\n",
		       *mirror_nump);
	ret = btrfsic_map_block(state, next_bytenr,
				state->metablock_size,
				next_block_ctx, *mirror_nump);
	if (ret) {
		printk(KERN_INFO
		       "btrfsic: btrfsic_map_block(@%llu, mirror=%d) failed!\n",
		       (unsigned long long)next_bytenr, *mirror_nump);
		btrfsic_release_block_ctx(next_block_ctx);
		*next_blockp = NULL;
		return -1;
	}

	next_block = btrfsic_block_lookup_or_add(state,
						 next_block_ctx, "referenced ",
						 1, force_iodone_flag,
						 !force_iodone_flag,
						 *mirror_nump,
						 &block_was_created);
	if (NULL == next_block) {
		btrfsic_release_block_ctx(next_block_ctx);
		*next_blockp = NULL;
		return -1;
	}
	if (block_was_created) {
		/* a brand-new block cannot have a link yet */
		l = NULL;
		next_block->generation = BTRFSIC_GENERATION_UNKNOWN;
	} else {
		if (next_block->logical_bytenr != next_bytenr &&
		    !(!next_block->is_metadata &&
		      0 == next_block->logical_bytenr)) {
			printk(KERN_INFO
			       "Referenced block @%llu (%s/%llu/%d)"
			       " found in hash table, %c,"
			       " bytenr mismatch (!= stored %llu).\n",
			       (unsigned long long)next_bytenr,
			       next_block_ctx->dev->name,
			       (unsigned long long)next_block_ctx->dev_bytenr,
			       *mirror_nump,
			       btrfsic_get_block_type(state, next_block),
			       (unsigned long long)next_block->logical_bytenr);
		} else if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
			printk(KERN_INFO
			       "Referenced block @%llu (%s/%llu/%d)"
			       " found in hash table, %c.\n",
			       (unsigned long long)next_bytenr,
			       next_block_ctx->dev->name,
			       (unsigned long long)next_block_ctx->dev_bytenr,
			       *mirror_nump,
			       btrfsic_get_block_type(state, next_block));
		next_block->logical_bytenr = next_bytenr;

		next_block->mirror_num = *mirror_nump;
		l = btrfsic_block_link_hashtable_lookup(
				next_block_ctx->dev->bdev,
				next_block_ctx->dev_bytenr,
				block_ctx->dev->bdev,
				block_ctx->dev_bytenr,
				&state->block_link_hashtable);
	}

	next_block->disk_key = *disk_key;
	if (NULL == l) {
		l = btrfsic_block_link_alloc();
		if (NULL == l) {
			printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
			btrfsic_release_block_ctx(next_block_ctx);
			*next_blockp = NULL;
			return -1;
		}

		did_alloc_block_link = 1;
		l->block_ref_to = next_block;
		l->block_ref_from = block;
		l->ref_cnt = 1;
		l->parent_generation = parent_generation;

		if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
			btrfsic_print_add_link(state, l);

		list_add(&l->node_ref_to, &block->ref_to_list);
		list_add(&l->node_ref_from, &next_block->ref_from_list);

		btrfsic_block_link_hashtable_add(l,
						 &state->block_link_hashtable);
	} else {
		did_alloc_block_link = 0;
		if (0 == limit_nesting) {
			l->ref_cnt++;
			l->parent_generation = parent_generation;
			if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
				btrfsic_print_add_link(state, l);
		}
	}

	/* only descend into blocks whose link was newly created */
	if (limit_nesting > 0 && did_alloc_block_link) {
		ret = btrfsic_read_block(state, next_block_ctx);
		if (ret < (int)next_block_ctx->len) {
			printk(KERN_INFO
			       "btrfsic: read block @logical %llu failed!\n",
			       (unsigned long long)next_bytenr);
			btrfsic_release_block_ctx(next_block_ctx);
			*next_blockp = NULL;
			return -1;
		}

		*next_blockp = next_block;
	} else {
		*next_blockp = NULL;
	}
	(*mirror_nump)++;

	return 0;
}

/*
 * Process one EXTENT_DATA item of the leaf held in @block_ctx.
 *
 * @item_offset is the item's data offset as stored in the leaf item; the
 * file extent item is read from offsetof(struct btrfs_leaf, items) +
 * @item_offset. Items that are not of type BTRFS_FILE_EXTENT_REG, or whose
 * disk_bytenr is 0, are ignored. For all others the referenced range is
 * split into pieces of at most state->datablock_size bytes, and for every
 * mirror of every piece a data block is looked up (or added) and linked to
 * @block.
 *
 * Returns 0 on success, -1 on failure.
 */
static int btrfsic_handle_extent_data(
		struct btrfsic_state *state,
		struct btrfsic_block *block,
		struct btrfsic_block_data_ctx *block_ctx,
		u32 item_offset, int force_iodone_flag)
{
	int ret;
	struct btrfs_file_extent_item file_extent_item;
	u64 file_extent_item_offset;
	u64 next_bytenr;
	u64 num_bytes;
	u64 generation;
	struct btrfsic_block_link *l;

	file_extent_item_offset = offsetof(struct btrfs_leaf, items) +
				  item_offset;
	if (file_extent_item_offset +
	    offsetof(struct btrfs_file_extent_item, disk_num_bytes) >
	    block_ctx->len) {
		printk(KERN_INFO
		       "btrfsic: file item out of bounce at logical %llu, dev %s\n",
		       (unsigned long long)block_ctx->start,
		       block_ctx->dev->name);
		return -1;
	}

	/*
	 * First read only the part up to (excluding) disk_num_bytes: this is
	 * enough to inspect the type and disk_bytenr fields below, and the
	 * rest of the item is bounds-checked separately afterwards.
	 */
	btrfsic_read_from_block_data(block_ctx, &file_extent_item,
		file_extent_item_offset,
		offsetof(struct btrfs_file_extent_item, disk_num_bytes));
	if (BTRFS_FILE_EXTENT_REG != file_extent_item.type ||
	    btrfs_stack_file_extent_disk_bytenr(&file_extent_item) == 0) {
		if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
			printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu\n",
			       file_extent_item.type,
			       (unsigned long long)
			       btrfs_stack_file_extent_disk_bytenr(
			       &file_extent_item));
		return 0;
	}

	if (file_extent_item_offset + sizeof(struct btrfs_file_extent_item) >
	    block_ctx->len) {
		printk(KERN_INFO
		       "btrfsic: file item out of bounce at logical %llu, dev %s\n",
		       (unsigned long long)block_ctx->start,
		       block_ctx->dev->name);
		return -1;
	}
	btrfsic_read_from_block_data(block_ctx, &file_extent_item,
				     file_extent_item_offset,
				     sizeof(struct btrfs_file_extent_item));
	next_bytenr = btrfs_stack_file_extent_disk_bytenr(&file_extent_item) +
		      btrfs_stack_file_extent_offset(&file_extent_item);
	generation = btrfs_stack_file_extent_generation(&file_extent_item);
	num_bytes = btrfs_stack_file_extent_num_bytes(&file_extent_item);

	if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
		printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu,"
		       " offset = %llu, num_bytes = %llu\n",
		       file_extent_item.type,
		       (unsigned long long)
		       btrfs_stack_file_extent_disk_bytenr(&file_extent_item),
		       (unsigned long long)
		       btrfs_stack_file_extent_offset(&file_extent_item),
		       (unsigned long long)num_bytes);
	while (num_bytes > 0) {
		u32 chunk_len;
		int num_copies;
		int mirror_num;

		if (num_bytes > state->datablock_size)
			chunk_len = state->datablock_size;
		else
			chunk_len = num_bytes;

		num_copies =
		    btrfs_num_copies(state->root->fs_info,
				     next_bytenr, state->datablock_size);
		if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
			printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
			       (unsigned long long)next_bytenr, num_copies);
		for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
			struct btrfsic_block_data_ctx next_block_ctx;
			struct btrfsic_block *next_block;
			int block_was_created;

			if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
				printk(KERN_INFO "btrfsic_handle_extent_data("
				       "mirror_num=%d)\n", mirror_num);
			if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
				printk(KERN_INFO
				       "\tdisk_bytenr = %llu, num_bytes %u\n",
				       (unsigned long long)next_bytenr,
				       chunk_len);
			ret = btrfsic_map_block(state, next_bytenr,
						chunk_len, &next_block_ctx,
						mirror_num);
			if (ret) {
				printk(KERN_INFO
				       "btrfsic: btrfsic_map_block(@%llu,"
				       " mirror=%d) failed!\n",
				       (unsigned long long)next_bytenr,
				       mirror_num);
				return -1;
			}

			next_block = btrfsic_block_lookup_or_add(
					state,
					&next_block_ctx,
					"referenced ",
					0,
					force_iodone_flag,
					!force_iodone_flag,
					mirror_num,
					&block_was_created);
			if (NULL == next_block) {
				printk(KERN_INFO
				       "btrfsic: error, kmalloc failed!\n");
				btrfsic_release_block_ctx(&next_block_ctx);
				return -1;
			}
			if (!block_was_created) {
				if (next_block->logical_bytenr != next_bytenr &&
				    !(!next_block->is_metadata &&
				      0 == next_block->logical_bytenr)) {
					printk(KERN_INFO
					       "Referenced block"
					       " @%llu (%s/%llu/%d)"
					       " found in hash table, D,"
					       " bytenr mismatch"
					       " (!= stored %llu).\n",
					       (unsigned long long)next_bytenr,
					       next_block_ctx.dev->name,
					       (unsigned long long)
					       next_block_ctx.dev_bytenr,
					       mirror_num,
					       (unsigned long long)
					       next_block->logical_bytenr);
				}
				next_block->logical_bytenr = next_bytenr;
				next_block->mirror_num = mirror_num;
			}

			l = btrfsic_block_link_lookup_or_add(state,
							     &next_block_ctx,
							     next_block, block,
							     generation);
			btrfsic_release_block_ctx(&next_block_ctx);
			if (NULL == l)
				return -1;
		}

		next_bytenr += chunk_len;
		num_bytes -= chunk_len;
	}

	return 0;
}

/*
 * Translate the logical range (@bytenr, @len) for mirror @mirror_num into a
 * device and a device offset via btrfs_map_block() and store the result in
 * @block_ctx_out. No data is read; datav, pagev and mem_to_free are set to
 * NULL.
 *
 * Returns 0 on success. If btrfs_map_block() fails, every field of
 * @block_ctx_out is cleared and its error is returned. If the device is not
 * known to the integrity checker, -ENXIO is returned.
 */
static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len,
			     struct btrfsic_block_data_ctx *block_ctx_out,
			     int mirror_num)
{
	int ret;
	u64 length;
	struct btrfs_bio *multi = NULL;
	struct btrfs_device *device;

	length = len;
	ret = btrfs_map_block(state->root->fs_info, READ,
			      bytenr, &length, &multi, mirror_num);

	if (ret) {
		block_ctx_out->start = 0;
		block_ctx_out->dev_bytenr = 0;
		block_ctx_out->len = 0;
		block_ctx_out->dev = NULL;
		block_ctx_out->datav = NULL;
		block_ctx_out->pagev = NULL;
		block_ctx_out->mem_to_free = NULL;

		return ret;
	}

	device = multi->stripes[0].dev;
	block_ctx_out->dev = btrfsic_dev_state_lookup(device->bdev);
	block_ctx_out->dev_bytenr = multi->stripes[0].physical;
	block_ctx_out->start = bytenr;
	block_ctx_out->len = len;
	block_ctx_out->datav = NULL;
	block_ctx_out->pagev = NULL;
	block_ctx_out->mem_to_free = NULL;

	kfree(multi);
	if (NULL == block_ctx_out->dev) {
		ret = -ENXIO;
		printk(KERN_INFO "btrfsic: error, cannot lookup dev (#1)!\n");
	}

	return ret;
}

/*
 * Fill @block_ctx_out for a super block located at device offset @bytenr of
 * @bdev. @bytenr is stored as both the start and the device byte number,
 * i.e. no logical-to-physical mapping is performed.
 *
 * Returns 0 on success, -ENXIO if @bdev is not known to the integrity
 * checker.
 */
static int btrfsic_map_superblock(struct btrfsic_state *state, u64 bytenr,
				  u32 len, struct block_device *bdev,
				  struct btrfsic_block_data_ctx *block_ctx_out)
{
	block_ctx_out->dev = btrfsic_dev_state_lookup(bdev);
	block_ctx_out->dev_bytenr = bytenr;
	block_ctx_out->start = bytenr;
	block_ctx_out->len = len;
	block_ctx_out->datav = NULL;
	block_ctx_out->pagev = NULL;
	block_ctx_out->mem_to_free = NULL;
	if (NULL != block_ctx_out->dev) {
		return 0;
	} else {
		printk(KERN_INFO "btrfsic: error, cannot lookup dev (#2)!\n");
		return -ENXIO;
	}
}

/*
 * Undo btrfsic_read_block(): unmap and free every page attached to
 * @block_ctx and free the pointer arrays. A context without attached memory
 * (mem_to_free == NULL) is left untouched, so the function may be called on
 * a context that was only mapped, or called repeatedly.
 */
static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx)
{
	if (block_ctx->mem_to_free) {
		unsigned int num_pages;

		BUG_ON(!block_ctx->datav);
		BUG_ON(!block_ctx->pagev);
		num_pages = (block_ctx->len + (u64)PAGE_CACHE_SIZE - 1) >>
			    PAGE_CACHE_SHIFT;
		while (num_pages > 0) {
			num_pages--;
			/* entries may be NULL after a partially failed read */
			if (block_ctx->datav[num_pages]) {
				kunmap(block_ctx->pagev[num_pages]);
				block_ctx->datav[num_pages] = NULL;
			}
			if (block_ctx->pagev[num_pages]) {
				__free_page(block_ctx->pagev[num_pages]);
				block_ctx->pagev[num_pages] = NULL;
			}
		}

		kfree(block_ctx->mem_to_free);
		block_ctx->mem_to_free = NULL;
		block_ctx->pagev = NULL;
		block_ctx->datav = NULL;
	}
}

/*
 * Read the block described by @block_ctx (device, dev_bytenr, len) from
 * disk into freshly allocated pages and map them into block_ctx->datav[].
 * The context must not have any memory attached yet, and dev_bytenr must be
 * page aligned.
 *
 * Returns block_ctx->len on success and -1 on failure. On failure, whatever
 * was allocated so far stays attached to @block_ctx;
 * btrfsic_release_block_ctx() frees it.
 */
static int btrfsic_read_block(struct btrfsic_state *state,
			      struct btrfsic_block_data_ctx *block_ctx)
{
	unsigned int num_pages;
	unsigned int i;
	u64 dev_bytenr;
	int ret;

	BUG_ON(block_ctx->datav);
	BUG_ON(block_ctx->pagev);
	BUG_ON(block_ctx->mem_to_free);
	if (block_ctx->dev_bytenr & ((u64)PAGE_CACHE_SIZE - 1)) {
		printk(KERN_INFO
		       "btrfsic: read_block() with unaligned bytenr %llu\n",
		       (unsigned long long)block_ctx->dev_bytenr);
		return -1;
	}

	num_pages = (block_ctx->len + (u64)PAGE_CACHE_SIZE - 1) >>
		    PAGE_CACHE_SHIFT;
	/* one allocation holds both arrays: datav[] first, pagev[] behind it */
	block_ctx->mem_to_free = kzalloc((sizeof(*block_ctx->datav) +
					  sizeof(*block_ctx->pagev)) *
					 num_pages, GFP_NOFS);
	if (!block_ctx->mem_to_free)
		return -1;
	block_ctx->datav = block_ctx->mem_to_free;
	block_ctx->pagev = (struct page **)(block_ctx->datav + num_pages);
	for (i = 0; i < num_pages; i++) {
		block_ctx->pagev[i] = alloc_page(GFP_NOFS);
		if (!block_ctx->pagev[i])
			return -1;
	}

	dev_bytenr = block_ctx->dev_bytenr;
	/* submit as many bios as needed to cover all pages */
	for (i = 0; i < num_pages;) {
		struct bio *bio;
		unsigned int j;
		DECLARE_COMPLETION_ONSTACK(complete);

		bio = btrfs_io_bio_alloc(GFP_NOFS, num_pages - i);
		if (!bio) {
			printk(KERN_INFO
			       "btrfsic: bio_alloc() for %u pages failed!\n",
			       num_pages - i);
			return -1;
		}
		bio->bi_bdev = block_ctx->dev->bdev;
		bio->bi_sector = dev_bytenr >> 9;
		bio->bi_end_io = btrfsic_complete_bio_end_io;
		bio->bi_private = &complete;

		for (j = i; j < num_pages; j++) {
			ret = bio_add_page(bio, block_ctx->pagev[j],
					   PAGE_CACHE_SIZE, 0);
			if (PAGE_CACHE_SIZE != ret)
				break;
		}
		if (j == i) {
			printk(KERN_INFO
			       "btrfsic: error, failed to add a single page!\n");
			/* the bio was never submitted, drop our reference */
			bio_put(bio);
			return -1;
		}
		submit_bio(READ, bio);

		/* this will also unplug the queue */
		wait_for_completion(&complete);

		if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
			printk(KERN_INFO
			       "btrfsic: read error at logical %llu dev %s!\n",
			       (unsigned long long)block_ctx->start,
			       block_ctx->dev->name);
			bio_put(bio);
			return -1;
		}
		bio_put(bio);
		dev_bytenr += (j - i) * PAGE_CACHE_SIZE;
		i = j;
	}
	for (i = 0; i < num_pages; i++) {
		block_ctx->datav[i] = kmap(block_ctx->pagev[i]);
		if (!block_ctx->datav[i]) {
			printk(KERN_INFO "btrfsic: kmap() failed (dev %s)!\n",
			       block_ctx->dev->name);
			return -1;
		}
	}

	return block_ctx->len;
}

/*
 * bio completion callback used by btrfsic_read_block(): signals the
 * completion object stored in bio->bi_private. @err is not evaluated here.
 */
static void btrfsic_complete_bio_end_io(struct bio *bio, int err)
{
	complete((struct completion *)bio->bi_private);
}

/*
 * Print every block on state->all_blocks_list to the kernel log, each one
 * followed by the links that start at it (ref_to_list) and the links that
 * point to it (ref_from_list).
 */
static void btrfsic_dump_database(struct btrfsic_state *state)
{
	struct list_head *elem_all;

	BUG_ON(NULL == state);

	printk(KERN_INFO "all_blocks_list:\n");
	list_for_each(elem_all, &state->all_blocks_list) {
		const struct btrfsic_block *const b_all =
		    list_entry(elem_all, struct btrfsic_block,
			       all_blocks_node);
		struct list_head *elem_ref_to;
		struct list_head *elem_ref_from;

		printk(KERN_INFO "%c-block @%llu (%s/%llu/%d)\n",
		       btrfsic_get_block_type(state, b_all),
		       (unsigned long long)b_all->logical_bytenr,
		       b_all->dev_state->name,
		       (unsigned long long)b_all->dev_bytenr,
		       b_all->mirror_num);

		list_for_each(elem_ref_to, &b_all->ref_to_list) {
			const struct btrfsic_block_link *const l =
			    list_entry(elem_ref_to,
				       struct btrfsic_block_link,
				       node_ref_to);

			printk(KERN_INFO " %c @%llu (%s/%llu/%d)"
			       " refers %u* to"
			       " %c @%llu (%s/%llu/%d)\n",
			       btrfsic_get_block_type(state, b_all),
			       (unsigned long long)b_all->logical_bytenr,
			       b_all->dev_state->name,
			       (unsigned long long)b_all->dev_bytenr,
			       b_all->mirror_num,
			       l->ref_cnt,
			       btrfsic_get_block_type(state, l->block_ref_to),
			       (unsigned long long)
			       l->block_ref_to->logical_bytenr,
			       l->block_ref_to->dev_state->name,
			       (unsigned long long)l->block_ref_to->dev_bytenr,
			       l->block_ref_to->mirror_num);
		}

		list_for_each(elem_ref_from, &b_all->ref_from_list) {
			const struct btrfsic_block_link *const l =
			    list_entry(elem_ref_from,
				       struct btrfsic_block_link,
				       node_ref_from);

			printk(KERN_INFO " %c @%llu (%s/%llu/%d)"
			       " is ref %u* from"
			       " %c @%llu (%s/%llu/%d)\n",
			       btrfsic_get_block_type(state, b_all),
			       (unsigned long long)b_all->logical_bytenr,
			       b_all->dev_state->name,
			       (unsigned long long)b_all->dev_bytenr,
			       b_all->mirror_num,
			       l->ref_cnt,
			       btrfsic_get_block_type(state, l->block_ref_from),
			       (unsigned long long)
			       l->block_ref_from->logical_bytenr,
			       l->block_ref_from->dev_state->name,
			       (unsigned long long)
			       l->block_ref_from->dev_bytenr,
			       l->block_ref_from->mirror_num);
		}

		printk(KERN_INFO "\n");
	}
}

/*
 * Test whether the disk block contains a tree block (leaf or node)
 * (note that this test fails for the super block)
 *
 * The block is taken to be metadata when it is at least
 * state->metablock_size bytes long, its header carries the file system's
 * fsid, and the crc32c over everything behind the checksum field matches
 * the checksum stored in the header.
 *
 * Returns 0 if the block is metadata and 1 if it is not.
 */
static int btrfsic_test_for_metadata(struct btrfsic_state *state,
				     char **datav, unsigned int num_pages)
{
	struct btrfs_header *h;
	u8 csum[BTRFS_CSUM_SIZE];
	u32 crc = ~(u32)0;
	unsigned int i;

	if (num_pages * PAGE_CACHE_SIZE < state->metablock_size)
		return 1; /* not metadata */
	/* only the first metablock_size bytes are covered by the checksum */
	num_pages = state->metablock_size >> PAGE_CACHE_SHIFT;
	h = (struct btrfs_header *)datav[0];

	if (memcmp(h->fsid, state->root->fs_info->fsid, BTRFS_UUID_SIZE))
		return 1;

	for (i = 0; i < num_pages; i++) {
		/* the checksum field at the start of page 0 is skipped */
		u8 *data = i ? datav[i] : (datav[i] + BTRFS_CSUM_SIZE);
		size_t sublen = i ? PAGE_CACHE_SIZE :
				    (PAGE_CACHE_SIZE - BTRFS_CSUM_SIZE);

		crc = crc32c(crc, data, sublen);
	}
	btrfs_csum_final(crc, csum);
	if (memcmp(csum, h->csum, state->csum_size))
		return 1;

	return 0; /* is metadata */
}

static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, 1872 u64 dev_bytenr, char **mapped_datav, 1873 unsigned int num_pages, 1874 struct bio *bio, int *bio_is_patched, 1875 struct buffer_head *bh, 1876 int submit_bio_bh_rw) 1877 { 1878 int is_metadata; 1879 struct btrfsic_block *block; 1880 struct btrfsic_block_data_ctx block_ctx; 1881 int ret; 1882 struct btrfsic_state *state = dev_state->state; 1883 struct block_device *bdev = dev_state->bdev; 1884 unsigned int processed_len; 1885 1886 if (NULL != bio_is_patched) 1887 *bio_is_patched = 0; 1888 1889 again: 1890 if (num_pages == 0) 1891 return; 1892 1893 processed_len = 0; 1894 is_metadata = (0 == btrfsic_test_for_metadata(state, mapped_datav, 1895 num_pages)); 1896 1897 block = btrfsic_block_hashtable_lookup(bdev, dev_bytenr, 1898 &state->block_hashtable); 1899 if (NULL != block) { 1900 u64 bytenr = 0; 1901 struct list_head *elem_ref_to; 1902 struct list_head *tmp_ref_to; 1903 1904 if (block->is_superblock) { 1905 bytenr = btrfs_super_bytenr((struct btrfs_super_block *) 1906 mapped_datav[0]); 1907 if (num_pages * PAGE_CACHE_SIZE < 1908 BTRFS_SUPER_INFO_SIZE) { 1909 printk(KERN_INFO 1910 "btrfsic: cannot work with too short bios!\n"); 1911 return; 1912 } 1913 is_metadata = 1; 1914 BUG_ON(BTRFS_SUPER_INFO_SIZE & (PAGE_CACHE_SIZE - 1)); 1915 processed_len = BTRFS_SUPER_INFO_SIZE; 1916 if (state->print_mask & 1917 BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE) { 1918 printk(KERN_INFO 1919 "[before new superblock is written]:\n"); 1920 btrfsic_dump_tree_sub(state, block, 0); 1921 } 1922 } 1923 if (is_metadata) { 1924 if (!block->is_superblock) { 1925 if (num_pages *
PAGE_CACHE_SIZE < 1926 state->metablock_size) { 1927 printk(KERN_INFO 1928 "btrfsic: cannot work with too short bios!\n"); 1929 return; 1930 } 1931 processed_len = state->metablock_size; 1932 bytenr = btrfs_stack_header_bytenr( 1933 (struct btrfs_header *) 1934 mapped_datav[0]); 1935 btrfsic_cmp_log_and_dev_bytenr(state, bytenr, 1936 dev_state, 1937 dev_bytenr); 1938 } 1939 if (block->logical_bytenr != bytenr) { 1940 printk(KERN_INFO 1941 "Written block @%llu (%s/%llu/%d)" 1942 " found in hash table, %c," 1943 " bytenr mismatch" 1944 " (!= stored %llu).\n", 1945 (unsigned long long)bytenr, 1946 dev_state->name, 1947 (unsigned long long)dev_bytenr, 1948 block->mirror_num, 1949 btrfsic_get_block_type(state, block), 1950 (unsigned long long) 1951 block->logical_bytenr); 1952 block->logical_bytenr = bytenr; 1953 } else if (state->print_mask & 1954 BTRFSIC_PRINT_MASK_VERBOSE) 1955 printk(KERN_INFO 1956 "Written block @%llu (%s/%llu/%d)" 1957 " found in hash table, %c.\n", 1958 (unsigned long long)bytenr, 1959 dev_state->name, 1960 (unsigned long long)dev_bytenr, 1961 block->mirror_num, 1962 btrfsic_get_block_type(state, block)); 1963 } else { 1964 if (num_pages * PAGE_CACHE_SIZE < 1965 state->datablock_size) { 1966 printk(KERN_INFO 1967 "btrfsic: cannot work with too short bios!\n"); 1968 return; 1969 } 1970 processed_len = state->datablock_size; 1971 bytenr = block->logical_bytenr; 1972 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 1973 printk(KERN_INFO 1974 "Written block @%llu (%s/%llu/%d)" 1975 " found in hash table, %c.\n", 1976 (unsigned long long)bytenr, 1977 dev_state->name, 1978 (unsigned long long)dev_bytenr, 1979 block->mirror_num, 1980 btrfsic_get_block_type(state, block)); 1981 } 1982 1983 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 1984 printk(KERN_INFO 1985 "ref_to_list: %cE, ref_from_list: %cE\n", 1986 list_empty(&block->ref_to_list) ? ' ' : '!', 1987 list_empty(&block->ref_from_list) ? 
' ' : '!'); 1988 if (btrfsic_is_block_ref_by_superblock(state, block, 0)) { 1989 printk(KERN_INFO "btrfs: attempt to overwrite %c-block" 1990 " @%llu (%s/%llu/%d), old(gen=%llu," 1991 " objectid=%llu, type=%d, offset=%llu)," 1992 " new(gen=%llu)," 1993 " which is referenced by most recent superblock" 1994 " (superblockgen=%llu)!\n", 1995 btrfsic_get_block_type(state, block), 1996 (unsigned long long)bytenr, 1997 dev_state->name, 1998 (unsigned long long)dev_bytenr, 1999 block->mirror_num, 2000 (unsigned long long)block->generation, 2001 (unsigned long long) 2002 btrfs_disk_key_objectid(&block->disk_key), 2003 block->disk_key.type, 2004 (unsigned long long) 2005 btrfs_disk_key_offset(&block->disk_key), 2006 (unsigned long long) 2007 btrfs_stack_header_generation( 2008 (struct btrfs_header *) mapped_datav[0]), 2009 (unsigned long long) 2010 state->max_superblock_generation); 2011 btrfsic_dump_tree(state); 2012 } 2013 2014 if (!block->is_iodone && !block->never_written) { 2015 printk(KERN_INFO "btrfs: attempt to overwrite %c-block" 2016 " @%llu (%s/%llu/%d), oldgen=%llu, newgen=%llu," 2017 " which is not yet iodone!\n", 2018 btrfsic_get_block_type(state, block), 2019 (unsigned long long)bytenr, 2020 dev_state->name, 2021 (unsigned long long)dev_bytenr, 2022 block->mirror_num, 2023 (unsigned long long)block->generation, 2024 (unsigned long long) 2025 btrfs_stack_header_generation( 2026 (struct btrfs_header *) 2027 mapped_datav[0])); 2028 /* it would not be safe to go on */ 2029 btrfsic_dump_tree(state); 2030 goto continue_loop; 2031 } 2032 2033 /* 2034 * Clear all references of this block. Do not free 2035 * the block itself even if is not referenced anymore 2036 * because it still carries valueable information 2037 * like whether it was ever written and IO completed. 
2038 */ 2039 list_for_each_safe(elem_ref_to, tmp_ref_to, 2040 &block->ref_to_list) { 2041 struct btrfsic_block_link *const l = 2042 list_entry(elem_ref_to, 2043 struct btrfsic_block_link, 2044 node_ref_to); 2045 2046 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2047 btrfsic_print_rem_link(state, l); 2048 l->ref_cnt--; 2049 if (0 == l->ref_cnt) { 2050 list_del(&l->node_ref_to); 2051 list_del(&l->node_ref_from); 2052 btrfsic_block_link_hashtable_remove(l); 2053 btrfsic_block_link_free(l); 2054 } 2055 } 2056 2057 if (block->is_superblock) 2058 ret = btrfsic_map_superblock(state, bytenr, 2059 processed_len, 2060 bdev, &block_ctx); 2061 else 2062 ret = btrfsic_map_block(state, bytenr, processed_len, 2063 &block_ctx, 0); 2064 if (ret) { 2065 printk(KERN_INFO 2066 "btrfsic: btrfsic_map_block(root @%llu)" 2067 " failed!\n", (unsigned long long)bytenr); 2068 goto continue_loop; 2069 } 2070 block_ctx.datav = mapped_datav; 2071 /* the following is required in case of writes to mirrors, 2072 * use the same that was used for the lookup */ 2073 block_ctx.dev = dev_state; 2074 block_ctx.dev_bytenr = dev_bytenr; 2075 2076 if (is_metadata || state->include_extent_data) { 2077 block->never_written = 0; 2078 block->iodone_w_error = 0; 2079 if (NULL != bio) { 2080 block->is_iodone = 0; 2081 BUG_ON(NULL == bio_is_patched); 2082 if (!*bio_is_patched) { 2083 block->orig_bio_bh_private = 2084 bio->bi_private; 2085 block->orig_bio_bh_end_io.bio = 2086 bio->bi_end_io; 2087 block->next_in_same_bio = NULL; 2088 bio->bi_private = block; 2089 bio->bi_end_io = btrfsic_bio_end_io; 2090 *bio_is_patched = 1; 2091 } else { 2092 struct btrfsic_block *chained_block = 2093 (struct btrfsic_block *) 2094 bio->bi_private; 2095 2096 BUG_ON(NULL == chained_block); 2097 block->orig_bio_bh_private = 2098 chained_block->orig_bio_bh_private; 2099 block->orig_bio_bh_end_io.bio = 2100 chained_block->orig_bio_bh_end_io. 
2101 bio; 2102 block->next_in_same_bio = chained_block; 2103 bio->bi_private = block; 2104 } 2105 } else if (NULL != bh) { 2106 block->is_iodone = 0; 2107 block->orig_bio_bh_private = bh->b_private; 2108 block->orig_bio_bh_end_io.bh = bh->b_end_io; 2109 block->next_in_same_bio = NULL; 2110 bh->b_private = block; 2111 bh->b_end_io = btrfsic_bh_end_io; 2112 } else { 2113 block->is_iodone = 1; 2114 block->orig_bio_bh_private = NULL; 2115 block->orig_bio_bh_end_io.bio = NULL; 2116 block->next_in_same_bio = NULL; 2117 } 2118 } 2119 2120 block->flush_gen = dev_state->last_flush_gen + 1; 2121 block->submit_bio_bh_rw = submit_bio_bh_rw; 2122 if (is_metadata) { 2123 block->logical_bytenr = bytenr; 2124 block->is_metadata = 1; 2125 if (block->is_superblock) { 2126 BUG_ON(PAGE_CACHE_SIZE != 2127 BTRFS_SUPER_INFO_SIZE); 2128 ret = btrfsic_process_written_superblock( 2129 state, 2130 block, 2131 (struct btrfs_super_block *) 2132 mapped_datav[0]); 2133 if (state->print_mask & 2134 BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE) { 2135 printk(KERN_INFO 2136 "[after new superblock is written]:\n"); 2137 btrfsic_dump_tree_sub(state, block, 0); 2138 } 2139 } else { 2140 block->mirror_num = 0; /* unknown */ 2141 ret = btrfsic_process_metablock( 2142 state, 2143 block, 2144 &block_ctx, 2145 0, 0); 2146 } 2147 if (ret) 2148 printk(KERN_INFO 2149 "btrfsic: btrfsic_process_metablock" 2150 "(root @%llu) failed!\n", 2151 (unsigned long long)dev_bytenr); 2152 } else { 2153 block->is_metadata = 0; 2154 block->mirror_num = 0; /* unknown */ 2155 block->generation = BTRFSIC_GENERATION_UNKNOWN; 2156 if (!state->include_extent_data 2157 && list_empty(&block->ref_from_list)) { 2158 /* 2159 * disk block is overwritten with extent 2160 * data (not meta data) and we are configured 2161 * to not include extent data: take the 2162 * chance and free the block's memory 2163 */ 2164 btrfsic_block_hashtable_remove(block); 2165 list_del(&block->all_blocks_node); 2166 btrfsic_block_free(block); 2167 } 2168 } 2169 
btrfsic_release_block_ctx(&block_ctx); 2170 } else { 2171 /* block has not been found in hash table */ 2172 u64 bytenr; 2173 2174 if (!is_metadata) { 2175 processed_len = state->datablock_size; 2176 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2177 printk(KERN_INFO "Written block (%s/%llu/?)" 2178 " !found in hash table, D.\n", 2179 dev_state->name, 2180 (unsigned long long)dev_bytenr); 2181 if (!state->include_extent_data) { 2182 /* ignore that written D block */ 2183 goto continue_loop; 2184 } 2185 2186 /* this is getting ugly for the 2187 * include_extent_data case... */ 2188 bytenr = 0; /* unknown */ 2189 block_ctx.start = bytenr; 2190 block_ctx.len = processed_len; 2191 block_ctx.mem_to_free = NULL; 2192 block_ctx.pagev = NULL; 2193 } else { 2194 processed_len = state->metablock_size; 2195 bytenr = btrfs_stack_header_bytenr( 2196 (struct btrfs_header *) 2197 mapped_datav[0]); 2198 btrfsic_cmp_log_and_dev_bytenr(state, bytenr, dev_state, 2199 dev_bytenr); 2200 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2201 printk(KERN_INFO 2202 "Written block @%llu (%s/%llu/?)" 2203 " !found in hash table, M.\n", 2204 (unsigned long long)bytenr, 2205 dev_state->name, 2206 (unsigned long long)dev_bytenr); 2207 2208 ret = btrfsic_map_block(state, bytenr, processed_len, 2209 &block_ctx, 0); 2210 if (ret) { 2211 printk(KERN_INFO 2212 "btrfsic: btrfsic_map_block(root @%llu)" 2213 " failed!\n", 2214 (unsigned long long)dev_bytenr); 2215 goto continue_loop; 2216 } 2217 } 2218 block_ctx.datav = mapped_datav; 2219 /* the following is required in case of writes to mirrors, 2220 * use the same that was used for the lookup */ 2221 block_ctx.dev = dev_state; 2222 block_ctx.dev_bytenr = dev_bytenr; 2223 2224 block = btrfsic_block_alloc(); 2225 if (NULL == block) { 2226 printk(KERN_INFO "btrfsic: error, kmalloc failed!\n"); 2227 btrfsic_release_block_ctx(&block_ctx); 2228 goto continue_loop; 2229 } 2230 block->dev_state = dev_state; 2231 block->dev_bytenr = dev_bytenr; 2232 
block->logical_bytenr = bytenr; 2233 block->is_metadata = is_metadata; 2234 block->never_written = 0; 2235 block->iodone_w_error = 0; 2236 block->mirror_num = 0; /* unknown */ 2237 block->flush_gen = dev_state->last_flush_gen + 1; 2238 block->submit_bio_bh_rw = submit_bio_bh_rw; 2239 if (NULL != bio) { 2240 block->is_iodone = 0; 2241 BUG_ON(NULL == bio_is_patched); 2242 if (!*bio_is_patched) { 2243 block->orig_bio_bh_private = bio->bi_private; 2244 block->orig_bio_bh_end_io.bio = bio->bi_end_io; 2245 block->next_in_same_bio = NULL; 2246 bio->bi_private = block; 2247 bio->bi_end_io = btrfsic_bio_end_io; 2248 *bio_is_patched = 1; 2249 } else { 2250 struct btrfsic_block *chained_block = 2251 (struct btrfsic_block *) 2252 bio->bi_private; 2253 2254 BUG_ON(NULL == chained_block); 2255 block->orig_bio_bh_private = 2256 chained_block->orig_bio_bh_private; 2257 block->orig_bio_bh_end_io.bio = 2258 chained_block->orig_bio_bh_end_io.bio; 2259 block->next_in_same_bio = chained_block; 2260 bio->bi_private = block; 2261 } 2262 } else if (NULL != bh) { 2263 block->is_iodone = 0; 2264 block->orig_bio_bh_private = bh->b_private; 2265 block->orig_bio_bh_end_io.bh = bh->b_end_io; 2266 block->next_in_same_bio = NULL; 2267 bh->b_private = block; 2268 bh->b_end_io = btrfsic_bh_end_io; 2269 } else { 2270 block->is_iodone = 1; 2271 block->orig_bio_bh_private = NULL; 2272 block->orig_bio_bh_end_io.bio = NULL; 2273 block->next_in_same_bio = NULL; 2274 } 2275 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2276 printk(KERN_INFO 2277 "New written %c-block @%llu (%s/%llu/%d)\n", 2278 is_metadata ? 
'M' : 'D', 2279 (unsigned long long)block->logical_bytenr, 2280 block->dev_state->name, 2281 (unsigned long long)block->dev_bytenr, 2282 block->mirror_num); 2283 list_add(&block->all_blocks_node, &state->all_blocks_list); 2284 btrfsic_block_hashtable_add(block, &state->block_hashtable); 2285 2286 if (is_metadata) { 2287 ret = btrfsic_process_metablock(state, block, 2288 &block_ctx, 0, 0); 2289 if (ret) 2290 printk(KERN_INFO 2291 "btrfsic: process_metablock(root @%llu)" 2292 " failed!\n", 2293 (unsigned long long)dev_bytenr); 2294 } 2295 btrfsic_release_block_ctx(&block_ctx); 2296 } 2297 2298 continue_loop: 2299 BUG_ON(!processed_len); 2300 dev_bytenr += processed_len; 2301 mapped_datav += processed_len >> PAGE_CACHE_SHIFT; 2302 num_pages -= processed_len >> PAGE_CACHE_SHIFT; 2303 goto again; 2304 } 2305 2306 static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status) 2307 { 2308 struct btrfsic_block *block = (struct btrfsic_block *)bp->bi_private; 2309 int iodone_w_error; 2310 2311 /* mutex is not held! 
This is not save if IO is not yet completed 2312 * on umount */ 2313 iodone_w_error = 0; 2314 if (bio_error_status) 2315 iodone_w_error = 1; 2316 2317 BUG_ON(NULL == block); 2318 bp->bi_private = block->orig_bio_bh_private; 2319 bp->bi_end_io = block->orig_bio_bh_end_io.bio; 2320 2321 do { 2322 struct btrfsic_block *next_block; 2323 struct btrfsic_dev_state *const dev_state = block->dev_state; 2324 2325 if ((dev_state->state->print_mask & 2326 BTRFSIC_PRINT_MASK_END_IO_BIO_BH)) 2327 printk(KERN_INFO 2328 "bio_end_io(err=%d) for %c @%llu (%s/%llu/%d)\n", 2329 bio_error_status, 2330 btrfsic_get_block_type(dev_state->state, block), 2331 (unsigned long long)block->logical_bytenr, 2332 dev_state->name, 2333 (unsigned long long)block->dev_bytenr, 2334 block->mirror_num); 2335 next_block = block->next_in_same_bio; 2336 block->iodone_w_error = iodone_w_error; 2337 if (block->submit_bio_bh_rw & REQ_FLUSH) { 2338 dev_state->last_flush_gen++; 2339 if ((dev_state->state->print_mask & 2340 BTRFSIC_PRINT_MASK_END_IO_BIO_BH)) 2341 printk(KERN_INFO 2342 "bio_end_io() new %s flush_gen=%llu\n", 2343 dev_state->name, 2344 (unsigned long long) 2345 dev_state->last_flush_gen); 2346 } 2347 if (block->submit_bio_bh_rw & REQ_FUA) 2348 block->flush_gen = 0; /* FUA completed means block is 2349 * on disk */ 2350 block->is_iodone = 1; /* for FLUSH, this releases the block */ 2351 block = next_block; 2352 } while (NULL != block); 2353 2354 bp->bi_end_io(bp, bio_error_status); 2355 } 2356 2357 static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate) 2358 { 2359 struct btrfsic_block *block = (struct btrfsic_block *)bh->b_private; 2360 int iodone_w_error = !uptodate; 2361 struct btrfsic_dev_state *dev_state; 2362 2363 BUG_ON(NULL == block); 2364 dev_state = block->dev_state; 2365 if ((dev_state->state->print_mask & BTRFSIC_PRINT_MASK_END_IO_BIO_BH)) 2366 printk(KERN_INFO 2367 "bh_end_io(error=%d) for %c @%llu (%s/%llu/%d)\n", 2368 iodone_w_error, 2369 
btrfsic_get_block_type(dev_state->state, block), 2370 (unsigned long long)block->logical_bytenr, 2371 block->dev_state->name, 2372 (unsigned long long)block->dev_bytenr, 2373 block->mirror_num); 2374 2375 block->iodone_w_error = iodone_w_error; 2376 if (block->submit_bio_bh_rw & REQ_FLUSH) { 2377 dev_state->last_flush_gen++; 2378 if ((dev_state->state->print_mask & 2379 BTRFSIC_PRINT_MASK_END_IO_BIO_BH)) 2380 printk(KERN_INFO 2381 "bh_end_io() new %s flush_gen=%llu\n", 2382 dev_state->name, 2383 (unsigned long long)dev_state->last_flush_gen); 2384 } 2385 if (block->submit_bio_bh_rw & REQ_FUA) 2386 block->flush_gen = 0; /* FUA completed means block is on disk */ 2387 2388 bh->b_private = block->orig_bio_bh_private; 2389 bh->b_end_io = block->orig_bio_bh_end_io.bh; 2390 block->is_iodone = 1; /* for FLUSH, this releases the block */ 2391 bh->b_end_io(bh, uptodate); 2392 } 2393 2394 static int btrfsic_process_written_superblock( 2395 struct btrfsic_state *state, 2396 struct btrfsic_block *const superblock, 2397 struct btrfs_super_block *const super_hdr) 2398 { 2399 int pass; 2400 2401 superblock->generation = btrfs_super_generation(super_hdr); 2402 if (!(superblock->generation > state->max_superblock_generation || 2403 0 == state->max_superblock_generation)) { 2404 if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE) 2405 printk(KERN_INFO 2406 "btrfsic: superblock @%llu (%s/%llu/%d)" 2407 " with old gen %llu <= %llu\n", 2408 (unsigned long long)superblock->logical_bytenr, 2409 superblock->dev_state->name, 2410 (unsigned long long)superblock->dev_bytenr, 2411 superblock->mirror_num, 2412 (unsigned long long) 2413 btrfs_super_generation(super_hdr), 2414 (unsigned long long) 2415 state->max_superblock_generation); 2416 } else { 2417 if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE) 2418 printk(KERN_INFO 2419 "btrfsic: got new superblock @%llu (%s/%llu/%d)" 2420 " with new gen %llu > %llu\n", 2421 (unsigned long long)superblock->logical_bytenr, 2422 
superblock->dev_state->name, 2423 (unsigned long long)superblock->dev_bytenr, 2424 superblock->mirror_num, 2425 (unsigned long long) 2426 btrfs_super_generation(super_hdr), 2427 (unsigned long long) 2428 state->max_superblock_generation); 2429 2430 state->max_superblock_generation = 2431 btrfs_super_generation(super_hdr); 2432 state->latest_superblock = superblock; 2433 } 2434 2435 for (pass = 0; pass < 3; pass++) { 2436 int ret; 2437 u64 next_bytenr; 2438 struct btrfsic_block *next_block; 2439 struct btrfsic_block_data_ctx tmp_next_block_ctx; 2440 struct btrfsic_block_link *l; 2441 int num_copies; 2442 int mirror_num; 2443 const char *additional_string = NULL; 2444 struct btrfs_disk_key tmp_disk_key = {0}; 2445 2446 btrfs_set_disk_key_objectid(&tmp_disk_key, 2447 BTRFS_ROOT_ITEM_KEY); 2448 btrfs_set_disk_key_objectid(&tmp_disk_key, 0); 2449 2450 switch (pass) { 2451 case 0: 2452 btrfs_set_disk_key_objectid(&tmp_disk_key, 2453 BTRFS_ROOT_TREE_OBJECTID); 2454 additional_string = "root "; 2455 next_bytenr = btrfs_super_root(super_hdr); 2456 if (state->print_mask & 2457 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) 2458 printk(KERN_INFO "root@%llu\n", 2459 (unsigned long long)next_bytenr); 2460 break; 2461 case 1: 2462 btrfs_set_disk_key_objectid(&tmp_disk_key, 2463 BTRFS_CHUNK_TREE_OBJECTID); 2464 additional_string = "chunk "; 2465 next_bytenr = btrfs_super_chunk_root(super_hdr); 2466 if (state->print_mask & 2467 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) 2468 printk(KERN_INFO "chunk@%llu\n", 2469 (unsigned long long)next_bytenr); 2470 break; 2471 case 2: 2472 btrfs_set_disk_key_objectid(&tmp_disk_key, 2473 BTRFS_TREE_LOG_OBJECTID); 2474 additional_string = "log "; 2475 next_bytenr = btrfs_super_log_root(super_hdr); 2476 if (0 == next_bytenr) 2477 continue; 2478 if (state->print_mask & 2479 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) 2480 printk(KERN_INFO "log@%llu\n", 2481 (unsigned long long)next_bytenr); 2482 break; 2483 } 2484 2485 num_copies = 2486 
btrfs_num_copies(state->root->fs_info, 2487 next_bytenr, BTRFS_SUPER_INFO_SIZE); 2488 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) 2489 printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", 2490 (unsigned long long)next_bytenr, num_copies); 2491 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { 2492 int was_created; 2493 2494 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2495 printk(KERN_INFO 2496 "btrfsic_process_written_superblock(" 2497 "mirror_num=%d)\n", mirror_num); 2498 ret = btrfsic_map_block(state, next_bytenr, 2499 BTRFS_SUPER_INFO_SIZE, 2500 &tmp_next_block_ctx, 2501 mirror_num); 2502 if (ret) { 2503 printk(KERN_INFO 2504 "btrfsic: btrfsic_map_block(@%llu," 2505 " mirror=%d) failed!\n", 2506 (unsigned long long)next_bytenr, 2507 mirror_num); 2508 return -1; 2509 } 2510 2511 next_block = btrfsic_block_lookup_or_add( 2512 state, 2513 &tmp_next_block_ctx, 2514 additional_string, 2515 1, 0, 1, 2516 mirror_num, 2517 &was_created); 2518 if (NULL == next_block) { 2519 printk(KERN_INFO 2520 "btrfsic: error, kmalloc failed!\n"); 2521 btrfsic_release_block_ctx(&tmp_next_block_ctx); 2522 return -1; 2523 } 2524 2525 next_block->disk_key = tmp_disk_key; 2526 if (was_created) 2527 next_block->generation = 2528 BTRFSIC_GENERATION_UNKNOWN; 2529 l = btrfsic_block_link_lookup_or_add( 2530 state, 2531 &tmp_next_block_ctx, 2532 next_block, 2533 superblock, 2534 BTRFSIC_GENERATION_UNKNOWN); 2535 btrfsic_release_block_ctx(&tmp_next_block_ctx); 2536 if (NULL == l) 2537 return -1; 2538 } 2539 } 2540 2541 if (-1 == btrfsic_check_all_ref_blocks(state, superblock, 0)) { 2542 WARN_ON(1); 2543 btrfsic_dump_tree(state); 2544 } 2545 2546 return 0; 2547 } 2548 2549 static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state, 2550 struct btrfsic_block *const block, 2551 int recursion_level) 2552 { 2553 struct list_head *elem_ref_to; 2554 int ret = 0; 2555 2556 if (recursion_level >= 3 + BTRFS_MAX_LEVEL) { 2557 /* 2558 * Note that this situation can 
happen and does not 2559 * indicate an error in regular cases. It happens 2560 * when disk blocks are freed and later reused. 2561 * The check-integrity module is not aware of any 2562 * block free operations, it just recognizes block 2563 * write operations. Therefore it keeps the linkage 2564 * information for a block until a block is 2565 * rewritten. This can temporarily cause incorrect 2566 * and even circular linkage informations. This 2567 * causes no harm unless such blocks are referenced 2568 * by the most recent super block. 2569 */ 2570 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2571 printk(KERN_INFO 2572 "btrfsic: abort cyclic linkage (case 1).\n"); 2573 2574 return ret; 2575 } 2576 2577 /* 2578 * This algorithm is recursive because the amount of used stack 2579 * space is very small and the max recursion depth is limited. 2580 */ 2581 list_for_each(elem_ref_to, &block->ref_to_list) { 2582 const struct btrfsic_block_link *const l = 2583 list_entry(elem_ref_to, struct btrfsic_block_link, 2584 node_ref_to); 2585 2586 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2587 printk(KERN_INFO 2588 "rl=%d, %c @%llu (%s/%llu/%d)" 2589 " %u* refers to %c @%llu (%s/%llu/%d)\n", 2590 recursion_level, 2591 btrfsic_get_block_type(state, block), 2592 (unsigned long long)block->logical_bytenr, 2593 block->dev_state->name, 2594 (unsigned long long)block->dev_bytenr, 2595 block->mirror_num, 2596 l->ref_cnt, 2597 btrfsic_get_block_type(state, l->block_ref_to), 2598 (unsigned long long) 2599 l->block_ref_to->logical_bytenr, 2600 l->block_ref_to->dev_state->name, 2601 (unsigned long long)l->block_ref_to->dev_bytenr, 2602 l->block_ref_to->mirror_num); 2603 if (l->block_ref_to->never_written) { 2604 printk(KERN_INFO "btrfs: attempt to write superblock" 2605 " which references block %c @%llu (%s/%llu/%d)" 2606 " which is never written!\n", 2607 btrfsic_get_block_type(state, l->block_ref_to), 2608 (unsigned long long) 2609 l->block_ref_to->logical_bytenr, 2610 
l->block_ref_to->dev_state->name, 2611 (unsigned long long)l->block_ref_to->dev_bytenr, 2612 l->block_ref_to->mirror_num); 2613 ret = -1; 2614 } else if (!l->block_ref_to->is_iodone) { 2615 printk(KERN_INFO "btrfs: attempt to write superblock" 2616 " which references block %c @%llu (%s/%llu/%d)" 2617 " which is not yet iodone!\n", 2618 btrfsic_get_block_type(state, l->block_ref_to), 2619 (unsigned long long) 2620 l->block_ref_to->logical_bytenr, 2621 l->block_ref_to->dev_state->name, 2622 (unsigned long long)l->block_ref_to->dev_bytenr, 2623 l->block_ref_to->mirror_num); 2624 ret = -1; 2625 } else if (l->block_ref_to->iodone_w_error) { 2626 printk(KERN_INFO "btrfs: attempt to write superblock" 2627 " which references block %c @%llu (%s/%llu/%d)" 2628 " which has write error!\n", 2629 btrfsic_get_block_type(state, l->block_ref_to), 2630 (unsigned long long) 2631 l->block_ref_to->logical_bytenr, 2632 l->block_ref_to->dev_state->name, 2633 (unsigned long long)l->block_ref_to->dev_bytenr, 2634 l->block_ref_to->mirror_num); 2635 ret = -1; 2636 } else if (l->parent_generation != 2637 l->block_ref_to->generation && 2638 BTRFSIC_GENERATION_UNKNOWN != 2639 l->parent_generation && 2640 BTRFSIC_GENERATION_UNKNOWN != 2641 l->block_ref_to->generation) { 2642 printk(KERN_INFO "btrfs: attempt to write superblock" 2643 " which references block %c @%llu (%s/%llu/%d)" 2644 " with generation %llu !=" 2645 " parent generation %llu!\n", 2646 btrfsic_get_block_type(state, l->block_ref_to), 2647 (unsigned long long) 2648 l->block_ref_to->logical_bytenr, 2649 l->block_ref_to->dev_state->name, 2650 (unsigned long long)l->block_ref_to->dev_bytenr, 2651 l->block_ref_to->mirror_num, 2652 (unsigned long long)l->block_ref_to->generation, 2653 (unsigned long long)l->parent_generation); 2654 ret = -1; 2655 } else if (l->block_ref_to->flush_gen > 2656 l->block_ref_to->dev_state->last_flush_gen) { 2657 printk(KERN_INFO "btrfs: attempt to write superblock" 2658 " which references block %c @%llu 
(%s/%llu/%d)" 2659 " which is not flushed out of disk's write cache" 2660 " (block flush_gen=%llu," 2661 " dev->flush_gen=%llu)!\n", 2662 btrfsic_get_block_type(state, l->block_ref_to), 2663 (unsigned long long) 2664 l->block_ref_to->logical_bytenr, 2665 l->block_ref_to->dev_state->name, 2666 (unsigned long long)l->block_ref_to->dev_bytenr, 2667 l->block_ref_to->mirror_num, 2668 (unsigned long long)block->flush_gen, 2669 (unsigned long long) 2670 l->block_ref_to->dev_state->last_flush_gen); 2671 ret = -1; 2672 } else if (-1 == btrfsic_check_all_ref_blocks(state, 2673 l->block_ref_to, 2674 recursion_level + 2675 1)) { 2676 ret = -1; 2677 } 2678 } 2679 2680 return ret; 2681 } 2682 2683 static int btrfsic_is_block_ref_by_superblock( 2684 const struct btrfsic_state *state, 2685 const struct btrfsic_block *block, 2686 int recursion_level) 2687 { 2688 struct list_head *elem_ref_from; 2689 2690 if (recursion_level >= 3 + BTRFS_MAX_LEVEL) { 2691 /* refer to comment at "abort cyclic linkage (case 1)" */ 2692 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2693 printk(KERN_INFO 2694 "btrfsic: abort cyclic linkage (case 2).\n"); 2695 2696 return 0; 2697 } 2698 2699 /* 2700 * This algorithm is recursive because the amount of used stack space 2701 * is very small and the max recursion depth is limited. 
2702 */ 2703 list_for_each(elem_ref_from, &block->ref_from_list) { 2704 const struct btrfsic_block_link *const l = 2705 list_entry(elem_ref_from, struct btrfsic_block_link, 2706 node_ref_from); 2707 2708 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2709 printk(KERN_INFO 2710 "rl=%d, %c @%llu (%s/%llu/%d)" 2711 " is ref %u* from %c @%llu (%s/%llu/%d)\n", 2712 recursion_level, 2713 btrfsic_get_block_type(state, block), 2714 (unsigned long long)block->logical_bytenr, 2715 block->dev_state->name, 2716 (unsigned long long)block->dev_bytenr, 2717 block->mirror_num, 2718 l->ref_cnt, 2719 btrfsic_get_block_type(state, l->block_ref_from), 2720 (unsigned long long) 2721 l->block_ref_from->logical_bytenr, 2722 l->block_ref_from->dev_state->name, 2723 (unsigned long long) 2724 l->block_ref_from->dev_bytenr, 2725 l->block_ref_from->mirror_num); 2726 if (l->block_ref_from->is_superblock && 2727 state->latest_superblock->dev_bytenr == 2728 l->block_ref_from->dev_bytenr && 2729 state->latest_superblock->dev_state->bdev == 2730 l->block_ref_from->dev_state->bdev) 2731 return 1; 2732 else if (btrfsic_is_block_ref_by_superblock(state, 2733 l->block_ref_from, 2734 recursion_level + 2735 1)) 2736 return 1; 2737 } 2738 2739 return 0; 2740 } 2741 2742 static void btrfsic_print_add_link(const struct btrfsic_state *state, 2743 const struct btrfsic_block_link *l) 2744 { 2745 printk(KERN_INFO 2746 "Add %u* link from %c @%llu (%s/%llu/%d)" 2747 " to %c @%llu (%s/%llu/%d).\n", 2748 l->ref_cnt, 2749 btrfsic_get_block_type(state, l->block_ref_from), 2750 (unsigned long long)l->block_ref_from->logical_bytenr, 2751 l->block_ref_from->dev_state->name, 2752 (unsigned long long)l->block_ref_from->dev_bytenr, 2753 l->block_ref_from->mirror_num, 2754 btrfsic_get_block_type(state, l->block_ref_to), 2755 (unsigned long long)l->block_ref_to->logical_bytenr, 2756 l->block_ref_to->dev_state->name, 2757 (unsigned long long)l->block_ref_to->dev_bytenr, 2758 l->block_ref_to->mirror_num); 2759 } 2760 
2761 static void btrfsic_print_rem_link(const struct btrfsic_state *state, 2762 const struct btrfsic_block_link *l) 2763 { 2764 printk(KERN_INFO 2765 "Rem %u* link from %c @%llu (%s/%llu/%d)" 2766 " to %c @%llu (%s/%llu/%d).\n", 2767 l->ref_cnt, 2768 btrfsic_get_block_type(state, l->block_ref_from), 2769 (unsigned long long)l->block_ref_from->logical_bytenr, 2770 l->block_ref_from->dev_state->name, 2771 (unsigned long long)l->block_ref_from->dev_bytenr, 2772 l->block_ref_from->mirror_num, 2773 btrfsic_get_block_type(state, l->block_ref_to), 2774 (unsigned long long)l->block_ref_to->logical_bytenr, 2775 l->block_ref_to->dev_state->name, 2776 (unsigned long long)l->block_ref_to->dev_bytenr, 2777 l->block_ref_to->mirror_num); 2778 } 2779 2780 static char btrfsic_get_block_type(const struct btrfsic_state *state, 2781 const struct btrfsic_block *block) 2782 { 2783 if (block->is_superblock && 2784 state->latest_superblock->dev_bytenr == block->dev_bytenr && 2785 state->latest_superblock->dev_state->bdev == block->dev_state->bdev) 2786 return 'S'; 2787 else if (block->is_superblock) 2788 return 's'; 2789 else if (block->is_metadata) 2790 return 'M'; 2791 else 2792 return 'D'; 2793 } 2794 2795 static void btrfsic_dump_tree(const struct btrfsic_state *state) 2796 { 2797 btrfsic_dump_tree_sub(state, state->latest_superblock, 0); 2798 } 2799 2800 static void btrfsic_dump_tree_sub(const struct btrfsic_state *state, 2801 const struct btrfsic_block *block, 2802 int indent_level) 2803 { 2804 struct list_head *elem_ref_to; 2805 int indent_add; 2806 static char buf[80]; 2807 int cursor_position; 2808 2809 /* 2810 * Should better fill an on-stack buffer with a complete line and 2811 * dump it at once when it is time to print a newline character. 2812 */ 2813 2814 /* 2815 * This algorithm is recursive because the amount of used stack space 2816 * is very small and the max recursion depth is limited. 
2817 */ 2818 indent_add = sprintf(buf, "%c-%llu(%s/%llu/%d)", 2819 btrfsic_get_block_type(state, block), 2820 (unsigned long long)block->logical_bytenr, 2821 block->dev_state->name, 2822 (unsigned long long)block->dev_bytenr, 2823 block->mirror_num); 2824 if (indent_level + indent_add > BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL) { 2825 printk("[...]\n"); 2826 return; 2827 } 2828 printk(buf); 2829 indent_level += indent_add; 2830 if (list_empty(&block->ref_to_list)) { 2831 printk("\n"); 2832 return; 2833 } 2834 if (block->mirror_num > 1 && 2835 !(state->print_mask & BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS)) { 2836 printk(" [...]\n"); 2837 return; 2838 } 2839 2840 cursor_position = indent_level; 2841 list_for_each(elem_ref_to, &block->ref_to_list) { 2842 const struct btrfsic_block_link *const l = 2843 list_entry(elem_ref_to, struct btrfsic_block_link, 2844 node_ref_to); 2845 2846 while (cursor_position < indent_level) { 2847 printk(" "); 2848 cursor_position++; 2849 } 2850 if (l->ref_cnt > 1) 2851 indent_add = sprintf(buf, " %d*--> ", l->ref_cnt); 2852 else 2853 indent_add = sprintf(buf, " --> "); 2854 if (indent_level + indent_add > 2855 BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL) { 2856 printk("[...]\n"); 2857 cursor_position = 0; 2858 continue; 2859 } 2860 2861 printk(buf); 2862 2863 btrfsic_dump_tree_sub(state, l->block_ref_to, 2864 indent_level + indent_add); 2865 cursor_position = 0; 2866 } 2867 } 2868 2869 static struct btrfsic_block_link *btrfsic_block_link_lookup_or_add( 2870 struct btrfsic_state *state, 2871 struct btrfsic_block_data_ctx *next_block_ctx, 2872 struct btrfsic_block *next_block, 2873 struct btrfsic_block *from_block, 2874 u64 parent_generation) 2875 { 2876 struct btrfsic_block_link *l; 2877 2878 l = btrfsic_block_link_hashtable_lookup(next_block_ctx->dev->bdev, 2879 next_block_ctx->dev_bytenr, 2880 from_block->dev_state->bdev, 2881 from_block->dev_bytenr, 2882 &state->block_link_hashtable); 2883 if (NULL == l) { 2884 l = btrfsic_block_link_alloc(); 2885 if 
(NULL == l) { 2886 printk(KERN_INFO 2887 "btrfsic: error, kmalloc" " failed!\n"); 2888 return NULL; 2889 } 2890 2891 l->block_ref_to = next_block; 2892 l->block_ref_from = from_block; 2893 l->ref_cnt = 1; 2894 l->parent_generation = parent_generation; 2895 2896 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2897 btrfsic_print_add_link(state, l); 2898 2899 list_add(&l->node_ref_to, &from_block->ref_to_list); 2900 list_add(&l->node_ref_from, &next_block->ref_from_list); 2901 2902 btrfsic_block_link_hashtable_add(l, 2903 &state->block_link_hashtable); 2904 } else { 2905 l->ref_cnt++; 2906 l->parent_generation = parent_generation; 2907 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2908 btrfsic_print_add_link(state, l); 2909 } 2910 2911 return l; 2912 } 2913 2914 static struct btrfsic_block *btrfsic_block_lookup_or_add( 2915 struct btrfsic_state *state, 2916 struct btrfsic_block_data_ctx *block_ctx, 2917 const char *additional_string, 2918 int is_metadata, 2919 int is_iodone, 2920 int never_written, 2921 int mirror_num, 2922 int *was_created) 2923 { 2924 struct btrfsic_block *block; 2925 2926 block = btrfsic_block_hashtable_lookup(block_ctx->dev->bdev, 2927 block_ctx->dev_bytenr, 2928 &state->block_hashtable); 2929 if (NULL == block) { 2930 struct btrfsic_dev_state *dev_state; 2931 2932 block = btrfsic_block_alloc(); 2933 if (NULL == block) { 2934 printk(KERN_INFO "btrfsic: error, kmalloc failed!\n"); 2935 return NULL; 2936 } 2937 dev_state = btrfsic_dev_state_lookup(block_ctx->dev->bdev); 2938 if (NULL == dev_state) { 2939 printk(KERN_INFO 2940 "btrfsic: error, lookup dev_state failed!\n"); 2941 btrfsic_block_free(block); 2942 return NULL; 2943 } 2944 block->dev_state = dev_state; 2945 block->dev_bytenr = block_ctx->dev_bytenr; 2946 block->logical_bytenr = block_ctx->start; 2947 block->is_metadata = is_metadata; 2948 block->is_iodone = is_iodone; 2949 block->never_written = never_written; 2950 block->mirror_num = mirror_num; 2951 if (state->print_mask & 
BTRFSIC_PRINT_MASK_VERBOSE) 2952 printk(KERN_INFO 2953 "New %s%c-block @%llu (%s/%llu/%d)\n", 2954 additional_string, 2955 btrfsic_get_block_type(state, block), 2956 (unsigned long long)block->logical_bytenr, 2957 dev_state->name, 2958 (unsigned long long)block->dev_bytenr, 2959 mirror_num); 2960 list_add(&block->all_blocks_node, &state->all_blocks_list); 2961 btrfsic_block_hashtable_add(block, &state->block_hashtable); 2962 if (NULL != was_created) 2963 *was_created = 1; 2964 } else { 2965 if (NULL != was_created) 2966 *was_created = 0; 2967 } 2968 2969 return block; 2970 } 2971 2972 static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state, 2973 u64 bytenr, 2974 struct btrfsic_dev_state *dev_state, 2975 u64 dev_bytenr) 2976 { 2977 int num_copies; 2978 int mirror_num; 2979 int ret; 2980 struct btrfsic_block_data_ctx block_ctx; 2981 int match = 0; 2982 2983 num_copies = btrfs_num_copies(state->root->fs_info, 2984 bytenr, state->metablock_size); 2985 2986 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { 2987 ret = btrfsic_map_block(state, bytenr, state->metablock_size, 2988 &block_ctx, mirror_num); 2989 if (ret) { 2990 printk(KERN_INFO "btrfsic:" 2991 " btrfsic_map_block(logical @%llu," 2992 " mirror %d) failed!\n", 2993 (unsigned long long)bytenr, mirror_num); 2994 continue; 2995 } 2996 2997 if (dev_state->bdev == block_ctx.dev->bdev && 2998 dev_bytenr == block_ctx.dev_bytenr) { 2999 match++; 3000 btrfsic_release_block_ctx(&block_ctx); 3001 break; 3002 } 3003 btrfsic_release_block_ctx(&block_ctx); 3004 } 3005 3006 if (!match) { 3007 printk(KERN_INFO "btrfs: attempt to write M-block which contains logical bytenr that doesn't map to dev+physical bytenr of submit_bio," 3008 " buffer->log_bytenr=%llu, submit_bio(bdev=%s," 3009 " phys_bytenr=%llu)!\n", 3010 (unsigned long long)bytenr, dev_state->name, 3011 (unsigned long long)dev_bytenr); 3012 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { 3013 ret = btrfsic_map_block(state, 
bytenr, 3014 state->metablock_size, 3015 &block_ctx, mirror_num); 3016 if (ret) 3017 continue; 3018 3019 printk(KERN_INFO "Read logical bytenr @%llu maps to" 3020 " (%s/%llu/%d)\n", 3021 (unsigned long long)bytenr, 3022 block_ctx.dev->name, 3023 (unsigned long long)block_ctx.dev_bytenr, 3024 mirror_num); 3025 } 3026 WARN_ON(1); 3027 } 3028 } 3029 3030 static struct btrfsic_dev_state *btrfsic_dev_state_lookup( 3031 struct block_device *bdev) 3032 { 3033 struct btrfsic_dev_state *ds; 3034 3035 ds = btrfsic_dev_state_hashtable_lookup(bdev, 3036 &btrfsic_dev_state_hashtable); 3037 return ds; 3038 } 3039 3040 int btrfsic_submit_bh(int rw, struct buffer_head *bh) 3041 { 3042 struct btrfsic_dev_state *dev_state; 3043 3044 if (!btrfsic_is_initialized) 3045 return submit_bh(rw, bh); 3046 3047 mutex_lock(&btrfsic_mutex); 3048 /* since btrfsic_submit_bh() might also be called before 3049 * btrfsic_mount(), this might return NULL */ 3050 dev_state = btrfsic_dev_state_lookup(bh->b_bdev); 3051 3052 /* Only called to write the superblock (incl. 
FLUSH/FUA) */ 3053 if (NULL != dev_state && 3054 (rw & WRITE) && bh->b_size > 0) { 3055 u64 dev_bytenr; 3056 3057 dev_bytenr = 4096 * bh->b_blocknr; 3058 if (dev_state->state->print_mask & 3059 BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH) 3060 printk(KERN_INFO 3061 "submit_bh(rw=0x%x, blocknr=%lu (bytenr %llu)," 3062 " size=%lu, data=%p, bdev=%p)\n", 3063 rw, (unsigned long)bh->b_blocknr, 3064 (unsigned long long)dev_bytenr, 3065 (unsigned long)bh->b_size, bh->b_data, 3066 bh->b_bdev); 3067 btrfsic_process_written_block(dev_state, dev_bytenr, 3068 &bh->b_data, 1, NULL, 3069 NULL, bh, rw); 3070 } else if (NULL != dev_state && (rw & REQ_FLUSH)) { 3071 if (dev_state->state->print_mask & 3072 BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH) 3073 printk(KERN_INFO 3074 "submit_bh(rw=0x%x FLUSH, bdev=%p)\n", 3075 rw, bh->b_bdev); 3076 if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) { 3077 if ((dev_state->state->print_mask & 3078 (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH | 3079 BTRFSIC_PRINT_MASK_VERBOSE))) 3080 printk(KERN_INFO 3081 "btrfsic_submit_bh(%s) with FLUSH" 3082 " but dummy block already in use" 3083 " (ignored)!\n", 3084 dev_state->name); 3085 } else { 3086 struct btrfsic_block *const block = 3087 &dev_state->dummy_block_for_bio_bh_flush; 3088 3089 block->is_iodone = 0; 3090 block->never_written = 0; 3091 block->iodone_w_error = 0; 3092 block->flush_gen = dev_state->last_flush_gen + 1; 3093 block->submit_bio_bh_rw = rw; 3094 block->orig_bio_bh_private = bh->b_private; 3095 block->orig_bio_bh_end_io.bh = bh->b_end_io; 3096 block->next_in_same_bio = NULL; 3097 bh->b_private = block; 3098 bh->b_end_io = btrfsic_bh_end_io; 3099 } 3100 } 3101 mutex_unlock(&btrfsic_mutex); 3102 return submit_bh(rw, bh); 3103 } 3104 3105 void btrfsic_submit_bio(int rw, struct bio *bio) 3106 { 3107 struct btrfsic_dev_state *dev_state; 3108 3109 if (!btrfsic_is_initialized) { 3110 submit_bio(rw, bio); 3111 return; 3112 } 3113 3114 mutex_lock(&btrfsic_mutex); 3115 /* since btrfsic_submit_bio() is also called 
before 3116 * btrfsic_mount(), this might return NULL */ 3117 dev_state = btrfsic_dev_state_lookup(bio->bi_bdev); 3118 if (NULL != dev_state && 3119 (rw & WRITE) && NULL != bio->bi_io_vec) { 3120 unsigned int i; 3121 u64 dev_bytenr; 3122 int bio_is_patched; 3123 char **mapped_datav; 3124 3125 dev_bytenr = 512 * bio->bi_sector; 3126 bio_is_patched = 0; 3127 if (dev_state->state->print_mask & 3128 BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH) 3129 printk(KERN_INFO 3130 "submit_bio(rw=0x%x, bi_vcnt=%u," 3131 " bi_sector=%lu (bytenr %llu), bi_bdev=%p)\n", 3132 rw, bio->bi_vcnt, (unsigned long)bio->bi_sector, 3133 (unsigned long long)dev_bytenr, 3134 bio->bi_bdev); 3135 3136 mapped_datav = kmalloc(sizeof(*mapped_datav) * bio->bi_vcnt, 3137 GFP_NOFS); 3138 if (!mapped_datav) 3139 goto leave; 3140 for (i = 0; i < bio->bi_vcnt; i++) { 3141 BUG_ON(bio->bi_io_vec[i].bv_len != PAGE_CACHE_SIZE); 3142 mapped_datav[i] = kmap(bio->bi_io_vec[i].bv_page); 3143 if (!mapped_datav[i]) { 3144 while (i > 0) { 3145 i--; 3146 kunmap(bio->bi_io_vec[i].bv_page); 3147 } 3148 kfree(mapped_datav); 3149 goto leave; 3150 } 3151 if ((BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH | 3152 BTRFSIC_PRINT_MASK_VERBOSE) == 3153 (dev_state->state->print_mask & 3154 (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH | 3155 BTRFSIC_PRINT_MASK_VERBOSE))) 3156 printk(KERN_INFO 3157 "#%u: page=%p, len=%u, offset=%u\n", 3158 i, bio->bi_io_vec[i].bv_page, 3159 bio->bi_io_vec[i].bv_len, 3160 bio->bi_io_vec[i].bv_offset); 3161 } 3162 btrfsic_process_written_block(dev_state, dev_bytenr, 3163 mapped_datav, bio->bi_vcnt, 3164 bio, &bio_is_patched, 3165 NULL, rw); 3166 while (i > 0) { 3167 i--; 3168 kunmap(bio->bi_io_vec[i].bv_page); 3169 } 3170 kfree(mapped_datav); 3171 } else if (NULL != dev_state && (rw & REQ_FLUSH)) { 3172 if (dev_state->state->print_mask & 3173 BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH) 3174 printk(KERN_INFO 3175 "submit_bio(rw=0x%x FLUSH, bdev=%p)\n", 3176 rw, bio->bi_bdev); 3177 if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) { 3178 if 
((dev_state->state->print_mask & 3179 (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH | 3180 BTRFSIC_PRINT_MASK_VERBOSE))) 3181 printk(KERN_INFO 3182 "btrfsic_submit_bio(%s) with FLUSH" 3183 " but dummy block already in use" 3184 " (ignored)!\n", 3185 dev_state->name); 3186 } else { 3187 struct btrfsic_block *const block = 3188 &dev_state->dummy_block_for_bio_bh_flush; 3189 3190 block->is_iodone = 0; 3191 block->never_written = 0; 3192 block->iodone_w_error = 0; 3193 block->flush_gen = dev_state->last_flush_gen + 1; 3194 block->submit_bio_bh_rw = rw; 3195 block->orig_bio_bh_private = bio->bi_private; 3196 block->orig_bio_bh_end_io.bio = bio->bi_end_io; 3197 block->next_in_same_bio = NULL; 3198 bio->bi_private = block; 3199 bio->bi_end_io = btrfsic_bio_end_io; 3200 } 3201 } 3202 leave: 3203 mutex_unlock(&btrfsic_mutex); 3204 3205 submit_bio(rw, bio); 3206 } 3207 3208 int btrfsic_mount(struct btrfs_root *root, 3209 struct btrfs_fs_devices *fs_devices, 3210 int including_extent_data, u32 print_mask) 3211 { 3212 int ret; 3213 struct btrfsic_state *state; 3214 struct list_head *dev_head = &fs_devices->devices; 3215 struct btrfs_device *device; 3216 3217 if (root->nodesize != root->leafsize) { 3218 printk(KERN_INFO 3219 "btrfsic: cannot handle nodesize %d != leafsize %d!\n", 3220 root->nodesize, root->leafsize); 3221 return -1; 3222 } 3223 if (root->nodesize & ((u64)PAGE_CACHE_SIZE - 1)) { 3224 printk(KERN_INFO 3225 "btrfsic: cannot handle nodesize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n", 3226 root->nodesize, (unsigned long)PAGE_CACHE_SIZE); 3227 return -1; 3228 } 3229 if (root->leafsize & ((u64)PAGE_CACHE_SIZE - 1)) { 3230 printk(KERN_INFO 3231 "btrfsic: cannot handle leafsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n", 3232 root->leafsize, (unsigned long)PAGE_CACHE_SIZE); 3233 return -1; 3234 } 3235 if (root->sectorsize & ((u64)PAGE_CACHE_SIZE - 1)) { 3236 printk(KERN_INFO 3237 "btrfsic: cannot handle sectorsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n", 
3238 root->sectorsize, (unsigned long)PAGE_CACHE_SIZE); 3239 return -1; 3240 } 3241 state = kzalloc(sizeof(*state), GFP_NOFS); 3242 if (NULL == state) { 3243 printk(KERN_INFO "btrfs check-integrity: kmalloc() failed!\n"); 3244 return -1; 3245 } 3246 3247 if (!btrfsic_is_initialized) { 3248 mutex_init(&btrfsic_mutex); 3249 btrfsic_dev_state_hashtable_init(&btrfsic_dev_state_hashtable); 3250 btrfsic_is_initialized = 1; 3251 } 3252 mutex_lock(&btrfsic_mutex); 3253 state->root = root; 3254 state->print_mask = print_mask; 3255 state->include_extent_data = including_extent_data; 3256 state->csum_size = 0; 3257 state->metablock_size = root->nodesize; 3258 state->datablock_size = root->sectorsize; 3259 INIT_LIST_HEAD(&state->all_blocks_list); 3260 btrfsic_block_hashtable_init(&state->block_hashtable); 3261 btrfsic_block_link_hashtable_init(&state->block_link_hashtable); 3262 state->max_superblock_generation = 0; 3263 state->latest_superblock = NULL; 3264 3265 list_for_each_entry(device, dev_head, dev_list) { 3266 struct btrfsic_dev_state *ds; 3267 char *p; 3268 3269 if (!device->bdev || !device->name) 3270 continue; 3271 3272 ds = btrfsic_dev_state_alloc(); 3273 if (NULL == ds) { 3274 printk(KERN_INFO 3275 "btrfs check-integrity: kmalloc() failed!\n"); 3276 mutex_unlock(&btrfsic_mutex); 3277 return -1; 3278 } 3279 ds->bdev = device->bdev; 3280 ds->state = state; 3281 bdevname(ds->bdev, ds->name); 3282 ds->name[BDEVNAME_SIZE - 1] = '\0'; 3283 for (p = ds->name; *p != '\0'; p++); 3284 while (p > ds->name && *p != '/') 3285 p--; 3286 if (*p == '/') 3287 p++; 3288 strlcpy(ds->name, p, sizeof(ds->name)); 3289 btrfsic_dev_state_hashtable_add(ds, 3290 &btrfsic_dev_state_hashtable); 3291 } 3292 3293 ret = btrfsic_process_superblock(state, fs_devices); 3294 if (0 != ret) { 3295 mutex_unlock(&btrfsic_mutex); 3296 btrfsic_unmount(root, fs_devices); 3297 return ret; 3298 } 3299 3300 if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_DATABASE) 3301 btrfsic_dump_database(state); 3302 if 
(state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_TREE) 3303 btrfsic_dump_tree(state); 3304 3305 mutex_unlock(&btrfsic_mutex); 3306 return 0; 3307 } 3308 3309 void btrfsic_unmount(struct btrfs_root *root, 3310 struct btrfs_fs_devices *fs_devices) 3311 { 3312 struct list_head *elem_all; 3313 struct list_head *tmp_all; 3314 struct btrfsic_state *state; 3315 struct list_head *dev_head = &fs_devices->devices; 3316 struct btrfs_device *device; 3317 3318 if (!btrfsic_is_initialized) 3319 return; 3320 3321 mutex_lock(&btrfsic_mutex); 3322 3323 state = NULL; 3324 list_for_each_entry(device, dev_head, dev_list) { 3325 struct btrfsic_dev_state *ds; 3326 3327 if (!device->bdev || !device->name) 3328 continue; 3329 3330 ds = btrfsic_dev_state_hashtable_lookup( 3331 device->bdev, 3332 &btrfsic_dev_state_hashtable); 3333 if (NULL != ds) { 3334 state = ds->state; 3335 btrfsic_dev_state_hashtable_remove(ds); 3336 btrfsic_dev_state_free(ds); 3337 } 3338 } 3339 3340 if (NULL == state) { 3341 printk(KERN_INFO 3342 "btrfsic: error, cannot find state information" 3343 " on umount!\n"); 3344 mutex_unlock(&btrfsic_mutex); 3345 return; 3346 } 3347 3348 /* 3349 * Don't care about keeping the lists' state up to date, 3350 * just free all memory that was allocated dynamically. 3351 * Free the blocks and the block_links. 
3352 */ 3353 list_for_each_safe(elem_all, tmp_all, &state->all_blocks_list) { 3354 struct btrfsic_block *const b_all = 3355 list_entry(elem_all, struct btrfsic_block, 3356 all_blocks_node); 3357 struct list_head *elem_ref_to; 3358 struct list_head *tmp_ref_to; 3359 3360 list_for_each_safe(elem_ref_to, tmp_ref_to, 3361 &b_all->ref_to_list) { 3362 struct btrfsic_block_link *const l = 3363 list_entry(elem_ref_to, 3364 struct btrfsic_block_link, 3365 node_ref_to); 3366 3367 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 3368 btrfsic_print_rem_link(state, l); 3369 3370 l->ref_cnt--; 3371 if (0 == l->ref_cnt) 3372 btrfsic_block_link_free(l); 3373 } 3374 3375 if (b_all->is_iodone || b_all->never_written) 3376 btrfsic_block_free(b_all); 3377 else 3378 printk(KERN_INFO "btrfs: attempt to free %c-block" 3379 " @%llu (%s/%llu/%d) on umount which is" 3380 " not yet iodone!\n", 3381 btrfsic_get_block_type(state, b_all), 3382 (unsigned long long)b_all->logical_bytenr, 3383 b_all->dev_state->name, 3384 (unsigned long long)b_all->dev_bytenr, 3385 b_all->mirror_num); 3386 } 3387 3388 mutex_unlock(&btrfsic_mutex); 3389 3390 kfree(state); 3391 } 3392