1 /* 2 * Copyright (C) STRATO AG 2011. All rights reserved. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public 6 * License v2 as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public 14 * License along with this program; if not, write to the 15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 16 * Boston, MA 021110-1307, USA. 17 */ 18 19 /* 20 * This module can be used to catch cases when the btrfs kernel 21 * code executes write requests to the disk that bring the file 22 * system in an inconsistent state. In such a state, a power-loss 23 * or kernel panic event would cause that the data on disk is 24 * lost or at least damaged. 25 * 26 * Code is added that examines all block write requests during 27 * runtime (including writes of the super block). Three rules 28 * are verified and an error is printed on violation of the 29 * rules: 30 * 1. It is not allowed to write a disk block which is 31 * currently referenced by the super block (either directly 32 * or indirectly). 33 * 2. When a super block is written, it is verified that all 34 * referenced (directly or indirectly) blocks fulfill the 35 * following requirements: 36 * 2a. All referenced blocks have either been present when 37 * the file system was mounted, (i.e., they have been 38 * referenced by the super block) or they have been 39 * written since then and the write completion callback 40 * was called and no write error was indicated and a 41 * FLUSH request to the device where these blocks are 42 * located was received and completed. 43 * 2b. 
All referenced blocks need to have a generation 44 * number which is equal to the parent's number. 45 * 46 * One issue that was found using this module was that the log 47 * tree on disk became temporarily corrupted because disk blocks 48 * that had been in use for the log tree had been freed and 49 * reused too early, while being referenced by the written super 50 * block. 51 * 52 * The search term in the kernel log that can be used to filter 53 * on the existence of detected integrity issues is 54 * "btrfs: attempt". 55 * 56 * The integrity check is enabled via mount options. These 57 * mount options are only supported if the integrity check 58 * tool is compiled by defining BTRFS_FS_CHECK_INTEGRITY. 59 * 60 * Example #1, apply integrity checks to all metadata: 61 * mount /dev/sdb1 /mnt -o check_int 62 * 63 * Example #2, apply integrity checks to all metadata and 64 * to data extents: 65 * mount /dev/sdb1 /mnt -o check_int_data 66 * 67 * Example #3, apply integrity checks to all metadata and dump 68 * the tree that the super block references to kernel messages 69 * each time after a super block was written: 70 * mount /dev/sdb1 /mnt -o check_int,check_int_print_mask=263 71 * 72 * If the integrity check tool is included and activated in 73 * the mount options, plenty of kernel memory is used, and 74 * plenty of additional CPU cycles are spent. Enabling this 75 * functionality is not intended for normal use. In most 76 * cases, unless you are a btrfs developer who needs to verify 77 * the integrity of (super)-block write requests, do not 78 * enable the config option BTRFS_FS_CHECK_INTEGRITY to 79 * include and compile the integrity check tool. 
80 */ 81 82 #include <linux/sched.h> 83 #include <linux/slab.h> 84 #include <linux/buffer_head.h> 85 #include <linux/mutex.h> 86 #include <linux/crc32c.h> 87 #include <linux/genhd.h> 88 #include <linux/blkdev.h> 89 #include "ctree.h" 90 #include "disk-io.h" 91 #include "transaction.h" 92 #include "extent_io.h" 93 #include "volumes.h" 94 #include "print-tree.h" 95 #include "locking.h" 96 #include "check-integrity.h" 97 #include "rcu-string.h" 98 99 #define BTRFSIC_BLOCK_HASHTABLE_SIZE 0x10000 100 #define BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE 0x10000 101 #define BTRFSIC_DEV2STATE_HASHTABLE_SIZE 0x100 102 #define BTRFSIC_BLOCK_MAGIC_NUMBER 0x14491051 103 #define BTRFSIC_BLOCK_LINK_MAGIC_NUMBER 0x11070807 104 #define BTRFSIC_DEV2STATE_MAGIC_NUMBER 0x20111530 105 #define BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER 20111300 106 #define BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL (200 - 6) /* in characters, 107 * excluding " [...]" */ 108 #define BTRFSIC_GENERATION_UNKNOWN ((u64)-1) 109 110 /* 111 * The definition of the bitmask fields for the print_mask. 112 * They are specified with the mount option check_integrity_print_mask. 
113 */ 114 #define BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE 0x00000001 115 #define BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION 0x00000002 116 #define BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE 0x00000004 117 #define BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE 0x00000008 118 #define BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH 0x00000010 119 #define BTRFSIC_PRINT_MASK_END_IO_BIO_BH 0x00000020 120 #define BTRFSIC_PRINT_MASK_VERBOSE 0x00000040 121 #define BTRFSIC_PRINT_MASK_VERY_VERBOSE 0x00000080 122 #define BTRFSIC_PRINT_MASK_INITIAL_TREE 0x00000100 123 #define BTRFSIC_PRINT_MASK_INITIAL_ALL_TREES 0x00000200 124 #define BTRFSIC_PRINT_MASK_INITIAL_DATABASE 0x00000400 125 #define BTRFSIC_PRINT_MASK_NUM_COPIES 0x00000800 126 #define BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS 0x00001000 127 128 struct btrfsic_dev_state; 129 struct btrfsic_state; 130 131 struct btrfsic_block { 132 u32 magic_num; /* only used for debug purposes */ 133 unsigned int is_metadata:1; /* if it is meta-data, not data-data */ 134 unsigned int is_superblock:1; /* if it is one of the superblocks */ 135 unsigned int is_iodone:1; /* if is done by lower subsystem */ 136 unsigned int iodone_w_error:1; /* error was indicated to endio */ 137 unsigned int never_written:1; /* block was added because it was 138 * referenced, not because it was 139 * written */ 140 unsigned int mirror_num; /* large enough to hold 141 * BTRFS_SUPER_MIRROR_MAX */ 142 struct btrfsic_dev_state *dev_state; 143 u64 dev_bytenr; /* key, physical byte num on disk */ 144 u64 logical_bytenr; /* logical byte num on disk */ 145 u64 generation; 146 struct btrfs_disk_key disk_key; /* extra info to print in case of 147 * issues, will not always be correct */ 148 struct list_head collision_resolving_node; /* list node */ 149 struct list_head all_blocks_node; /* list node */ 150 151 /* the following two lists contain block_link items */ 152 struct list_head ref_to_list; /* list */ 153 struct list_head ref_from_list; /* list */ 154 struct btrfsic_block 
*next_in_same_bio; 155 void *orig_bio_bh_private; 156 union { 157 bio_end_io_t *bio; 158 bh_end_io_t *bh; 159 } orig_bio_bh_end_io; 160 int submit_bio_bh_rw; 161 u64 flush_gen; /* only valid if !never_written */ 162 }; 163 164 /* 165 * Elements of this type are allocated dynamically and required because 166 * each block object can refer to and can be ref from multiple blocks. 167 * The key to lookup them in the hashtable is the dev_bytenr of 168 * the block ref to plus the one from the block refered from. 169 * The fact that they are searchable via a hashtable and that a 170 * ref_cnt is maintained is not required for the btrfs integrity 171 * check algorithm itself, it is only used to make the output more 172 * beautiful in case that an error is detected (an error is defined 173 * as a write operation to a block while that block is still referenced). 174 */ 175 struct btrfsic_block_link { 176 u32 magic_num; /* only used for debug purposes */ 177 u32 ref_cnt; 178 struct list_head node_ref_to; /* list node */ 179 struct list_head node_ref_from; /* list node */ 180 struct list_head collision_resolving_node; /* list node */ 181 struct btrfsic_block *block_ref_to; 182 struct btrfsic_block *block_ref_from; 183 u64 parent_generation; 184 }; 185 186 struct btrfsic_dev_state { 187 u32 magic_num; /* only used for debug purposes */ 188 struct block_device *bdev; 189 struct btrfsic_state *state; 190 struct list_head collision_resolving_node; /* list node */ 191 struct btrfsic_block dummy_block_for_bio_bh_flush; 192 u64 last_flush_gen; 193 char name[BDEVNAME_SIZE]; 194 }; 195 196 struct btrfsic_block_hashtable { 197 struct list_head table[BTRFSIC_BLOCK_HASHTABLE_SIZE]; 198 }; 199 200 struct btrfsic_block_link_hashtable { 201 struct list_head table[BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE]; 202 }; 203 204 struct btrfsic_dev_state_hashtable { 205 struct list_head table[BTRFSIC_DEV2STATE_HASHTABLE_SIZE]; 206 }; 207 208 struct btrfsic_block_data_ctx { 209 u64 start; /* virtual bytenr */ 
210 u64 dev_bytenr; /* physical bytenr on device */ 211 u32 len; 212 struct btrfsic_dev_state *dev; 213 char **datav; 214 struct page **pagev; 215 void *mem_to_free; 216 }; 217 218 /* This structure is used to implement recursion without occupying 219 * any stack space, refer to btrfsic_process_metablock() */ 220 struct btrfsic_stack_frame { 221 u32 magic; 222 u32 nr; 223 int error; 224 int i; 225 int limit_nesting; 226 int num_copies; 227 int mirror_num; 228 struct btrfsic_block *block; 229 struct btrfsic_block_data_ctx *block_ctx; 230 struct btrfsic_block *next_block; 231 struct btrfsic_block_data_ctx next_block_ctx; 232 struct btrfs_header *hdr; 233 struct btrfsic_stack_frame *prev; 234 }; 235 236 /* Some state per mounted filesystem */ 237 struct btrfsic_state { 238 u32 print_mask; 239 int include_extent_data; 240 int csum_size; 241 struct list_head all_blocks_list; 242 struct btrfsic_block_hashtable block_hashtable; 243 struct btrfsic_block_link_hashtable block_link_hashtable; 244 struct btrfs_root *root; 245 u64 max_superblock_generation; 246 struct btrfsic_block *latest_superblock; 247 u32 metablock_size; 248 u32 datablock_size; 249 }; 250 251 static void btrfsic_block_init(struct btrfsic_block *b); 252 static struct btrfsic_block *btrfsic_block_alloc(void); 253 static void btrfsic_block_free(struct btrfsic_block *b); 254 static void btrfsic_block_link_init(struct btrfsic_block_link *n); 255 static struct btrfsic_block_link *btrfsic_block_link_alloc(void); 256 static void btrfsic_block_link_free(struct btrfsic_block_link *n); 257 static void btrfsic_dev_state_init(struct btrfsic_dev_state *ds); 258 static struct btrfsic_dev_state *btrfsic_dev_state_alloc(void); 259 static void btrfsic_dev_state_free(struct btrfsic_dev_state *ds); 260 static void btrfsic_block_hashtable_init(struct btrfsic_block_hashtable *h); 261 static void btrfsic_block_hashtable_add(struct btrfsic_block *b, 262 struct btrfsic_block_hashtable *h); 263 static void 
btrfsic_block_hashtable_remove(struct btrfsic_block *b); 264 static struct btrfsic_block *btrfsic_block_hashtable_lookup( 265 struct block_device *bdev, 266 u64 dev_bytenr, 267 struct btrfsic_block_hashtable *h); 268 static void btrfsic_block_link_hashtable_init( 269 struct btrfsic_block_link_hashtable *h); 270 static void btrfsic_block_link_hashtable_add( 271 struct btrfsic_block_link *l, 272 struct btrfsic_block_link_hashtable *h); 273 static void btrfsic_block_link_hashtable_remove(struct btrfsic_block_link *l); 274 static struct btrfsic_block_link *btrfsic_block_link_hashtable_lookup( 275 struct block_device *bdev_ref_to, 276 u64 dev_bytenr_ref_to, 277 struct block_device *bdev_ref_from, 278 u64 dev_bytenr_ref_from, 279 struct btrfsic_block_link_hashtable *h); 280 static void btrfsic_dev_state_hashtable_init( 281 struct btrfsic_dev_state_hashtable *h); 282 static void btrfsic_dev_state_hashtable_add( 283 struct btrfsic_dev_state *ds, 284 struct btrfsic_dev_state_hashtable *h); 285 static void btrfsic_dev_state_hashtable_remove(struct btrfsic_dev_state *ds); 286 static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup( 287 struct block_device *bdev, 288 struct btrfsic_dev_state_hashtable *h); 289 static struct btrfsic_stack_frame *btrfsic_stack_frame_alloc(void); 290 static void btrfsic_stack_frame_free(struct btrfsic_stack_frame *sf); 291 static int btrfsic_process_superblock(struct btrfsic_state *state, 292 struct btrfs_fs_devices *fs_devices); 293 static int btrfsic_process_metablock(struct btrfsic_state *state, 294 struct btrfsic_block *block, 295 struct btrfsic_block_data_ctx *block_ctx, 296 int limit_nesting, int force_iodone_flag); 297 static void btrfsic_read_from_block_data( 298 struct btrfsic_block_data_ctx *block_ctx, 299 void *dst, u32 offset, size_t len); 300 static int btrfsic_create_link_to_next_block( 301 struct btrfsic_state *state, 302 struct btrfsic_block *block, 303 struct btrfsic_block_data_ctx 304 *block_ctx, u64 next_bytenr, 305 
int limit_nesting, 306 struct btrfsic_block_data_ctx *next_block_ctx, 307 struct btrfsic_block **next_blockp, 308 int force_iodone_flag, 309 int *num_copiesp, int *mirror_nump, 310 struct btrfs_disk_key *disk_key, 311 u64 parent_generation); 312 static int btrfsic_handle_extent_data(struct btrfsic_state *state, 313 struct btrfsic_block *block, 314 struct btrfsic_block_data_ctx *block_ctx, 315 u32 item_offset, int force_iodone_flag); 316 static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len, 317 struct btrfsic_block_data_ctx *block_ctx_out, 318 int mirror_num); 319 static int btrfsic_map_superblock(struct btrfsic_state *state, u64 bytenr, 320 u32 len, struct block_device *bdev, 321 struct btrfsic_block_data_ctx *block_ctx_out); 322 static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx); 323 static int btrfsic_read_block(struct btrfsic_state *state, 324 struct btrfsic_block_data_ctx *block_ctx); 325 static void btrfsic_dump_database(struct btrfsic_state *state); 326 static void btrfsic_complete_bio_end_io(struct bio *bio, int err); 327 static int btrfsic_test_for_metadata(struct btrfsic_state *state, 328 char **datav, unsigned int num_pages); 329 static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, 330 u64 dev_bytenr, char **mapped_datav, 331 unsigned int num_pages, 332 struct bio *bio, int *bio_is_patched, 333 struct buffer_head *bh, 334 int submit_bio_bh_rw); 335 static int btrfsic_process_written_superblock( 336 struct btrfsic_state *state, 337 struct btrfsic_block *const block, 338 struct btrfs_super_block *const super_hdr); 339 static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status); 340 static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate); 341 static int btrfsic_is_block_ref_by_superblock(const struct btrfsic_state *state, 342 const struct btrfsic_block *block, 343 int recursion_level); 344 static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state, 
345 struct btrfsic_block *const block, 346 int recursion_level); 347 static void btrfsic_print_add_link(const struct btrfsic_state *state, 348 const struct btrfsic_block_link *l); 349 static void btrfsic_print_rem_link(const struct btrfsic_state *state, 350 const struct btrfsic_block_link *l); 351 static char btrfsic_get_block_type(const struct btrfsic_state *state, 352 const struct btrfsic_block *block); 353 static void btrfsic_dump_tree(const struct btrfsic_state *state); 354 static void btrfsic_dump_tree_sub(const struct btrfsic_state *state, 355 const struct btrfsic_block *block, 356 int indent_level); 357 static struct btrfsic_block_link *btrfsic_block_link_lookup_or_add( 358 struct btrfsic_state *state, 359 struct btrfsic_block_data_ctx *next_block_ctx, 360 struct btrfsic_block *next_block, 361 struct btrfsic_block *from_block, 362 u64 parent_generation); 363 static struct btrfsic_block *btrfsic_block_lookup_or_add( 364 struct btrfsic_state *state, 365 struct btrfsic_block_data_ctx *block_ctx, 366 const char *additional_string, 367 int is_metadata, 368 int is_iodone, 369 int never_written, 370 int mirror_num, 371 int *was_created); 372 static int btrfsic_process_superblock_dev_mirror( 373 struct btrfsic_state *state, 374 struct btrfsic_dev_state *dev_state, 375 struct btrfs_device *device, 376 int superblock_mirror_num, 377 struct btrfsic_dev_state **selected_dev_state, 378 struct btrfs_super_block *selected_super); 379 static struct btrfsic_dev_state *btrfsic_dev_state_lookup( 380 struct block_device *bdev); 381 static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state, 382 u64 bytenr, 383 struct btrfsic_dev_state *dev_state, 384 u64 dev_bytenr); 385 386 static struct mutex btrfsic_mutex; 387 static int btrfsic_is_initialized; 388 static struct btrfsic_dev_state_hashtable btrfsic_dev_state_hashtable; 389 390 391 static void btrfsic_block_init(struct btrfsic_block *b) 392 { 393 b->magic_num = BTRFSIC_BLOCK_MAGIC_NUMBER; 394 b->dev_state = NULL; 
395 b->dev_bytenr = 0; 396 b->logical_bytenr = 0; 397 b->generation = BTRFSIC_GENERATION_UNKNOWN; 398 b->disk_key.objectid = 0; 399 b->disk_key.type = 0; 400 b->disk_key.offset = 0; 401 b->is_metadata = 0; 402 b->is_superblock = 0; 403 b->is_iodone = 0; 404 b->iodone_w_error = 0; 405 b->never_written = 0; 406 b->mirror_num = 0; 407 b->next_in_same_bio = NULL; 408 b->orig_bio_bh_private = NULL; 409 b->orig_bio_bh_end_io.bio = NULL; 410 INIT_LIST_HEAD(&b->collision_resolving_node); 411 INIT_LIST_HEAD(&b->all_blocks_node); 412 INIT_LIST_HEAD(&b->ref_to_list); 413 INIT_LIST_HEAD(&b->ref_from_list); 414 b->submit_bio_bh_rw = 0; 415 b->flush_gen = 0; 416 } 417 418 static struct btrfsic_block *btrfsic_block_alloc(void) 419 { 420 struct btrfsic_block *b; 421 422 b = kzalloc(sizeof(*b), GFP_NOFS); 423 if (NULL != b) 424 btrfsic_block_init(b); 425 426 return b; 427 } 428 429 static void btrfsic_block_free(struct btrfsic_block *b) 430 { 431 BUG_ON(!(NULL == b || BTRFSIC_BLOCK_MAGIC_NUMBER == b->magic_num)); 432 kfree(b); 433 } 434 435 static void btrfsic_block_link_init(struct btrfsic_block_link *l) 436 { 437 l->magic_num = BTRFSIC_BLOCK_LINK_MAGIC_NUMBER; 438 l->ref_cnt = 1; 439 INIT_LIST_HEAD(&l->node_ref_to); 440 INIT_LIST_HEAD(&l->node_ref_from); 441 INIT_LIST_HEAD(&l->collision_resolving_node); 442 l->block_ref_to = NULL; 443 l->block_ref_from = NULL; 444 } 445 446 static struct btrfsic_block_link *btrfsic_block_link_alloc(void) 447 { 448 struct btrfsic_block_link *l; 449 450 l = kzalloc(sizeof(*l), GFP_NOFS); 451 if (NULL != l) 452 btrfsic_block_link_init(l); 453 454 return l; 455 } 456 457 static void btrfsic_block_link_free(struct btrfsic_block_link *l) 458 { 459 BUG_ON(!(NULL == l || BTRFSIC_BLOCK_LINK_MAGIC_NUMBER == l->magic_num)); 460 kfree(l); 461 } 462 463 static void btrfsic_dev_state_init(struct btrfsic_dev_state *ds) 464 { 465 ds->magic_num = BTRFSIC_DEV2STATE_MAGIC_NUMBER; 466 ds->bdev = NULL; 467 ds->state = NULL; 468 ds->name[0] = '\0'; 469 
INIT_LIST_HEAD(&ds->collision_resolving_node); 470 ds->last_flush_gen = 0; 471 btrfsic_block_init(&ds->dummy_block_for_bio_bh_flush); 472 ds->dummy_block_for_bio_bh_flush.is_iodone = 1; 473 ds->dummy_block_for_bio_bh_flush.dev_state = ds; 474 } 475 476 static struct btrfsic_dev_state *btrfsic_dev_state_alloc(void) 477 { 478 struct btrfsic_dev_state *ds; 479 480 ds = kzalloc(sizeof(*ds), GFP_NOFS); 481 if (NULL != ds) 482 btrfsic_dev_state_init(ds); 483 484 return ds; 485 } 486 487 static void btrfsic_dev_state_free(struct btrfsic_dev_state *ds) 488 { 489 BUG_ON(!(NULL == ds || 490 BTRFSIC_DEV2STATE_MAGIC_NUMBER == ds->magic_num)); 491 kfree(ds); 492 } 493 494 static void btrfsic_block_hashtable_init(struct btrfsic_block_hashtable *h) 495 { 496 int i; 497 498 for (i = 0; i < BTRFSIC_BLOCK_HASHTABLE_SIZE; i++) 499 INIT_LIST_HEAD(h->table + i); 500 } 501 502 static void btrfsic_block_hashtable_add(struct btrfsic_block *b, 503 struct btrfsic_block_hashtable *h) 504 { 505 const unsigned int hashval = 506 (((unsigned int)(b->dev_bytenr >> 16)) ^ 507 ((unsigned int)((uintptr_t)b->dev_state->bdev))) & 508 (BTRFSIC_BLOCK_HASHTABLE_SIZE - 1); 509 510 list_add(&b->collision_resolving_node, h->table + hashval); 511 } 512 513 static void btrfsic_block_hashtable_remove(struct btrfsic_block *b) 514 { 515 list_del(&b->collision_resolving_node); 516 } 517 518 static struct btrfsic_block *btrfsic_block_hashtable_lookup( 519 struct block_device *bdev, 520 u64 dev_bytenr, 521 struct btrfsic_block_hashtable *h) 522 { 523 const unsigned int hashval = 524 (((unsigned int)(dev_bytenr >> 16)) ^ 525 ((unsigned int)((uintptr_t)bdev))) & 526 (BTRFSIC_BLOCK_HASHTABLE_SIZE - 1); 527 struct list_head *elem; 528 529 list_for_each(elem, h->table + hashval) { 530 struct btrfsic_block *const b = 531 list_entry(elem, struct btrfsic_block, 532 collision_resolving_node); 533 534 if (b->dev_state->bdev == bdev && b->dev_bytenr == dev_bytenr) 535 return b; 536 } 537 538 return NULL; 539 } 540 541 static 
void btrfsic_block_link_hashtable_init( 542 struct btrfsic_block_link_hashtable *h) 543 { 544 int i; 545 546 for (i = 0; i < BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE; i++) 547 INIT_LIST_HEAD(h->table + i); 548 } 549 550 static void btrfsic_block_link_hashtable_add( 551 struct btrfsic_block_link *l, 552 struct btrfsic_block_link_hashtable *h) 553 { 554 const unsigned int hashval = 555 (((unsigned int)(l->block_ref_to->dev_bytenr >> 16)) ^ 556 ((unsigned int)(l->block_ref_from->dev_bytenr >> 16)) ^ 557 ((unsigned int)((uintptr_t)l->block_ref_to->dev_state->bdev)) ^ 558 ((unsigned int)((uintptr_t)l->block_ref_from->dev_state->bdev))) 559 & (BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE - 1); 560 561 BUG_ON(NULL == l->block_ref_to); 562 BUG_ON(NULL == l->block_ref_from); 563 list_add(&l->collision_resolving_node, h->table + hashval); 564 } 565 566 static void btrfsic_block_link_hashtable_remove(struct btrfsic_block_link *l) 567 { 568 list_del(&l->collision_resolving_node); 569 } 570 571 static struct btrfsic_block_link *btrfsic_block_link_hashtable_lookup( 572 struct block_device *bdev_ref_to, 573 u64 dev_bytenr_ref_to, 574 struct block_device *bdev_ref_from, 575 u64 dev_bytenr_ref_from, 576 struct btrfsic_block_link_hashtable *h) 577 { 578 const unsigned int hashval = 579 (((unsigned int)(dev_bytenr_ref_to >> 16)) ^ 580 ((unsigned int)(dev_bytenr_ref_from >> 16)) ^ 581 ((unsigned int)((uintptr_t)bdev_ref_to)) ^ 582 ((unsigned int)((uintptr_t)bdev_ref_from))) & 583 (BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE - 1); 584 struct list_head *elem; 585 586 list_for_each(elem, h->table + hashval) { 587 struct btrfsic_block_link *const l = 588 list_entry(elem, struct btrfsic_block_link, 589 collision_resolving_node); 590 591 BUG_ON(NULL == l->block_ref_to); 592 BUG_ON(NULL == l->block_ref_from); 593 if (l->block_ref_to->dev_state->bdev == bdev_ref_to && 594 l->block_ref_to->dev_bytenr == dev_bytenr_ref_to && 595 l->block_ref_from->dev_state->bdev == bdev_ref_from && 596 l->block_ref_from->dev_bytenr == 
dev_bytenr_ref_from) 597 return l; 598 } 599 600 return NULL; 601 } 602 603 static void btrfsic_dev_state_hashtable_init( 604 struct btrfsic_dev_state_hashtable *h) 605 { 606 int i; 607 608 for (i = 0; i < BTRFSIC_DEV2STATE_HASHTABLE_SIZE; i++) 609 INIT_LIST_HEAD(h->table + i); 610 } 611 612 static void btrfsic_dev_state_hashtable_add( 613 struct btrfsic_dev_state *ds, 614 struct btrfsic_dev_state_hashtable *h) 615 { 616 const unsigned int hashval = 617 (((unsigned int)((uintptr_t)ds->bdev)) & 618 (BTRFSIC_DEV2STATE_HASHTABLE_SIZE - 1)); 619 620 list_add(&ds->collision_resolving_node, h->table + hashval); 621 } 622 623 static void btrfsic_dev_state_hashtable_remove(struct btrfsic_dev_state *ds) 624 { 625 list_del(&ds->collision_resolving_node); 626 } 627 628 static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup( 629 struct block_device *bdev, 630 struct btrfsic_dev_state_hashtable *h) 631 { 632 const unsigned int hashval = 633 (((unsigned int)((uintptr_t)bdev)) & 634 (BTRFSIC_DEV2STATE_HASHTABLE_SIZE - 1)); 635 struct list_head *elem; 636 637 list_for_each(elem, h->table + hashval) { 638 struct btrfsic_dev_state *const ds = 639 list_entry(elem, struct btrfsic_dev_state, 640 collision_resolving_node); 641 642 if (ds->bdev == bdev) 643 return ds; 644 } 645 646 return NULL; 647 } 648 649 static int btrfsic_process_superblock(struct btrfsic_state *state, 650 struct btrfs_fs_devices *fs_devices) 651 { 652 int ret = 0; 653 struct btrfs_super_block *selected_super; 654 struct list_head *dev_head = &fs_devices->devices; 655 struct btrfs_device *device; 656 struct btrfsic_dev_state *selected_dev_state = NULL; 657 int pass; 658 659 BUG_ON(NULL == state); 660 selected_super = kzalloc(sizeof(*selected_super), GFP_NOFS); 661 if (NULL == selected_super) { 662 printk(KERN_INFO "btrfsic: error, kmalloc failed!\n"); 663 return -1; 664 } 665 666 list_for_each_entry(device, dev_head, dev_list) { 667 int i; 668 struct btrfsic_dev_state *dev_state; 669 670 if 
(!device->bdev || !device->name) 671 continue; 672 673 dev_state = btrfsic_dev_state_lookup(device->bdev); 674 BUG_ON(NULL == dev_state); 675 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { 676 ret = btrfsic_process_superblock_dev_mirror( 677 state, dev_state, device, i, 678 &selected_dev_state, selected_super); 679 if (0 != ret && 0 == i) { 680 kfree(selected_super); 681 return ret; 682 } 683 } 684 } 685 686 if (NULL == state->latest_superblock) { 687 printk(KERN_INFO "btrfsic: no superblock found!\n"); 688 kfree(selected_super); 689 return -1; 690 } 691 692 state->csum_size = btrfs_super_csum_size(selected_super); 693 694 for (pass = 0; pass < 3; pass++) { 695 int num_copies; 696 int mirror_num; 697 u64 next_bytenr; 698 699 switch (pass) { 700 case 0: 701 next_bytenr = btrfs_super_root(selected_super); 702 if (state->print_mask & 703 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) 704 printk(KERN_INFO "root@%llu\n", 705 (unsigned long long)next_bytenr); 706 break; 707 case 1: 708 next_bytenr = btrfs_super_chunk_root(selected_super); 709 if (state->print_mask & 710 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) 711 printk(KERN_INFO "chunk@%llu\n", 712 (unsigned long long)next_bytenr); 713 break; 714 case 2: 715 next_bytenr = btrfs_super_log_root(selected_super); 716 if (0 == next_bytenr) 717 continue; 718 if (state->print_mask & 719 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) 720 printk(KERN_INFO "log@%llu\n", 721 (unsigned long long)next_bytenr); 722 break; 723 } 724 725 num_copies = 726 btrfs_num_copies(state->root->fs_info, 727 next_bytenr, state->metablock_size); 728 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) 729 printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", 730 (unsigned long long)next_bytenr, num_copies); 731 732 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { 733 struct btrfsic_block *next_block; 734 struct btrfsic_block_data_ctx tmp_next_block_ctx; 735 struct btrfsic_block_link *l; 736 737 ret = 
btrfsic_map_block(state, next_bytenr, 738 state->metablock_size, 739 &tmp_next_block_ctx, 740 mirror_num); 741 if (ret) { 742 printk(KERN_INFO "btrfsic:" 743 " btrfsic_map_block(root @%llu," 744 " mirror %d) failed!\n", 745 (unsigned long long)next_bytenr, 746 mirror_num); 747 kfree(selected_super); 748 return -1; 749 } 750 751 next_block = btrfsic_block_hashtable_lookup( 752 tmp_next_block_ctx.dev->bdev, 753 tmp_next_block_ctx.dev_bytenr, 754 &state->block_hashtable); 755 BUG_ON(NULL == next_block); 756 757 l = btrfsic_block_link_hashtable_lookup( 758 tmp_next_block_ctx.dev->bdev, 759 tmp_next_block_ctx.dev_bytenr, 760 state->latest_superblock->dev_state-> 761 bdev, 762 state->latest_superblock->dev_bytenr, 763 &state->block_link_hashtable); 764 BUG_ON(NULL == l); 765 766 ret = btrfsic_read_block(state, &tmp_next_block_ctx); 767 if (ret < (int)PAGE_CACHE_SIZE) { 768 printk(KERN_INFO 769 "btrfsic: read @logical %llu failed!\n", 770 (unsigned long long) 771 tmp_next_block_ctx.start); 772 btrfsic_release_block_ctx(&tmp_next_block_ctx); 773 kfree(selected_super); 774 return -1; 775 } 776 777 ret = btrfsic_process_metablock(state, 778 next_block, 779 &tmp_next_block_ctx, 780 BTRFS_MAX_LEVEL + 3, 1); 781 btrfsic_release_block_ctx(&tmp_next_block_ctx); 782 } 783 } 784 785 kfree(selected_super); 786 return ret; 787 } 788 789 static int btrfsic_process_superblock_dev_mirror( 790 struct btrfsic_state *state, 791 struct btrfsic_dev_state *dev_state, 792 struct btrfs_device *device, 793 int superblock_mirror_num, 794 struct btrfsic_dev_state **selected_dev_state, 795 struct btrfs_super_block *selected_super) 796 { 797 struct btrfs_super_block *super_tmp; 798 u64 dev_bytenr; 799 struct buffer_head *bh; 800 struct btrfsic_block *superblock_tmp; 801 int pass; 802 struct block_device *const superblock_bdev = device->bdev; 803 804 /* super block bytenr is always the unmapped device bytenr */ 805 dev_bytenr = btrfs_sb_offset(superblock_mirror_num); 806 if (dev_bytenr + 
BTRFS_SUPER_INFO_SIZE > device->total_bytes) 807 return -1; 808 bh = __bread(superblock_bdev, dev_bytenr / 4096, 809 BTRFS_SUPER_INFO_SIZE); 810 if (NULL == bh) 811 return -1; 812 super_tmp = (struct btrfs_super_block *) 813 (bh->b_data + (dev_bytenr & 4095)); 814 815 if (btrfs_super_bytenr(super_tmp) != dev_bytenr || 816 strncmp((char *)(&(super_tmp->magic)), BTRFS_MAGIC, 817 sizeof(super_tmp->magic)) || 818 memcmp(device->uuid, super_tmp->dev_item.uuid, BTRFS_UUID_SIZE) || 819 btrfs_super_nodesize(super_tmp) != state->metablock_size || 820 btrfs_super_leafsize(super_tmp) != state->metablock_size || 821 btrfs_super_sectorsize(super_tmp) != state->datablock_size) { 822 brelse(bh); 823 return 0; 824 } 825 826 superblock_tmp = 827 btrfsic_block_hashtable_lookup(superblock_bdev, 828 dev_bytenr, 829 &state->block_hashtable); 830 if (NULL == superblock_tmp) { 831 superblock_tmp = btrfsic_block_alloc(); 832 if (NULL == superblock_tmp) { 833 printk(KERN_INFO "btrfsic: error, kmalloc failed!\n"); 834 brelse(bh); 835 return -1; 836 } 837 /* for superblock, only the dev_bytenr makes sense */ 838 superblock_tmp->dev_bytenr = dev_bytenr; 839 superblock_tmp->dev_state = dev_state; 840 superblock_tmp->logical_bytenr = dev_bytenr; 841 superblock_tmp->generation = btrfs_super_generation(super_tmp); 842 superblock_tmp->is_metadata = 1; 843 superblock_tmp->is_superblock = 1; 844 superblock_tmp->is_iodone = 1; 845 superblock_tmp->never_written = 0; 846 superblock_tmp->mirror_num = 1 + superblock_mirror_num; 847 if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE) 848 printk_in_rcu(KERN_INFO "New initial S-block (bdev %p, %s)" 849 " @%llu (%s/%llu/%d)\n", 850 superblock_bdev, 851 rcu_str_deref(device->name), 852 (unsigned long long)dev_bytenr, 853 dev_state->name, 854 (unsigned long long)dev_bytenr, 855 superblock_mirror_num); 856 list_add(&superblock_tmp->all_blocks_node, 857 &state->all_blocks_list); 858 btrfsic_block_hashtable_add(superblock_tmp, 859 
&state->block_hashtable); 860 } 861 862 /* select the one with the highest generation field */ 863 if (btrfs_super_generation(super_tmp) > 864 state->max_superblock_generation || 865 0 == state->max_superblock_generation) { 866 memcpy(selected_super, super_tmp, sizeof(*selected_super)); 867 *selected_dev_state = dev_state; 868 state->max_superblock_generation = 869 btrfs_super_generation(super_tmp); 870 state->latest_superblock = superblock_tmp; 871 } 872 873 for (pass = 0; pass < 3; pass++) { 874 u64 next_bytenr; 875 int num_copies; 876 int mirror_num; 877 const char *additional_string = NULL; 878 struct btrfs_disk_key tmp_disk_key; 879 880 tmp_disk_key.type = BTRFS_ROOT_ITEM_KEY; 881 tmp_disk_key.offset = 0; 882 switch (pass) { 883 case 0: 884 tmp_disk_key.objectid = 885 cpu_to_le64(BTRFS_ROOT_TREE_OBJECTID); 886 additional_string = "initial root "; 887 next_bytenr = btrfs_super_root(super_tmp); 888 break; 889 case 1: 890 tmp_disk_key.objectid = 891 cpu_to_le64(BTRFS_CHUNK_TREE_OBJECTID); 892 additional_string = "initial chunk "; 893 next_bytenr = btrfs_super_chunk_root(super_tmp); 894 break; 895 case 2: 896 tmp_disk_key.objectid = 897 cpu_to_le64(BTRFS_TREE_LOG_OBJECTID); 898 additional_string = "initial log "; 899 next_bytenr = btrfs_super_log_root(super_tmp); 900 if (0 == next_bytenr) 901 continue; 902 break; 903 } 904 905 num_copies = 906 btrfs_num_copies(state->root->fs_info, 907 next_bytenr, state->metablock_size); 908 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) 909 printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", 910 (unsigned long long)next_bytenr, num_copies); 911 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { 912 struct btrfsic_block *next_block; 913 struct btrfsic_block_data_ctx tmp_next_block_ctx; 914 struct btrfsic_block_link *l; 915 916 if (btrfsic_map_block(state, next_bytenr, 917 state->metablock_size, 918 &tmp_next_block_ctx, 919 mirror_num)) { 920 printk(KERN_INFO "btrfsic: btrfsic_map_block(" 921 "bytenr @%llu, 
mirror %d) failed!\n", 922 (unsigned long long)next_bytenr, 923 mirror_num); 924 brelse(bh); 925 return -1; 926 } 927 928 next_block = btrfsic_block_lookup_or_add( 929 state, &tmp_next_block_ctx, 930 additional_string, 1, 1, 0, 931 mirror_num, NULL); 932 if (NULL == next_block) { 933 btrfsic_release_block_ctx(&tmp_next_block_ctx); 934 brelse(bh); 935 return -1; 936 } 937 938 next_block->disk_key = tmp_disk_key; 939 next_block->generation = BTRFSIC_GENERATION_UNKNOWN; 940 l = btrfsic_block_link_lookup_or_add( 941 state, &tmp_next_block_ctx, 942 next_block, superblock_tmp, 943 BTRFSIC_GENERATION_UNKNOWN); 944 btrfsic_release_block_ctx(&tmp_next_block_ctx); 945 if (NULL == l) { 946 brelse(bh); 947 return -1; 948 } 949 } 950 } 951 if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_ALL_TREES) 952 btrfsic_dump_tree_sub(state, superblock_tmp, 0); 953 954 brelse(bh); 955 return 0; 956 } 957 958 static struct btrfsic_stack_frame *btrfsic_stack_frame_alloc(void) 959 { 960 struct btrfsic_stack_frame *sf; 961 962 sf = kzalloc(sizeof(*sf), GFP_NOFS); 963 if (NULL == sf) 964 printk(KERN_INFO "btrfsic: alloc memory failed!\n"); 965 else 966 sf->magic = BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER; 967 return sf; 968 } 969 970 static void btrfsic_stack_frame_free(struct btrfsic_stack_frame *sf) 971 { 972 BUG_ON(!(NULL == sf || 973 BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER == sf->magic)); 974 kfree(sf); 975 } 976 977 static int btrfsic_process_metablock( 978 struct btrfsic_state *state, 979 struct btrfsic_block *const first_block, 980 struct btrfsic_block_data_ctx *const first_block_ctx, 981 int first_limit_nesting, int force_iodone_flag) 982 { 983 struct btrfsic_stack_frame initial_stack_frame = { 0 }; 984 struct btrfsic_stack_frame *sf; 985 struct btrfsic_stack_frame *next_stack; 986 struct btrfs_header *const first_hdr = 987 (struct btrfs_header *)first_block_ctx->datav[0]; 988 989 BUG_ON(!first_hdr); 990 sf = &initial_stack_frame; 991 sf->error = 0; 992 sf->i = -1; 993 sf->limit_nesting 
= first_limit_nesting; 994 sf->block = first_block; 995 sf->block_ctx = first_block_ctx; 996 sf->next_block = NULL; 997 sf->hdr = first_hdr; 998 sf->prev = NULL; 999 1000 continue_with_new_stack_frame: 1001 sf->block->generation = le64_to_cpu(sf->hdr->generation); 1002 if (0 == sf->hdr->level) { 1003 struct btrfs_leaf *const leafhdr = 1004 (struct btrfs_leaf *)sf->hdr; 1005 1006 if (-1 == sf->i) { 1007 sf->nr = le32_to_cpu(leafhdr->header.nritems); 1008 1009 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 1010 printk(KERN_INFO 1011 "leaf %llu items %d generation %llu" 1012 " owner %llu\n", 1013 (unsigned long long) 1014 sf->block_ctx->start, 1015 sf->nr, 1016 (unsigned long long) 1017 le64_to_cpu(leafhdr->header.generation), 1018 (unsigned long long) 1019 le64_to_cpu(leafhdr->header.owner)); 1020 } 1021 1022 continue_with_current_leaf_stack_frame: 1023 if (0 == sf->num_copies || sf->mirror_num > sf->num_copies) { 1024 sf->i++; 1025 sf->num_copies = 0; 1026 } 1027 1028 if (sf->i < sf->nr) { 1029 struct btrfs_item disk_item; 1030 u32 disk_item_offset = 1031 (uintptr_t)(leafhdr->items + sf->i) - 1032 (uintptr_t)leafhdr; 1033 struct btrfs_disk_key *disk_key; 1034 u8 type; 1035 u32 item_offset; 1036 u32 item_size; 1037 1038 if (disk_item_offset + sizeof(struct btrfs_item) > 1039 sf->block_ctx->len) { 1040 leaf_item_out_of_bounce_error: 1041 printk(KERN_INFO 1042 "btrfsic: leaf item out of bounce at logical %llu, dev %s\n", 1043 sf->block_ctx->start, 1044 sf->block_ctx->dev->name); 1045 goto one_stack_frame_backwards; 1046 } 1047 btrfsic_read_from_block_data(sf->block_ctx, 1048 &disk_item, 1049 disk_item_offset, 1050 sizeof(struct btrfs_item)); 1051 item_offset = le32_to_cpu(disk_item.offset); 1052 item_size = le32_to_cpu(disk_item.size); 1053 disk_key = &disk_item.key; 1054 type = disk_key->type; 1055 1056 if (BTRFS_ROOT_ITEM_KEY == type) { 1057 struct btrfs_root_item root_item; 1058 u32 root_item_offset; 1059 u64 next_bytenr; 1060 1061 root_item_offset = 
item_offset + 1062 offsetof(struct btrfs_leaf, items); 1063 if (root_item_offset + item_size > 1064 sf->block_ctx->len) 1065 goto leaf_item_out_of_bounce_error; 1066 btrfsic_read_from_block_data( 1067 sf->block_ctx, &root_item, 1068 root_item_offset, 1069 item_size); 1070 next_bytenr = le64_to_cpu(root_item.bytenr); 1071 1072 sf->error = 1073 btrfsic_create_link_to_next_block( 1074 state, 1075 sf->block, 1076 sf->block_ctx, 1077 next_bytenr, 1078 sf->limit_nesting, 1079 &sf->next_block_ctx, 1080 &sf->next_block, 1081 force_iodone_flag, 1082 &sf->num_copies, 1083 &sf->mirror_num, 1084 disk_key, 1085 le64_to_cpu(root_item. 1086 generation)); 1087 if (sf->error) 1088 goto one_stack_frame_backwards; 1089 1090 if (NULL != sf->next_block) { 1091 struct btrfs_header *const next_hdr = 1092 (struct btrfs_header *) 1093 sf->next_block_ctx.datav[0]; 1094 1095 next_stack = 1096 btrfsic_stack_frame_alloc(); 1097 if (NULL == next_stack) { 1098 btrfsic_release_block_ctx( 1099 &sf-> 1100 next_block_ctx); 1101 goto one_stack_frame_backwards; 1102 } 1103 1104 next_stack->i = -1; 1105 next_stack->block = sf->next_block; 1106 next_stack->block_ctx = 1107 &sf->next_block_ctx; 1108 next_stack->next_block = NULL; 1109 next_stack->hdr = next_hdr; 1110 next_stack->limit_nesting = 1111 sf->limit_nesting - 1; 1112 next_stack->prev = sf; 1113 sf = next_stack; 1114 goto continue_with_new_stack_frame; 1115 } 1116 } else if (BTRFS_EXTENT_DATA_KEY == type && 1117 state->include_extent_data) { 1118 sf->error = btrfsic_handle_extent_data( 1119 state, 1120 sf->block, 1121 sf->block_ctx, 1122 item_offset, 1123 force_iodone_flag); 1124 if (sf->error) 1125 goto one_stack_frame_backwards; 1126 } 1127 1128 goto continue_with_current_leaf_stack_frame; 1129 } 1130 } else { 1131 struct btrfs_node *const nodehdr = (struct btrfs_node *)sf->hdr; 1132 1133 if (-1 == sf->i) { 1134 sf->nr = le32_to_cpu(nodehdr->header.nritems); 1135 1136 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 1137 printk(KERN_INFO 
"node %llu level %d items %d" 1138 " generation %llu owner %llu\n", 1139 (unsigned long long) 1140 sf->block_ctx->start, 1141 nodehdr->header.level, sf->nr, 1142 (unsigned long long) 1143 le64_to_cpu(nodehdr->header.generation), 1144 (unsigned long long) 1145 le64_to_cpu(nodehdr->header.owner)); 1146 } 1147 1148 continue_with_current_node_stack_frame: 1149 if (0 == sf->num_copies || sf->mirror_num > sf->num_copies) { 1150 sf->i++; 1151 sf->num_copies = 0; 1152 } 1153 1154 if (sf->i < sf->nr) { 1155 struct btrfs_key_ptr key_ptr; 1156 u32 key_ptr_offset; 1157 u64 next_bytenr; 1158 1159 key_ptr_offset = (uintptr_t)(nodehdr->ptrs + sf->i) - 1160 (uintptr_t)nodehdr; 1161 if (key_ptr_offset + sizeof(struct btrfs_key_ptr) > 1162 sf->block_ctx->len) { 1163 printk(KERN_INFO 1164 "btrfsic: node item out of bounce at logical %llu, dev %s\n", 1165 sf->block_ctx->start, 1166 sf->block_ctx->dev->name); 1167 goto one_stack_frame_backwards; 1168 } 1169 btrfsic_read_from_block_data( 1170 sf->block_ctx, &key_ptr, key_ptr_offset, 1171 sizeof(struct btrfs_key_ptr)); 1172 next_bytenr = le64_to_cpu(key_ptr.blockptr); 1173 1174 sf->error = btrfsic_create_link_to_next_block( 1175 state, 1176 sf->block, 1177 sf->block_ctx, 1178 next_bytenr, 1179 sf->limit_nesting, 1180 &sf->next_block_ctx, 1181 &sf->next_block, 1182 force_iodone_flag, 1183 &sf->num_copies, 1184 &sf->mirror_num, 1185 &key_ptr.key, 1186 le64_to_cpu(key_ptr.generation)); 1187 if (sf->error) 1188 goto one_stack_frame_backwards; 1189 1190 if (NULL != sf->next_block) { 1191 struct btrfs_header *const next_hdr = 1192 (struct btrfs_header *) 1193 sf->next_block_ctx.datav[0]; 1194 1195 next_stack = btrfsic_stack_frame_alloc(); 1196 if (NULL == next_stack) 1197 goto one_stack_frame_backwards; 1198 1199 next_stack->i = -1; 1200 next_stack->block = sf->next_block; 1201 next_stack->block_ctx = &sf->next_block_ctx; 1202 next_stack->next_block = NULL; 1203 next_stack->hdr = next_hdr; 1204 next_stack->limit_nesting = 1205 
sf->limit_nesting - 1; 1206 next_stack->prev = sf; 1207 sf = next_stack; 1208 goto continue_with_new_stack_frame; 1209 } 1210 1211 goto continue_with_current_node_stack_frame; 1212 } 1213 } 1214 1215 one_stack_frame_backwards: 1216 if (NULL != sf->prev) { 1217 struct btrfsic_stack_frame *const prev = sf->prev; 1218 1219 /* the one for the initial block is freed in the caller */ 1220 btrfsic_release_block_ctx(sf->block_ctx); 1221 1222 if (sf->error) { 1223 prev->error = sf->error; 1224 btrfsic_stack_frame_free(sf); 1225 sf = prev; 1226 goto one_stack_frame_backwards; 1227 } 1228 1229 btrfsic_stack_frame_free(sf); 1230 sf = prev; 1231 goto continue_with_new_stack_frame; 1232 } else { 1233 BUG_ON(&initial_stack_frame != sf); 1234 } 1235 1236 return sf->error; 1237 } 1238 1239 static void btrfsic_read_from_block_data( 1240 struct btrfsic_block_data_ctx *block_ctx, 1241 void *dstv, u32 offset, size_t len) 1242 { 1243 size_t cur; 1244 size_t offset_in_page; 1245 char *kaddr; 1246 char *dst = (char *)dstv; 1247 size_t start_offset = block_ctx->start & ((u64)PAGE_CACHE_SIZE - 1); 1248 unsigned long i = (start_offset + offset) >> PAGE_CACHE_SHIFT; 1249 1250 WARN_ON(offset + len > block_ctx->len); 1251 offset_in_page = (start_offset + offset) & 1252 ((unsigned long)PAGE_CACHE_SIZE - 1); 1253 1254 while (len > 0) { 1255 cur = min(len, ((size_t)PAGE_CACHE_SIZE - offset_in_page)); 1256 BUG_ON(i >= (block_ctx->len + PAGE_CACHE_SIZE - 1) >> 1257 PAGE_CACHE_SHIFT); 1258 kaddr = block_ctx->datav[i]; 1259 memcpy(dst, kaddr + offset_in_page, cur); 1260 1261 dst += cur; 1262 len -= cur; 1263 offset_in_page = 0; 1264 i++; 1265 } 1266 } 1267 1268 static int btrfsic_create_link_to_next_block( 1269 struct btrfsic_state *state, 1270 struct btrfsic_block *block, 1271 struct btrfsic_block_data_ctx *block_ctx, 1272 u64 next_bytenr, 1273 int limit_nesting, 1274 struct btrfsic_block_data_ctx *next_block_ctx, 1275 struct btrfsic_block **next_blockp, 1276 int force_iodone_flag, 1277 int 
/*
 * Record that @block (data reachable through @block_ctx) references the
 * metadata block at logical address @next_bytenr, handling one mirror of the
 * referenced block per call.
 *
 * *num_copiesp / *mirror_nump form a cursor owned by the caller: when
 * *num_copiesp is 0 it is initialized from btrfs_num_copies() and
 * *mirror_nump is set to 1; every call that gets as far as the final
 * "return 0" increments *mirror_nump. Once *mirror_nump exceeds
 * *num_copiesp the call returns 0 immediately without doing anything.
 *
 * The referenced block is looked up (or added) in the block hash table and a
 * block link from @block to it is looked up or newly allocated. A link that
 * already existed only has its ref_cnt bumped when @limit_nesting is 0.
 *
 * *next_blockp is set to the referenced block only when the link was newly
 * allocated AND @limit_nesting > 0; in that case the block has been read
 * into @next_block_ctx and the caller is expected to process it and to
 * release @next_block_ctx afterwards. In all other cases *next_blockp is
 * NULL.
 *
 * @disk_key is copied into the referenced block, @parent_generation into the
 * link.
 *
 * Returns 0 on success, -1 on mapping, allocation or read failure (with
 * @next_block_ctx released and *next_blockp == NULL).
 */
static int btrfsic_create_link_to_next_block(
		struct btrfsic_state *state,
		struct btrfsic_block *block,
		struct btrfsic_block_data_ctx *block_ctx,
		u64 next_bytenr,
		int limit_nesting,
		struct btrfsic_block_data_ctx *next_block_ctx,
		struct btrfsic_block **next_blockp,
		int force_iodone_flag,
		int *num_copiesp, int *mirror_nump,
		struct btrfs_disk_key *disk_key,
		u64 parent_generation)
{
	struct btrfsic_block *next_block = NULL;
	int ret;
	struct btrfsic_block_link *l;
	int did_alloc_block_link;
	int block_was_created;

	*next_blockp = NULL;
	/* first call for this reference: set up the mirror cursor */
	if (0 == *num_copiesp) {
		*num_copiesp =
		    btrfs_num_copies(state->root->fs_info,
				     next_bytenr, state->metablock_size);
		if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
			printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
			       (unsigned long long)next_bytenr, *num_copiesp);
		*mirror_nump = 1;
	}

	/* all mirrors of this reference have been handled already */
	if (*mirror_nump > *num_copiesp)
		return 0;

	if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
		printk(KERN_INFO
		       "btrfsic_create_link_to_next_block(mirror_num=%d)\n",
		       *mirror_nump);
	ret = btrfsic_map_block(state, next_bytenr,
				state->metablock_size,
				next_block_ctx, *mirror_nump);
	if (ret) {
		printk(KERN_INFO
		       "btrfsic: btrfsic_map_block(@%llu, mirror=%d) failed!\n",
		       (unsigned long long)next_bytenr, *mirror_nump);
		btrfsic_release_block_ctx(next_block_ctx);
		*next_blockp = NULL;
		return -1;
	}

	next_block = btrfsic_block_lookup_or_add(state,
						 next_block_ctx, "referenced ",
						 1, force_iodone_flag,
						 !force_iodone_flag,
						 *mirror_nump,
						 &block_was_created);
	if (NULL == next_block) {
		btrfsic_release_block_ctx(next_block_ctx);
		*next_blockp = NULL;
		return -1;
	}
	if (block_was_created) {
		/* a brand-new block cannot have a link yet */
		l = NULL;
		next_block->generation = BTRFSIC_GENERATION_UNKNOWN;
	} else {
		/*
		 * Known block: complain if the stored logical address
		 * differs, unless it is a non-metadata block whose logical
		 * address is still 0.
		 */
		if (next_block->logical_bytenr != next_bytenr &&
		    !(!next_block->is_metadata &&
		      0 == next_block->logical_bytenr)) {
			printk(KERN_INFO
			       "Referenced block @%llu (%s/%llu/%d)"
			       " found in hash table, %c,"
			       " bytenr mismatch (!= stored %llu).\n",
			       (unsigned long long)next_bytenr,
			       next_block_ctx->dev->name,
			       (unsigned long long)next_block_ctx->dev_bytenr,
			       *mirror_nump,
			       btrfsic_get_block_type(state, next_block),
			       (unsigned long long)next_block->logical_bytenr);
		} else if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
			printk(KERN_INFO
			       "Referenced block @%llu (%s/%llu/%d)"
			       " found in hash table, %c.\n",
			       (unsigned long long)next_bytenr,
			       next_block_ctx->dev->name,
			       (unsigned long long)next_block_ctx->dev_bytenr,
			       *mirror_nump,
			       btrfsic_get_block_type(state, next_block));
		next_block->logical_bytenr = next_bytenr;

		next_block->mirror_num = *mirror_nump;
		/* is there already a link from @block to this block? */
		l = btrfsic_block_link_hashtable_lookup(
				next_block_ctx->dev->bdev,
				next_block_ctx->dev_bytenr,
				block_ctx->dev->bdev,
				block_ctx->dev_bytenr,
				&state->block_link_hashtable);
	}

	next_block->disk_key = *disk_key;
	if (NULL == l) {
		l = btrfsic_block_link_alloc();
		if (NULL == l) {
			printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
			btrfsic_release_block_ctx(next_block_ctx);
			*next_blockp = NULL;
			return -1;
		}

		did_alloc_block_link = 1;
		l->block_ref_to = next_block;
		l->block_ref_from = block;
		l->ref_cnt = 1;
		l->parent_generation = parent_generation;

		if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
			btrfsic_print_add_link(state, l);

		/* hook the link into both blocks' lists and the hash table */
		list_add(&l->node_ref_to, &block->ref_to_list);
		list_add(&l->node_ref_from, &next_block->ref_from_list);

		btrfsic_block_link_hashtable_add(l,
						 &state->block_link_hashtable);
	} else {
		did_alloc_block_link = 0;
		/* existing link: only counted when no descent is requested */
		if (0 == limit_nesting) {
			l->ref_cnt++;
			l->parent_generation = parent_generation;
			if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
				btrfsic_print_add_link(state, l);
		}
	}

	/* only freshly linked blocks are read and handed back for descent */
	if (limit_nesting > 0 && did_alloc_block_link) {
		ret = btrfsic_read_block(state, next_block_ctx);
		if (ret < (int)next_block_ctx->len) {
			printk(KERN_INFO
			       "btrfsic: read block @logical %llu failed!\n",
			       (unsigned long long)next_bytenr);
			btrfsic_release_block_ctx(next_block_ctx);
			*next_blockp = NULL;
			return -1;
		}

		*next_blockp = next_block;
	} else {
		*next_blockp = NULL;
	}
	/* the next call continues with the following mirror */
	(*mirror_nump)++;

	return 0;
}
printk(KERN_INFO 1403 "btrfsic: read block @logical %llu failed!\n", 1404 (unsigned long long)next_bytenr); 1405 btrfsic_release_block_ctx(next_block_ctx); 1406 *next_blockp = NULL; 1407 return -1; 1408 } 1409 1410 *next_blockp = next_block; 1411 } else { 1412 *next_blockp = NULL; 1413 } 1414 (*mirror_nump)++; 1415 1416 return 0; 1417 } 1418 1419 static int btrfsic_handle_extent_data( 1420 struct btrfsic_state *state, 1421 struct btrfsic_block *block, 1422 struct btrfsic_block_data_ctx *block_ctx, 1423 u32 item_offset, int force_iodone_flag) 1424 { 1425 int ret; 1426 struct btrfs_file_extent_item file_extent_item; 1427 u64 file_extent_item_offset; 1428 u64 next_bytenr; 1429 u64 num_bytes; 1430 u64 generation; 1431 struct btrfsic_block_link *l; 1432 1433 file_extent_item_offset = offsetof(struct btrfs_leaf, items) + 1434 item_offset; 1435 if (file_extent_item_offset + 1436 offsetof(struct btrfs_file_extent_item, disk_num_bytes) > 1437 block_ctx->len) { 1438 printk(KERN_INFO 1439 "btrfsic: file item out of bounce at logical %llu, dev %s\n", 1440 block_ctx->start, block_ctx->dev->name); 1441 return -1; 1442 } 1443 1444 btrfsic_read_from_block_data(block_ctx, &file_extent_item, 1445 file_extent_item_offset, 1446 offsetof(struct btrfs_file_extent_item, disk_num_bytes)); 1447 if (BTRFS_FILE_EXTENT_REG != file_extent_item.type || 1448 ((u64)0) == le64_to_cpu(file_extent_item.disk_bytenr)) { 1449 if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE) 1450 printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu\n", 1451 file_extent_item.type, 1452 (unsigned long long) 1453 le64_to_cpu(file_extent_item.disk_bytenr)); 1454 return 0; 1455 } 1456 1457 if (file_extent_item_offset + sizeof(struct btrfs_file_extent_item) > 1458 block_ctx->len) { 1459 printk(KERN_INFO 1460 "btrfsic: file item out of bounce at logical %llu, dev %s\n", 1461 block_ctx->start, block_ctx->dev->name); 1462 return -1; 1463 } 1464 btrfsic_read_from_block_data(block_ctx, &file_extent_item, 1465 
file_extent_item_offset, 1466 sizeof(struct btrfs_file_extent_item)); 1467 next_bytenr = le64_to_cpu(file_extent_item.disk_bytenr) + 1468 le64_to_cpu(file_extent_item.offset); 1469 generation = le64_to_cpu(file_extent_item.generation); 1470 num_bytes = le64_to_cpu(file_extent_item.num_bytes); 1471 generation = le64_to_cpu(file_extent_item.generation); 1472 1473 if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE) 1474 printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu," 1475 " offset = %llu, num_bytes = %llu\n", 1476 file_extent_item.type, 1477 (unsigned long long) 1478 le64_to_cpu(file_extent_item.disk_bytenr), 1479 (unsigned long long)le64_to_cpu(file_extent_item.offset), 1480 (unsigned long long)num_bytes); 1481 while (num_bytes > 0) { 1482 u32 chunk_len; 1483 int num_copies; 1484 int mirror_num; 1485 1486 if (num_bytes > state->datablock_size) 1487 chunk_len = state->datablock_size; 1488 else 1489 chunk_len = num_bytes; 1490 1491 num_copies = 1492 btrfs_num_copies(state->root->fs_info, 1493 next_bytenr, state->datablock_size); 1494 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) 1495 printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", 1496 (unsigned long long)next_bytenr, num_copies); 1497 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { 1498 struct btrfsic_block_data_ctx next_block_ctx; 1499 struct btrfsic_block *next_block; 1500 int block_was_created; 1501 1502 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 1503 printk(KERN_INFO "btrfsic_handle_extent_data(" 1504 "mirror_num=%d)\n", mirror_num); 1505 if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE) 1506 printk(KERN_INFO 1507 "\tdisk_bytenr = %llu, num_bytes %u\n", 1508 (unsigned long long)next_bytenr, 1509 chunk_len); 1510 ret = btrfsic_map_block(state, next_bytenr, 1511 chunk_len, &next_block_ctx, 1512 mirror_num); 1513 if (ret) { 1514 printk(KERN_INFO 1515 "btrfsic: btrfsic_map_block(@%llu," 1516 " mirror=%d) failed!\n", 1517 (unsigned long long)next_bytenr, 
1518 mirror_num); 1519 return -1; 1520 } 1521 1522 next_block = btrfsic_block_lookup_or_add( 1523 state, 1524 &next_block_ctx, 1525 "referenced ", 1526 0, 1527 force_iodone_flag, 1528 !force_iodone_flag, 1529 mirror_num, 1530 &block_was_created); 1531 if (NULL == next_block) { 1532 printk(KERN_INFO 1533 "btrfsic: error, kmalloc failed!\n"); 1534 btrfsic_release_block_ctx(&next_block_ctx); 1535 return -1; 1536 } 1537 if (!block_was_created) { 1538 if (next_block->logical_bytenr != next_bytenr && 1539 !(!next_block->is_metadata && 1540 0 == next_block->logical_bytenr)) { 1541 printk(KERN_INFO 1542 "Referenced block" 1543 " @%llu (%s/%llu/%d)" 1544 " found in hash table, D," 1545 " bytenr mismatch" 1546 " (!= stored %llu).\n", 1547 (unsigned long long)next_bytenr, 1548 next_block_ctx.dev->name, 1549 (unsigned long long) 1550 next_block_ctx.dev_bytenr, 1551 mirror_num, 1552 (unsigned long long) 1553 next_block->logical_bytenr); 1554 } 1555 next_block->logical_bytenr = next_bytenr; 1556 next_block->mirror_num = mirror_num; 1557 } 1558 1559 l = btrfsic_block_link_lookup_or_add(state, 1560 &next_block_ctx, 1561 next_block, block, 1562 generation); 1563 btrfsic_release_block_ctx(&next_block_ctx); 1564 if (NULL == l) 1565 return -1; 1566 } 1567 1568 next_bytenr += chunk_len; 1569 num_bytes -= chunk_len; 1570 } 1571 1572 return 0; 1573 } 1574 1575 static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len, 1576 struct btrfsic_block_data_ctx *block_ctx_out, 1577 int mirror_num) 1578 { 1579 int ret; 1580 u64 length; 1581 struct btrfs_bio *multi = NULL; 1582 struct btrfs_device *device; 1583 1584 length = len; 1585 ret = btrfs_map_block(state->root->fs_info, READ, 1586 bytenr, &length, &multi, mirror_num); 1587 1588 if (ret) { 1589 block_ctx_out->start = 0; 1590 block_ctx_out->dev_bytenr = 0; 1591 block_ctx_out->len = 0; 1592 block_ctx_out->dev = NULL; 1593 block_ctx_out->datav = NULL; 1594 block_ctx_out->pagev = NULL; 1595 block_ctx_out->mem_to_free = NULL; 
1596 1597 return ret; 1598 } 1599 1600 device = multi->stripes[0].dev; 1601 block_ctx_out->dev = btrfsic_dev_state_lookup(device->bdev); 1602 block_ctx_out->dev_bytenr = multi->stripes[0].physical; 1603 block_ctx_out->start = bytenr; 1604 block_ctx_out->len = len; 1605 block_ctx_out->datav = NULL; 1606 block_ctx_out->pagev = NULL; 1607 block_ctx_out->mem_to_free = NULL; 1608 1609 kfree(multi); 1610 if (NULL == block_ctx_out->dev) { 1611 ret = -ENXIO; 1612 printk(KERN_INFO "btrfsic: error, cannot lookup dev (#1)!\n"); 1613 } 1614 1615 return ret; 1616 } 1617 1618 static int btrfsic_map_superblock(struct btrfsic_state *state, u64 bytenr, 1619 u32 len, struct block_device *bdev, 1620 struct btrfsic_block_data_ctx *block_ctx_out) 1621 { 1622 block_ctx_out->dev = btrfsic_dev_state_lookup(bdev); 1623 block_ctx_out->dev_bytenr = bytenr; 1624 block_ctx_out->start = bytenr; 1625 block_ctx_out->len = len; 1626 block_ctx_out->datav = NULL; 1627 block_ctx_out->pagev = NULL; 1628 block_ctx_out->mem_to_free = NULL; 1629 if (NULL != block_ctx_out->dev) { 1630 return 0; 1631 } else { 1632 printk(KERN_INFO "btrfsic: error, cannot lookup dev (#2)!\n"); 1633 return -ENXIO; 1634 } 1635 } 1636 1637 static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx) 1638 { 1639 if (block_ctx->mem_to_free) { 1640 unsigned int num_pages; 1641 1642 BUG_ON(!block_ctx->datav); 1643 BUG_ON(!block_ctx->pagev); 1644 num_pages = (block_ctx->len + (u64)PAGE_CACHE_SIZE - 1) >> 1645 PAGE_CACHE_SHIFT; 1646 while (num_pages > 0) { 1647 num_pages--; 1648 if (block_ctx->datav[num_pages]) { 1649 kunmap(block_ctx->pagev[num_pages]); 1650 block_ctx->datav[num_pages] = NULL; 1651 } 1652 if (block_ctx->pagev[num_pages]) { 1653 __free_page(block_ctx->pagev[num_pages]); 1654 block_ctx->pagev[num_pages] = NULL; 1655 } 1656 } 1657 1658 kfree(block_ctx->mem_to_free); 1659 block_ctx->mem_to_free = NULL; 1660 block_ctx->pagev = NULL; 1661 block_ctx->datav = NULL; 1662 } 1663 } 1664 1665 static int 
btrfsic_read_block(struct btrfsic_state *state, 1666 struct btrfsic_block_data_ctx *block_ctx) 1667 { 1668 unsigned int num_pages; 1669 unsigned int i; 1670 u64 dev_bytenr; 1671 int ret; 1672 1673 BUG_ON(block_ctx->datav); 1674 BUG_ON(block_ctx->pagev); 1675 BUG_ON(block_ctx->mem_to_free); 1676 if (block_ctx->dev_bytenr & ((u64)PAGE_CACHE_SIZE - 1)) { 1677 printk(KERN_INFO 1678 "btrfsic: read_block() with unaligned bytenr %llu\n", 1679 (unsigned long long)block_ctx->dev_bytenr); 1680 return -1; 1681 } 1682 1683 num_pages = (block_ctx->len + (u64)PAGE_CACHE_SIZE - 1) >> 1684 PAGE_CACHE_SHIFT; 1685 block_ctx->mem_to_free = kzalloc((sizeof(*block_ctx->datav) + 1686 sizeof(*block_ctx->pagev)) * 1687 num_pages, GFP_NOFS); 1688 if (!block_ctx->mem_to_free) 1689 return -1; 1690 block_ctx->datav = block_ctx->mem_to_free; 1691 block_ctx->pagev = (struct page **)(block_ctx->datav + num_pages); 1692 for (i = 0; i < num_pages; i++) { 1693 block_ctx->pagev[i] = alloc_page(GFP_NOFS); 1694 if (!block_ctx->pagev[i]) 1695 return -1; 1696 } 1697 1698 dev_bytenr = block_ctx->dev_bytenr; 1699 for (i = 0; i < num_pages;) { 1700 struct bio *bio; 1701 unsigned int j; 1702 DECLARE_COMPLETION_ONSTACK(complete); 1703 1704 bio = bio_alloc(GFP_NOFS, num_pages - i); 1705 if (!bio) { 1706 printk(KERN_INFO 1707 "btrfsic: bio_alloc() for %u pages failed!\n", 1708 num_pages - i); 1709 return -1; 1710 } 1711 bio->bi_bdev = block_ctx->dev->bdev; 1712 bio->bi_sector = dev_bytenr >> 9; 1713 bio->bi_end_io = btrfsic_complete_bio_end_io; 1714 bio->bi_private = &complete; 1715 1716 for (j = i; j < num_pages; j++) { 1717 ret = bio_add_page(bio, block_ctx->pagev[j], 1718 PAGE_CACHE_SIZE, 0); 1719 if (PAGE_CACHE_SIZE != ret) 1720 break; 1721 } 1722 if (j == i) { 1723 printk(KERN_INFO 1724 "btrfsic: error, failed to add a single page!\n"); 1725 return -1; 1726 } 1727 submit_bio(READ, bio); 1728 1729 /* this will also unplug the queue */ 1730 wait_for_completion(&complete); 1731 1732 if 
(!test_bit(BIO_UPTODATE, &bio->bi_flags)) { 1733 printk(KERN_INFO 1734 "btrfsic: read error at logical %llu dev %s!\n", 1735 block_ctx->start, block_ctx->dev->name); 1736 bio_put(bio); 1737 return -1; 1738 } 1739 bio_put(bio); 1740 dev_bytenr += (j - i) * PAGE_CACHE_SIZE; 1741 i = j; 1742 } 1743 for (i = 0; i < num_pages; i++) { 1744 block_ctx->datav[i] = kmap(block_ctx->pagev[i]); 1745 if (!block_ctx->datav[i]) { 1746 printk(KERN_INFO "btrfsic: kmap() failed (dev %s)!\n", 1747 block_ctx->dev->name); 1748 return -1; 1749 } 1750 } 1751 1752 return block_ctx->len; 1753 } 1754 1755 static void btrfsic_complete_bio_end_io(struct bio *bio, int err) 1756 { 1757 complete((struct completion *)bio->bi_private); 1758 } 1759 1760 static void btrfsic_dump_database(struct btrfsic_state *state) 1761 { 1762 struct list_head *elem_all; 1763 1764 BUG_ON(NULL == state); 1765 1766 printk(KERN_INFO "all_blocks_list:\n"); 1767 list_for_each(elem_all, &state->all_blocks_list) { 1768 const struct btrfsic_block *const b_all = 1769 list_entry(elem_all, struct btrfsic_block, 1770 all_blocks_node); 1771 struct list_head *elem_ref_to; 1772 struct list_head *elem_ref_from; 1773 1774 printk(KERN_INFO "%c-block @%llu (%s/%llu/%d)\n", 1775 btrfsic_get_block_type(state, b_all), 1776 (unsigned long long)b_all->logical_bytenr, 1777 b_all->dev_state->name, 1778 (unsigned long long)b_all->dev_bytenr, 1779 b_all->mirror_num); 1780 1781 list_for_each(elem_ref_to, &b_all->ref_to_list) { 1782 const struct btrfsic_block_link *const l = 1783 list_entry(elem_ref_to, 1784 struct btrfsic_block_link, 1785 node_ref_to); 1786 1787 printk(KERN_INFO " %c @%llu (%s/%llu/%d)" 1788 " refers %u* to" 1789 " %c @%llu (%s/%llu/%d)\n", 1790 btrfsic_get_block_type(state, b_all), 1791 (unsigned long long)b_all->logical_bytenr, 1792 b_all->dev_state->name, 1793 (unsigned long long)b_all->dev_bytenr, 1794 b_all->mirror_num, 1795 l->ref_cnt, 1796 btrfsic_get_block_type(state, l->block_ref_to), 1797 (unsigned long long) 1798 
l->block_ref_to->logical_bytenr, 1799 l->block_ref_to->dev_state->name, 1800 (unsigned long long)l->block_ref_to->dev_bytenr, 1801 l->block_ref_to->mirror_num); 1802 } 1803 1804 list_for_each(elem_ref_from, &b_all->ref_from_list) { 1805 const struct btrfsic_block_link *const l = 1806 list_entry(elem_ref_from, 1807 struct btrfsic_block_link, 1808 node_ref_from); 1809 1810 printk(KERN_INFO " %c @%llu (%s/%llu/%d)" 1811 " is ref %u* from" 1812 " %c @%llu (%s/%llu/%d)\n", 1813 btrfsic_get_block_type(state, b_all), 1814 (unsigned long long)b_all->logical_bytenr, 1815 b_all->dev_state->name, 1816 (unsigned long long)b_all->dev_bytenr, 1817 b_all->mirror_num, 1818 l->ref_cnt, 1819 btrfsic_get_block_type(state, l->block_ref_from), 1820 (unsigned long long) 1821 l->block_ref_from->logical_bytenr, 1822 l->block_ref_from->dev_state->name, 1823 (unsigned long long) 1824 l->block_ref_from->dev_bytenr, 1825 l->block_ref_from->mirror_num); 1826 } 1827 1828 printk(KERN_INFO "\n"); 1829 } 1830 } 1831 1832 /* 1833 * Test whether the disk block contains a tree block (leaf or node) 1834 * (note that this test fails for the super block) 1835 */ 1836 static int btrfsic_test_for_metadata(struct btrfsic_state *state, 1837 char **datav, unsigned int num_pages) 1838 { 1839 struct btrfs_header *h; 1840 u8 csum[BTRFS_CSUM_SIZE]; 1841 u32 crc = ~(u32)0; 1842 unsigned int i; 1843 1844 if (num_pages * PAGE_CACHE_SIZE < state->metablock_size) 1845 return 1; /* not metadata */ 1846 num_pages = state->metablock_size >> PAGE_CACHE_SHIFT; 1847 h = (struct btrfs_header *)datav[0]; 1848 1849 if (memcmp(h->fsid, state->root->fs_info->fsid, BTRFS_UUID_SIZE)) 1850 return 1; 1851 1852 for (i = 0; i < num_pages; i++) { 1853 u8 *data = i ? datav[i] : (datav[i] + BTRFS_CSUM_SIZE); 1854 size_t sublen = i ? 
PAGE_CACHE_SIZE : 1855 (PAGE_CACHE_SIZE - BTRFS_CSUM_SIZE); 1856 1857 crc = crc32c(crc, data, sublen); 1858 } 1859 btrfs_csum_final(crc, csum); 1860 if (memcmp(csum, h->csum, state->csum_size)) 1861 return 1; 1862 1863 return 0; /* is metadata */ 1864 } 1865 1866 static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, 1867 u64 dev_bytenr, char **mapped_datav, 1868 unsigned int num_pages, 1869 struct bio *bio, int *bio_is_patched, 1870 struct buffer_head *bh, 1871 int submit_bio_bh_rw) 1872 { 1873 int is_metadata; 1874 struct btrfsic_block *block; 1875 struct btrfsic_block_data_ctx block_ctx; 1876 int ret; 1877 struct btrfsic_state *state = dev_state->state; 1878 struct block_device *bdev = dev_state->bdev; 1879 unsigned int processed_len; 1880 1881 if (NULL != bio_is_patched) 1882 *bio_is_patched = 0; 1883 1884 again: 1885 if (num_pages == 0) 1886 return; 1887 1888 processed_len = 0; 1889 is_metadata = (0 == btrfsic_test_for_metadata(state, mapped_datav, 1890 num_pages)); 1891 1892 block = btrfsic_block_hashtable_lookup(bdev, dev_bytenr, 1893 &state->block_hashtable); 1894 if (NULL != block) { 1895 u64 bytenr = 0; 1896 struct list_head *elem_ref_to; 1897 struct list_head *tmp_ref_to; 1898 1899 if (block->is_superblock) { 1900 bytenr = le64_to_cpu(((struct btrfs_super_block *) 1901 mapped_datav[0])->bytenr); 1902 if (num_pages * PAGE_CACHE_SIZE < 1903 BTRFS_SUPER_INFO_SIZE) { 1904 printk(KERN_INFO 1905 "btrfsic: cannot work with too short bios!\n"); 1906 return; 1907 } 1908 is_metadata = 1; 1909 BUG_ON(BTRFS_SUPER_INFO_SIZE & (PAGE_CACHE_SIZE - 1)); 1910 processed_len = BTRFS_SUPER_INFO_SIZE; 1911 if (state->print_mask & 1912 BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE) { 1913 printk(KERN_INFO 1914 "[before new superblock is written]:\n"); 1915 btrfsic_dump_tree_sub(state, block, 0); 1916 } 1917 } 1918 if (is_metadata) { 1919 if (!block->is_superblock) { 1920 if (num_pages * PAGE_CACHE_SIZE < 1921 state->metablock_size) { 1922 printk(KERN_INFO 
1923 "btrfsic: cannot work with too short bios!\n"); 1924 return; 1925 } 1926 processed_len = state->metablock_size; 1927 bytenr = le64_to_cpu(((struct btrfs_header *) 1928 mapped_datav[0])->bytenr); 1929 btrfsic_cmp_log_and_dev_bytenr(state, bytenr, 1930 dev_state, 1931 dev_bytenr); 1932 } 1933 if (block->logical_bytenr != bytenr) { 1934 printk(KERN_INFO 1935 "Written block @%llu (%s/%llu/%d)" 1936 " found in hash table, %c," 1937 " bytenr mismatch" 1938 " (!= stored %llu).\n", 1939 (unsigned long long)bytenr, 1940 dev_state->name, 1941 (unsigned long long)dev_bytenr, 1942 block->mirror_num, 1943 btrfsic_get_block_type(state, block), 1944 (unsigned long long) 1945 block->logical_bytenr); 1946 block->logical_bytenr = bytenr; 1947 } else if (state->print_mask & 1948 BTRFSIC_PRINT_MASK_VERBOSE) 1949 printk(KERN_INFO 1950 "Written block @%llu (%s/%llu/%d)" 1951 " found in hash table, %c.\n", 1952 (unsigned long long)bytenr, 1953 dev_state->name, 1954 (unsigned long long)dev_bytenr, 1955 block->mirror_num, 1956 btrfsic_get_block_type(state, block)); 1957 } else { 1958 if (num_pages * PAGE_CACHE_SIZE < 1959 state->datablock_size) { 1960 printk(KERN_INFO 1961 "btrfsic: cannot work with too short bios!\n"); 1962 return; 1963 } 1964 processed_len = state->datablock_size; 1965 bytenr = block->logical_bytenr; 1966 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 1967 printk(KERN_INFO 1968 "Written block @%llu (%s/%llu/%d)" 1969 " found in hash table, %c.\n", 1970 (unsigned long long)bytenr, 1971 dev_state->name, 1972 (unsigned long long)dev_bytenr, 1973 block->mirror_num, 1974 btrfsic_get_block_type(state, block)); 1975 } 1976 1977 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 1978 printk(KERN_INFO 1979 "ref_to_list: %cE, ref_from_list: %cE\n", 1980 list_empty(&block->ref_to_list) ? ' ' : '!', 1981 list_empty(&block->ref_from_list) ? 
' ' : '!'); 1982 if (btrfsic_is_block_ref_by_superblock(state, block, 0)) { 1983 printk(KERN_INFO "btrfs: attempt to overwrite %c-block" 1984 " @%llu (%s/%llu/%d), old(gen=%llu," 1985 " objectid=%llu, type=%d, offset=%llu)," 1986 " new(gen=%llu)," 1987 " which is referenced by most recent superblock" 1988 " (superblockgen=%llu)!\n", 1989 btrfsic_get_block_type(state, block), 1990 (unsigned long long)bytenr, 1991 dev_state->name, 1992 (unsigned long long)dev_bytenr, 1993 block->mirror_num, 1994 (unsigned long long)block->generation, 1995 (unsigned long long) 1996 le64_to_cpu(block->disk_key.objectid), 1997 block->disk_key.type, 1998 (unsigned long long) 1999 le64_to_cpu(block->disk_key.offset), 2000 (unsigned long long) 2001 le64_to_cpu(((struct btrfs_header *) 2002 mapped_datav[0])->generation), 2003 (unsigned long long) 2004 state->max_superblock_generation); 2005 btrfsic_dump_tree(state); 2006 } 2007 2008 if (!block->is_iodone && !block->never_written) { 2009 printk(KERN_INFO "btrfs: attempt to overwrite %c-block" 2010 " @%llu (%s/%llu/%d), oldgen=%llu, newgen=%llu," 2011 " which is not yet iodone!\n", 2012 btrfsic_get_block_type(state, block), 2013 (unsigned long long)bytenr, 2014 dev_state->name, 2015 (unsigned long long)dev_bytenr, 2016 block->mirror_num, 2017 (unsigned long long)block->generation, 2018 (unsigned long long) 2019 le64_to_cpu(((struct btrfs_header *) 2020 mapped_datav[0])->generation)); 2021 /* it would not be safe to go on */ 2022 btrfsic_dump_tree(state); 2023 goto continue_loop; 2024 } 2025 2026 /* 2027 * Clear all references of this block. Do not free 2028 * the block itself even if is not referenced anymore 2029 * because it still carries valueable information 2030 * like whether it was ever written and IO completed. 
2031 */ 2032 list_for_each_safe(elem_ref_to, tmp_ref_to, 2033 &block->ref_to_list) { 2034 struct btrfsic_block_link *const l = 2035 list_entry(elem_ref_to, 2036 struct btrfsic_block_link, 2037 node_ref_to); 2038 2039 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2040 btrfsic_print_rem_link(state, l); 2041 l->ref_cnt--; 2042 if (0 == l->ref_cnt) { 2043 list_del(&l->node_ref_to); 2044 list_del(&l->node_ref_from); 2045 btrfsic_block_link_hashtable_remove(l); 2046 btrfsic_block_link_free(l); 2047 } 2048 } 2049 2050 if (block->is_superblock) 2051 ret = btrfsic_map_superblock(state, bytenr, 2052 processed_len, 2053 bdev, &block_ctx); 2054 else 2055 ret = btrfsic_map_block(state, bytenr, processed_len, 2056 &block_ctx, 0); 2057 if (ret) { 2058 printk(KERN_INFO 2059 "btrfsic: btrfsic_map_block(root @%llu)" 2060 " failed!\n", (unsigned long long)bytenr); 2061 goto continue_loop; 2062 } 2063 block_ctx.datav = mapped_datav; 2064 /* the following is required in case of writes to mirrors, 2065 * use the same that was used for the lookup */ 2066 block_ctx.dev = dev_state; 2067 block_ctx.dev_bytenr = dev_bytenr; 2068 2069 if (is_metadata || state->include_extent_data) { 2070 block->never_written = 0; 2071 block->iodone_w_error = 0; 2072 if (NULL != bio) { 2073 block->is_iodone = 0; 2074 BUG_ON(NULL == bio_is_patched); 2075 if (!*bio_is_patched) { 2076 block->orig_bio_bh_private = 2077 bio->bi_private; 2078 block->orig_bio_bh_end_io.bio = 2079 bio->bi_end_io; 2080 block->next_in_same_bio = NULL; 2081 bio->bi_private = block; 2082 bio->bi_end_io = btrfsic_bio_end_io; 2083 *bio_is_patched = 1; 2084 } else { 2085 struct btrfsic_block *chained_block = 2086 (struct btrfsic_block *) 2087 bio->bi_private; 2088 2089 BUG_ON(NULL == chained_block); 2090 block->orig_bio_bh_private = 2091 chained_block->orig_bio_bh_private; 2092 block->orig_bio_bh_end_io.bio = 2093 chained_block->orig_bio_bh_end_io. 
2094 bio; 2095 block->next_in_same_bio = chained_block; 2096 bio->bi_private = block; 2097 } 2098 } else if (NULL != bh) { 2099 block->is_iodone = 0; 2100 block->orig_bio_bh_private = bh->b_private; 2101 block->orig_bio_bh_end_io.bh = bh->b_end_io; 2102 block->next_in_same_bio = NULL; 2103 bh->b_private = block; 2104 bh->b_end_io = btrfsic_bh_end_io; 2105 } else { 2106 block->is_iodone = 1; 2107 block->orig_bio_bh_private = NULL; 2108 block->orig_bio_bh_end_io.bio = NULL; 2109 block->next_in_same_bio = NULL; 2110 } 2111 } 2112 2113 block->flush_gen = dev_state->last_flush_gen + 1; 2114 block->submit_bio_bh_rw = submit_bio_bh_rw; 2115 if (is_metadata) { 2116 block->logical_bytenr = bytenr; 2117 block->is_metadata = 1; 2118 if (block->is_superblock) { 2119 BUG_ON(PAGE_CACHE_SIZE != 2120 BTRFS_SUPER_INFO_SIZE); 2121 ret = btrfsic_process_written_superblock( 2122 state, 2123 block, 2124 (struct btrfs_super_block *) 2125 mapped_datav[0]); 2126 if (state->print_mask & 2127 BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE) { 2128 printk(KERN_INFO 2129 "[after new superblock is written]:\n"); 2130 btrfsic_dump_tree_sub(state, block, 0); 2131 } 2132 } else { 2133 block->mirror_num = 0; /* unknown */ 2134 ret = btrfsic_process_metablock( 2135 state, 2136 block, 2137 &block_ctx, 2138 0, 0); 2139 } 2140 if (ret) 2141 printk(KERN_INFO 2142 "btrfsic: btrfsic_process_metablock" 2143 "(root @%llu) failed!\n", 2144 (unsigned long long)dev_bytenr); 2145 } else { 2146 block->is_metadata = 0; 2147 block->mirror_num = 0; /* unknown */ 2148 block->generation = BTRFSIC_GENERATION_UNKNOWN; 2149 if (!state->include_extent_data 2150 && list_empty(&block->ref_from_list)) { 2151 /* 2152 * disk block is overwritten with extent 2153 * data (not meta data) and we are configured 2154 * to not include extent data: take the 2155 * chance and free the block's memory 2156 */ 2157 btrfsic_block_hashtable_remove(block); 2158 list_del(&block->all_blocks_node); 2159 btrfsic_block_free(block); 2160 } 2161 } 2162 
btrfsic_release_block_ctx(&block_ctx); 2163 } else { 2164 /* block has not been found in hash table */ 2165 u64 bytenr; 2166 2167 if (!is_metadata) { 2168 processed_len = state->datablock_size; 2169 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2170 printk(KERN_INFO "Written block (%s/%llu/?)" 2171 " !found in hash table, D.\n", 2172 dev_state->name, 2173 (unsigned long long)dev_bytenr); 2174 if (!state->include_extent_data) { 2175 /* ignore that written D block */ 2176 goto continue_loop; 2177 } 2178 2179 /* this is getting ugly for the 2180 * include_extent_data case... */ 2181 bytenr = 0; /* unknown */ 2182 block_ctx.start = bytenr; 2183 block_ctx.len = processed_len; 2184 block_ctx.mem_to_free = NULL; 2185 block_ctx.pagev = NULL; 2186 } else { 2187 processed_len = state->metablock_size; 2188 bytenr = le64_to_cpu(((struct btrfs_header *) 2189 mapped_datav[0])->bytenr); 2190 btrfsic_cmp_log_and_dev_bytenr(state, bytenr, dev_state, 2191 dev_bytenr); 2192 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2193 printk(KERN_INFO 2194 "Written block @%llu (%s/%llu/?)" 2195 " !found in hash table, M.\n", 2196 (unsigned long long)bytenr, 2197 dev_state->name, 2198 (unsigned long long)dev_bytenr); 2199 2200 ret = btrfsic_map_block(state, bytenr, processed_len, 2201 &block_ctx, 0); 2202 if (ret) { 2203 printk(KERN_INFO 2204 "btrfsic: btrfsic_map_block(root @%llu)" 2205 " failed!\n", 2206 (unsigned long long)dev_bytenr); 2207 goto continue_loop; 2208 } 2209 } 2210 block_ctx.datav = mapped_datav; 2211 /* the following is required in case of writes to mirrors, 2212 * use the same that was used for the lookup */ 2213 block_ctx.dev = dev_state; 2214 block_ctx.dev_bytenr = dev_bytenr; 2215 2216 block = btrfsic_block_alloc(); 2217 if (NULL == block) { 2218 printk(KERN_INFO "btrfsic: error, kmalloc failed!\n"); 2219 btrfsic_release_block_ctx(&block_ctx); 2220 goto continue_loop; 2221 } 2222 block->dev_state = dev_state; 2223 block->dev_bytenr = dev_bytenr; 2224 
block->logical_bytenr = bytenr; 2225 block->is_metadata = is_metadata; 2226 block->never_written = 0; 2227 block->iodone_w_error = 0; 2228 block->mirror_num = 0; /* unknown */ 2229 block->flush_gen = dev_state->last_flush_gen + 1; 2230 block->submit_bio_bh_rw = submit_bio_bh_rw; 2231 if (NULL != bio) { 2232 block->is_iodone = 0; 2233 BUG_ON(NULL == bio_is_patched); 2234 if (!*bio_is_patched) { 2235 block->orig_bio_bh_private = bio->bi_private; 2236 block->orig_bio_bh_end_io.bio = bio->bi_end_io; 2237 block->next_in_same_bio = NULL; 2238 bio->bi_private = block; 2239 bio->bi_end_io = btrfsic_bio_end_io; 2240 *bio_is_patched = 1; 2241 } else { 2242 struct btrfsic_block *chained_block = 2243 (struct btrfsic_block *) 2244 bio->bi_private; 2245 2246 BUG_ON(NULL == chained_block); 2247 block->orig_bio_bh_private = 2248 chained_block->orig_bio_bh_private; 2249 block->orig_bio_bh_end_io.bio = 2250 chained_block->orig_bio_bh_end_io.bio; 2251 block->next_in_same_bio = chained_block; 2252 bio->bi_private = block; 2253 } 2254 } else if (NULL != bh) { 2255 block->is_iodone = 0; 2256 block->orig_bio_bh_private = bh->b_private; 2257 block->orig_bio_bh_end_io.bh = bh->b_end_io; 2258 block->next_in_same_bio = NULL; 2259 bh->b_private = block; 2260 bh->b_end_io = btrfsic_bh_end_io; 2261 } else { 2262 block->is_iodone = 1; 2263 block->orig_bio_bh_private = NULL; 2264 block->orig_bio_bh_end_io.bio = NULL; 2265 block->next_in_same_bio = NULL; 2266 } 2267 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2268 printk(KERN_INFO 2269 "New written %c-block @%llu (%s/%llu/%d)\n", 2270 is_metadata ? 
'M' : 'D', 2271 (unsigned long long)block->logical_bytenr, 2272 block->dev_state->name, 2273 (unsigned long long)block->dev_bytenr, 2274 block->mirror_num); 2275 list_add(&block->all_blocks_node, &state->all_blocks_list); 2276 btrfsic_block_hashtable_add(block, &state->block_hashtable); 2277 2278 if (is_metadata) { 2279 ret = btrfsic_process_metablock(state, block, 2280 &block_ctx, 0, 0); 2281 if (ret) 2282 printk(KERN_INFO 2283 "btrfsic: process_metablock(root @%llu)" 2284 " failed!\n", 2285 (unsigned long long)dev_bytenr); 2286 } 2287 btrfsic_release_block_ctx(&block_ctx); 2288 } 2289 2290 continue_loop: 2291 BUG_ON(!processed_len); 2292 dev_bytenr += processed_len; 2293 mapped_datav += processed_len >> PAGE_CACHE_SHIFT; 2294 num_pages -= processed_len >> PAGE_CACHE_SHIFT; 2295 goto again; 2296 } 2297 2298 static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status) 2299 { 2300 struct btrfsic_block *block = (struct btrfsic_block *)bp->bi_private; 2301 int iodone_w_error; 2302 2303 /* mutex is not held! 
This is not save if IO is not yet completed 2304 * on umount */ 2305 iodone_w_error = 0; 2306 if (bio_error_status) 2307 iodone_w_error = 1; 2308 2309 BUG_ON(NULL == block); 2310 bp->bi_private = block->orig_bio_bh_private; 2311 bp->bi_end_io = block->orig_bio_bh_end_io.bio; 2312 2313 do { 2314 struct btrfsic_block *next_block; 2315 struct btrfsic_dev_state *const dev_state = block->dev_state; 2316 2317 if ((dev_state->state->print_mask & 2318 BTRFSIC_PRINT_MASK_END_IO_BIO_BH)) 2319 printk(KERN_INFO 2320 "bio_end_io(err=%d) for %c @%llu (%s/%llu/%d)\n", 2321 bio_error_status, 2322 btrfsic_get_block_type(dev_state->state, block), 2323 (unsigned long long)block->logical_bytenr, 2324 dev_state->name, 2325 (unsigned long long)block->dev_bytenr, 2326 block->mirror_num); 2327 next_block = block->next_in_same_bio; 2328 block->iodone_w_error = iodone_w_error; 2329 if (block->submit_bio_bh_rw & REQ_FLUSH) { 2330 dev_state->last_flush_gen++; 2331 if ((dev_state->state->print_mask & 2332 BTRFSIC_PRINT_MASK_END_IO_BIO_BH)) 2333 printk(KERN_INFO 2334 "bio_end_io() new %s flush_gen=%llu\n", 2335 dev_state->name, 2336 (unsigned long long) 2337 dev_state->last_flush_gen); 2338 } 2339 if (block->submit_bio_bh_rw & REQ_FUA) 2340 block->flush_gen = 0; /* FUA completed means block is 2341 * on disk */ 2342 block->is_iodone = 1; /* for FLUSH, this releases the block */ 2343 block = next_block; 2344 } while (NULL != block); 2345 2346 bp->bi_end_io(bp, bio_error_status); 2347 } 2348 2349 static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate) 2350 { 2351 struct btrfsic_block *block = (struct btrfsic_block *)bh->b_private; 2352 int iodone_w_error = !uptodate; 2353 struct btrfsic_dev_state *dev_state; 2354 2355 BUG_ON(NULL == block); 2356 dev_state = block->dev_state; 2357 if ((dev_state->state->print_mask & BTRFSIC_PRINT_MASK_END_IO_BIO_BH)) 2358 printk(KERN_INFO 2359 "bh_end_io(error=%d) for %c @%llu (%s/%llu/%d)\n", 2360 iodone_w_error, 2361 
btrfsic_get_block_type(dev_state->state, block), 2362 (unsigned long long)block->logical_bytenr, 2363 block->dev_state->name, 2364 (unsigned long long)block->dev_bytenr, 2365 block->mirror_num); 2366 2367 block->iodone_w_error = iodone_w_error; 2368 if (block->submit_bio_bh_rw & REQ_FLUSH) { 2369 dev_state->last_flush_gen++; 2370 if ((dev_state->state->print_mask & 2371 BTRFSIC_PRINT_MASK_END_IO_BIO_BH)) 2372 printk(KERN_INFO 2373 "bh_end_io() new %s flush_gen=%llu\n", 2374 dev_state->name, 2375 (unsigned long long)dev_state->last_flush_gen); 2376 } 2377 if (block->submit_bio_bh_rw & REQ_FUA) 2378 block->flush_gen = 0; /* FUA completed means block is on disk */ 2379 2380 bh->b_private = block->orig_bio_bh_private; 2381 bh->b_end_io = block->orig_bio_bh_end_io.bh; 2382 block->is_iodone = 1; /* for FLUSH, this releases the block */ 2383 bh->b_end_io(bh, uptodate); 2384 } 2385 2386 static int btrfsic_process_written_superblock( 2387 struct btrfsic_state *state, 2388 struct btrfsic_block *const superblock, 2389 struct btrfs_super_block *const super_hdr) 2390 { 2391 int pass; 2392 2393 superblock->generation = btrfs_super_generation(super_hdr); 2394 if (!(superblock->generation > state->max_superblock_generation || 2395 0 == state->max_superblock_generation)) { 2396 if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE) 2397 printk(KERN_INFO 2398 "btrfsic: superblock @%llu (%s/%llu/%d)" 2399 " with old gen %llu <= %llu\n", 2400 (unsigned long long)superblock->logical_bytenr, 2401 superblock->dev_state->name, 2402 (unsigned long long)superblock->dev_bytenr, 2403 superblock->mirror_num, 2404 (unsigned long long) 2405 btrfs_super_generation(super_hdr), 2406 (unsigned long long) 2407 state->max_superblock_generation); 2408 } else { 2409 if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE) 2410 printk(KERN_INFO 2411 "btrfsic: got new superblock @%llu (%s/%llu/%d)" 2412 " with new gen %llu > %llu\n", 2413 (unsigned long long)superblock->logical_bytenr, 2414 
superblock->dev_state->name, 2415 (unsigned long long)superblock->dev_bytenr, 2416 superblock->mirror_num, 2417 (unsigned long long) 2418 btrfs_super_generation(super_hdr), 2419 (unsigned long long) 2420 state->max_superblock_generation); 2421 2422 state->max_superblock_generation = 2423 btrfs_super_generation(super_hdr); 2424 state->latest_superblock = superblock; 2425 } 2426 2427 for (pass = 0; pass < 3; pass++) { 2428 int ret; 2429 u64 next_bytenr; 2430 struct btrfsic_block *next_block; 2431 struct btrfsic_block_data_ctx tmp_next_block_ctx; 2432 struct btrfsic_block_link *l; 2433 int num_copies; 2434 int mirror_num; 2435 const char *additional_string = NULL; 2436 struct btrfs_disk_key tmp_disk_key; 2437 2438 tmp_disk_key.type = BTRFS_ROOT_ITEM_KEY; 2439 tmp_disk_key.offset = 0; 2440 2441 switch (pass) { 2442 case 0: 2443 tmp_disk_key.objectid = 2444 cpu_to_le64(BTRFS_ROOT_TREE_OBJECTID); 2445 additional_string = "root "; 2446 next_bytenr = btrfs_super_root(super_hdr); 2447 if (state->print_mask & 2448 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) 2449 printk(KERN_INFO "root@%llu\n", 2450 (unsigned long long)next_bytenr); 2451 break; 2452 case 1: 2453 tmp_disk_key.objectid = 2454 cpu_to_le64(BTRFS_CHUNK_TREE_OBJECTID); 2455 additional_string = "chunk "; 2456 next_bytenr = btrfs_super_chunk_root(super_hdr); 2457 if (state->print_mask & 2458 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) 2459 printk(KERN_INFO "chunk@%llu\n", 2460 (unsigned long long)next_bytenr); 2461 break; 2462 case 2: 2463 tmp_disk_key.objectid = 2464 cpu_to_le64(BTRFS_TREE_LOG_OBJECTID); 2465 additional_string = "log "; 2466 next_bytenr = btrfs_super_log_root(super_hdr); 2467 if (0 == next_bytenr) 2468 continue; 2469 if (state->print_mask & 2470 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) 2471 printk(KERN_INFO "log@%llu\n", 2472 (unsigned long long)next_bytenr); 2473 break; 2474 } 2475 2476 num_copies = 2477 btrfs_num_copies(state->root->fs_info, 2478 next_bytenr, 
BTRFS_SUPER_INFO_SIZE); 2479 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) 2480 printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", 2481 (unsigned long long)next_bytenr, num_copies); 2482 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { 2483 int was_created; 2484 2485 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2486 printk(KERN_INFO 2487 "btrfsic_process_written_superblock(" 2488 "mirror_num=%d)\n", mirror_num); 2489 ret = btrfsic_map_block(state, next_bytenr, 2490 BTRFS_SUPER_INFO_SIZE, 2491 &tmp_next_block_ctx, 2492 mirror_num); 2493 if (ret) { 2494 printk(KERN_INFO 2495 "btrfsic: btrfsic_map_block(@%llu," 2496 " mirror=%d) failed!\n", 2497 (unsigned long long)next_bytenr, 2498 mirror_num); 2499 return -1; 2500 } 2501 2502 next_block = btrfsic_block_lookup_or_add( 2503 state, 2504 &tmp_next_block_ctx, 2505 additional_string, 2506 1, 0, 1, 2507 mirror_num, 2508 &was_created); 2509 if (NULL == next_block) { 2510 printk(KERN_INFO 2511 "btrfsic: error, kmalloc failed!\n"); 2512 btrfsic_release_block_ctx(&tmp_next_block_ctx); 2513 return -1; 2514 } 2515 2516 next_block->disk_key = tmp_disk_key; 2517 if (was_created) 2518 next_block->generation = 2519 BTRFSIC_GENERATION_UNKNOWN; 2520 l = btrfsic_block_link_lookup_or_add( 2521 state, 2522 &tmp_next_block_ctx, 2523 next_block, 2524 superblock, 2525 BTRFSIC_GENERATION_UNKNOWN); 2526 btrfsic_release_block_ctx(&tmp_next_block_ctx); 2527 if (NULL == l) 2528 return -1; 2529 } 2530 } 2531 2532 if (-1 == btrfsic_check_all_ref_blocks(state, superblock, 0)) { 2533 WARN_ON(1); 2534 btrfsic_dump_tree(state); 2535 } 2536 2537 return 0; 2538 } 2539 2540 static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state, 2541 struct btrfsic_block *const block, 2542 int recursion_level) 2543 { 2544 struct list_head *elem_ref_to; 2545 int ret = 0; 2546 2547 if (recursion_level >= 3 + BTRFS_MAX_LEVEL) { 2548 /* 2549 * Note that this situation can happen and does not 2550 * indicate an error in regular 
cases. It happens 2551 * when disk blocks are freed and later reused. 2552 * The check-integrity module is not aware of any 2553 * block free operations, it just recognizes block 2554 * write operations. Therefore it keeps the linkage 2555 * information for a block until a block is 2556 * rewritten. This can temporarily cause incorrect 2557 * and even circular linkage informations. This 2558 * causes no harm unless such blocks are referenced 2559 * by the most recent super block. 2560 */ 2561 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2562 printk(KERN_INFO 2563 "btrfsic: abort cyclic linkage (case 1).\n"); 2564 2565 return ret; 2566 } 2567 2568 /* 2569 * This algorithm is recursive because the amount of used stack 2570 * space is very small and the max recursion depth is limited. 2571 */ 2572 list_for_each(elem_ref_to, &block->ref_to_list) { 2573 const struct btrfsic_block_link *const l = 2574 list_entry(elem_ref_to, struct btrfsic_block_link, 2575 node_ref_to); 2576 2577 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2578 printk(KERN_INFO 2579 "rl=%d, %c @%llu (%s/%llu/%d)" 2580 " %u* refers to %c @%llu (%s/%llu/%d)\n", 2581 recursion_level, 2582 btrfsic_get_block_type(state, block), 2583 (unsigned long long)block->logical_bytenr, 2584 block->dev_state->name, 2585 (unsigned long long)block->dev_bytenr, 2586 block->mirror_num, 2587 l->ref_cnt, 2588 btrfsic_get_block_type(state, l->block_ref_to), 2589 (unsigned long long) 2590 l->block_ref_to->logical_bytenr, 2591 l->block_ref_to->dev_state->name, 2592 (unsigned long long)l->block_ref_to->dev_bytenr, 2593 l->block_ref_to->mirror_num); 2594 if (l->block_ref_to->never_written) { 2595 printk(KERN_INFO "btrfs: attempt to write superblock" 2596 " which references block %c @%llu (%s/%llu/%d)" 2597 " which is never written!\n", 2598 btrfsic_get_block_type(state, l->block_ref_to), 2599 (unsigned long long) 2600 l->block_ref_to->logical_bytenr, 2601 l->block_ref_to->dev_state->name, 2602 (unsigned long 
long)l->block_ref_to->dev_bytenr, 2603 l->block_ref_to->mirror_num); 2604 ret = -1; 2605 } else if (!l->block_ref_to->is_iodone) { 2606 printk(KERN_INFO "btrfs: attempt to write superblock" 2607 " which references block %c @%llu (%s/%llu/%d)" 2608 " which is not yet iodone!\n", 2609 btrfsic_get_block_type(state, l->block_ref_to), 2610 (unsigned long long) 2611 l->block_ref_to->logical_bytenr, 2612 l->block_ref_to->dev_state->name, 2613 (unsigned long long)l->block_ref_to->dev_bytenr, 2614 l->block_ref_to->mirror_num); 2615 ret = -1; 2616 } else if (l->block_ref_to->iodone_w_error) { 2617 printk(KERN_INFO "btrfs: attempt to write superblock" 2618 " which references block %c @%llu (%s/%llu/%d)" 2619 " which has write error!\n", 2620 btrfsic_get_block_type(state, l->block_ref_to), 2621 (unsigned long long) 2622 l->block_ref_to->logical_bytenr, 2623 l->block_ref_to->dev_state->name, 2624 (unsigned long long)l->block_ref_to->dev_bytenr, 2625 l->block_ref_to->mirror_num); 2626 ret = -1; 2627 } else if (l->parent_generation != 2628 l->block_ref_to->generation && 2629 BTRFSIC_GENERATION_UNKNOWN != 2630 l->parent_generation && 2631 BTRFSIC_GENERATION_UNKNOWN != 2632 l->block_ref_to->generation) { 2633 printk(KERN_INFO "btrfs: attempt to write superblock" 2634 " which references block %c @%llu (%s/%llu/%d)" 2635 " with generation %llu !=" 2636 " parent generation %llu!\n", 2637 btrfsic_get_block_type(state, l->block_ref_to), 2638 (unsigned long long) 2639 l->block_ref_to->logical_bytenr, 2640 l->block_ref_to->dev_state->name, 2641 (unsigned long long)l->block_ref_to->dev_bytenr, 2642 l->block_ref_to->mirror_num, 2643 (unsigned long long)l->block_ref_to->generation, 2644 (unsigned long long)l->parent_generation); 2645 ret = -1; 2646 } else if (l->block_ref_to->flush_gen > 2647 l->block_ref_to->dev_state->last_flush_gen) { 2648 printk(KERN_INFO "btrfs: attempt to write superblock" 2649 " which references block %c @%llu (%s/%llu/%d)" 2650 " which is not flushed out of disk's 
write cache" 2651 " (block flush_gen=%llu," 2652 " dev->flush_gen=%llu)!\n", 2653 btrfsic_get_block_type(state, l->block_ref_to), 2654 (unsigned long long) 2655 l->block_ref_to->logical_bytenr, 2656 l->block_ref_to->dev_state->name, 2657 (unsigned long long)l->block_ref_to->dev_bytenr, 2658 l->block_ref_to->mirror_num, 2659 (unsigned long long)block->flush_gen, 2660 (unsigned long long) 2661 l->block_ref_to->dev_state->last_flush_gen); 2662 ret = -1; 2663 } else if (-1 == btrfsic_check_all_ref_blocks(state, 2664 l->block_ref_to, 2665 recursion_level + 2666 1)) { 2667 ret = -1; 2668 } 2669 } 2670 2671 return ret; 2672 } 2673 2674 static int btrfsic_is_block_ref_by_superblock( 2675 const struct btrfsic_state *state, 2676 const struct btrfsic_block *block, 2677 int recursion_level) 2678 { 2679 struct list_head *elem_ref_from; 2680 2681 if (recursion_level >= 3 + BTRFS_MAX_LEVEL) { 2682 /* refer to comment at "abort cyclic linkage (case 1)" */ 2683 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2684 printk(KERN_INFO 2685 "btrfsic: abort cyclic linkage (case 2).\n"); 2686 2687 return 0; 2688 } 2689 2690 /* 2691 * This algorithm is recursive because the amount of used stack space 2692 * is very small and the max recursion depth is limited. 
2693 */ 2694 list_for_each(elem_ref_from, &block->ref_from_list) { 2695 const struct btrfsic_block_link *const l = 2696 list_entry(elem_ref_from, struct btrfsic_block_link, 2697 node_ref_from); 2698 2699 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2700 printk(KERN_INFO 2701 "rl=%d, %c @%llu (%s/%llu/%d)" 2702 " is ref %u* from %c @%llu (%s/%llu/%d)\n", 2703 recursion_level, 2704 btrfsic_get_block_type(state, block), 2705 (unsigned long long)block->logical_bytenr, 2706 block->dev_state->name, 2707 (unsigned long long)block->dev_bytenr, 2708 block->mirror_num, 2709 l->ref_cnt, 2710 btrfsic_get_block_type(state, l->block_ref_from), 2711 (unsigned long long) 2712 l->block_ref_from->logical_bytenr, 2713 l->block_ref_from->dev_state->name, 2714 (unsigned long long) 2715 l->block_ref_from->dev_bytenr, 2716 l->block_ref_from->mirror_num); 2717 if (l->block_ref_from->is_superblock && 2718 state->latest_superblock->dev_bytenr == 2719 l->block_ref_from->dev_bytenr && 2720 state->latest_superblock->dev_state->bdev == 2721 l->block_ref_from->dev_state->bdev) 2722 return 1; 2723 else if (btrfsic_is_block_ref_by_superblock(state, 2724 l->block_ref_from, 2725 recursion_level + 2726 1)) 2727 return 1; 2728 } 2729 2730 return 0; 2731 } 2732 2733 static void btrfsic_print_add_link(const struct btrfsic_state *state, 2734 const struct btrfsic_block_link *l) 2735 { 2736 printk(KERN_INFO 2737 "Add %u* link from %c @%llu (%s/%llu/%d)" 2738 " to %c @%llu (%s/%llu/%d).\n", 2739 l->ref_cnt, 2740 btrfsic_get_block_type(state, l->block_ref_from), 2741 (unsigned long long)l->block_ref_from->logical_bytenr, 2742 l->block_ref_from->dev_state->name, 2743 (unsigned long long)l->block_ref_from->dev_bytenr, 2744 l->block_ref_from->mirror_num, 2745 btrfsic_get_block_type(state, l->block_ref_to), 2746 (unsigned long long)l->block_ref_to->logical_bytenr, 2747 l->block_ref_to->dev_state->name, 2748 (unsigned long long)l->block_ref_to->dev_bytenr, 2749 l->block_ref_to->mirror_num); 2750 } 2751 
2752 static void btrfsic_print_rem_link(const struct btrfsic_state *state, 2753 const struct btrfsic_block_link *l) 2754 { 2755 printk(KERN_INFO 2756 "Rem %u* link from %c @%llu (%s/%llu/%d)" 2757 " to %c @%llu (%s/%llu/%d).\n", 2758 l->ref_cnt, 2759 btrfsic_get_block_type(state, l->block_ref_from), 2760 (unsigned long long)l->block_ref_from->logical_bytenr, 2761 l->block_ref_from->dev_state->name, 2762 (unsigned long long)l->block_ref_from->dev_bytenr, 2763 l->block_ref_from->mirror_num, 2764 btrfsic_get_block_type(state, l->block_ref_to), 2765 (unsigned long long)l->block_ref_to->logical_bytenr, 2766 l->block_ref_to->dev_state->name, 2767 (unsigned long long)l->block_ref_to->dev_bytenr, 2768 l->block_ref_to->mirror_num); 2769 } 2770 2771 static char btrfsic_get_block_type(const struct btrfsic_state *state, 2772 const struct btrfsic_block *block) 2773 { 2774 if (block->is_superblock && 2775 state->latest_superblock->dev_bytenr == block->dev_bytenr && 2776 state->latest_superblock->dev_state->bdev == block->dev_state->bdev) 2777 return 'S'; 2778 else if (block->is_superblock) 2779 return 's'; 2780 else if (block->is_metadata) 2781 return 'M'; 2782 else 2783 return 'D'; 2784 } 2785 2786 static void btrfsic_dump_tree(const struct btrfsic_state *state) 2787 { 2788 btrfsic_dump_tree_sub(state, state->latest_superblock, 0); 2789 } 2790 2791 static void btrfsic_dump_tree_sub(const struct btrfsic_state *state, 2792 const struct btrfsic_block *block, 2793 int indent_level) 2794 { 2795 struct list_head *elem_ref_to; 2796 int indent_add; 2797 static char buf[80]; 2798 int cursor_position; 2799 2800 /* 2801 * Should better fill an on-stack buffer with a complete line and 2802 * dump it at once when it is time to print a newline character. 2803 */ 2804 2805 /* 2806 * This algorithm is recursive because the amount of used stack space 2807 * is very small and the max recursion depth is limited. 
2808 */ 2809 indent_add = sprintf(buf, "%c-%llu(%s/%llu/%d)", 2810 btrfsic_get_block_type(state, block), 2811 (unsigned long long)block->logical_bytenr, 2812 block->dev_state->name, 2813 (unsigned long long)block->dev_bytenr, 2814 block->mirror_num); 2815 if (indent_level + indent_add > BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL) { 2816 printk("[...]\n"); 2817 return; 2818 } 2819 printk(buf); 2820 indent_level += indent_add; 2821 if (list_empty(&block->ref_to_list)) { 2822 printk("\n"); 2823 return; 2824 } 2825 if (block->mirror_num > 1 && 2826 !(state->print_mask & BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS)) { 2827 printk(" [...]\n"); 2828 return; 2829 } 2830 2831 cursor_position = indent_level; 2832 list_for_each(elem_ref_to, &block->ref_to_list) { 2833 const struct btrfsic_block_link *const l = 2834 list_entry(elem_ref_to, struct btrfsic_block_link, 2835 node_ref_to); 2836 2837 while (cursor_position < indent_level) { 2838 printk(" "); 2839 cursor_position++; 2840 } 2841 if (l->ref_cnt > 1) 2842 indent_add = sprintf(buf, " %d*--> ", l->ref_cnt); 2843 else 2844 indent_add = sprintf(buf, " --> "); 2845 if (indent_level + indent_add > 2846 BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL) { 2847 printk("[...]\n"); 2848 cursor_position = 0; 2849 continue; 2850 } 2851 2852 printk(buf); 2853 2854 btrfsic_dump_tree_sub(state, l->block_ref_to, 2855 indent_level + indent_add); 2856 cursor_position = 0; 2857 } 2858 } 2859 2860 static struct btrfsic_block_link *btrfsic_block_link_lookup_or_add( 2861 struct btrfsic_state *state, 2862 struct btrfsic_block_data_ctx *next_block_ctx, 2863 struct btrfsic_block *next_block, 2864 struct btrfsic_block *from_block, 2865 u64 parent_generation) 2866 { 2867 struct btrfsic_block_link *l; 2868 2869 l = btrfsic_block_link_hashtable_lookup(next_block_ctx->dev->bdev, 2870 next_block_ctx->dev_bytenr, 2871 from_block->dev_state->bdev, 2872 from_block->dev_bytenr, 2873 &state->block_link_hashtable); 2874 if (NULL == l) { 2875 l = btrfsic_block_link_alloc(); 2876 if 
(NULL == l) { 2877 printk(KERN_INFO 2878 "btrfsic: error, kmalloc" " failed!\n"); 2879 return NULL; 2880 } 2881 2882 l->block_ref_to = next_block; 2883 l->block_ref_from = from_block; 2884 l->ref_cnt = 1; 2885 l->parent_generation = parent_generation; 2886 2887 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2888 btrfsic_print_add_link(state, l); 2889 2890 list_add(&l->node_ref_to, &from_block->ref_to_list); 2891 list_add(&l->node_ref_from, &next_block->ref_from_list); 2892 2893 btrfsic_block_link_hashtable_add(l, 2894 &state->block_link_hashtable); 2895 } else { 2896 l->ref_cnt++; 2897 l->parent_generation = parent_generation; 2898 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2899 btrfsic_print_add_link(state, l); 2900 } 2901 2902 return l; 2903 } 2904 2905 static struct btrfsic_block *btrfsic_block_lookup_or_add( 2906 struct btrfsic_state *state, 2907 struct btrfsic_block_data_ctx *block_ctx, 2908 const char *additional_string, 2909 int is_metadata, 2910 int is_iodone, 2911 int never_written, 2912 int mirror_num, 2913 int *was_created) 2914 { 2915 struct btrfsic_block *block; 2916 2917 block = btrfsic_block_hashtable_lookup(block_ctx->dev->bdev, 2918 block_ctx->dev_bytenr, 2919 &state->block_hashtable); 2920 if (NULL == block) { 2921 struct btrfsic_dev_state *dev_state; 2922 2923 block = btrfsic_block_alloc(); 2924 if (NULL == block) { 2925 printk(KERN_INFO "btrfsic: error, kmalloc failed!\n"); 2926 return NULL; 2927 } 2928 dev_state = btrfsic_dev_state_lookup(block_ctx->dev->bdev); 2929 if (NULL == dev_state) { 2930 printk(KERN_INFO 2931 "btrfsic: error, lookup dev_state failed!\n"); 2932 btrfsic_block_free(block); 2933 return NULL; 2934 } 2935 block->dev_state = dev_state; 2936 block->dev_bytenr = block_ctx->dev_bytenr; 2937 block->logical_bytenr = block_ctx->start; 2938 block->is_metadata = is_metadata; 2939 block->is_iodone = is_iodone; 2940 block->never_written = never_written; 2941 block->mirror_num = mirror_num; 2942 if (state->print_mask & 
BTRFSIC_PRINT_MASK_VERBOSE) 2943 printk(KERN_INFO 2944 "New %s%c-block @%llu (%s/%llu/%d)\n", 2945 additional_string, 2946 btrfsic_get_block_type(state, block), 2947 (unsigned long long)block->logical_bytenr, 2948 dev_state->name, 2949 (unsigned long long)block->dev_bytenr, 2950 mirror_num); 2951 list_add(&block->all_blocks_node, &state->all_blocks_list); 2952 btrfsic_block_hashtable_add(block, &state->block_hashtable); 2953 if (NULL != was_created) 2954 *was_created = 1; 2955 } else { 2956 if (NULL != was_created) 2957 *was_created = 0; 2958 } 2959 2960 return block; 2961 } 2962 2963 static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state, 2964 u64 bytenr, 2965 struct btrfsic_dev_state *dev_state, 2966 u64 dev_bytenr) 2967 { 2968 int num_copies; 2969 int mirror_num; 2970 int ret; 2971 struct btrfsic_block_data_ctx block_ctx; 2972 int match = 0; 2973 2974 num_copies = btrfs_num_copies(state->root->fs_info, 2975 bytenr, state->metablock_size); 2976 2977 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { 2978 ret = btrfsic_map_block(state, bytenr, state->metablock_size, 2979 &block_ctx, mirror_num); 2980 if (ret) { 2981 printk(KERN_INFO "btrfsic:" 2982 " btrfsic_map_block(logical @%llu," 2983 " mirror %d) failed!\n", 2984 (unsigned long long)bytenr, mirror_num); 2985 continue; 2986 } 2987 2988 if (dev_state->bdev == block_ctx.dev->bdev && 2989 dev_bytenr == block_ctx.dev_bytenr) { 2990 match++; 2991 btrfsic_release_block_ctx(&block_ctx); 2992 break; 2993 } 2994 btrfsic_release_block_ctx(&block_ctx); 2995 } 2996 2997 if (!match) { 2998 printk(KERN_INFO "btrfs: attempt to write M-block which contains logical bytenr that doesn't map to dev+physical bytenr of submit_bio," 2999 " buffer->log_bytenr=%llu, submit_bio(bdev=%s," 3000 " phys_bytenr=%llu)!\n", 3001 (unsigned long long)bytenr, dev_state->name, 3002 (unsigned long long)dev_bytenr); 3003 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { 3004 ret = btrfsic_map_block(state, 
bytenr, 3005 state->metablock_size, 3006 &block_ctx, mirror_num); 3007 if (ret) 3008 continue; 3009 3010 printk(KERN_INFO "Read logical bytenr @%llu maps to" 3011 " (%s/%llu/%d)\n", 3012 (unsigned long long)bytenr, 3013 block_ctx.dev->name, 3014 (unsigned long long)block_ctx.dev_bytenr, 3015 mirror_num); 3016 } 3017 WARN_ON(1); 3018 } 3019 } 3020 3021 static struct btrfsic_dev_state *btrfsic_dev_state_lookup( 3022 struct block_device *bdev) 3023 { 3024 struct btrfsic_dev_state *ds; 3025 3026 ds = btrfsic_dev_state_hashtable_lookup(bdev, 3027 &btrfsic_dev_state_hashtable); 3028 return ds; 3029 } 3030 3031 int btrfsic_submit_bh(int rw, struct buffer_head *bh) 3032 { 3033 struct btrfsic_dev_state *dev_state; 3034 3035 if (!btrfsic_is_initialized) 3036 return submit_bh(rw, bh); 3037 3038 mutex_lock(&btrfsic_mutex); 3039 /* since btrfsic_submit_bh() might also be called before 3040 * btrfsic_mount(), this might return NULL */ 3041 dev_state = btrfsic_dev_state_lookup(bh->b_bdev); 3042 3043 /* Only called to write the superblock (incl. 
FLUSH/FUA) */ 3044 if (NULL != dev_state && 3045 (rw & WRITE) && bh->b_size > 0) { 3046 u64 dev_bytenr; 3047 3048 dev_bytenr = 4096 * bh->b_blocknr; 3049 if (dev_state->state->print_mask & 3050 BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH) 3051 printk(KERN_INFO 3052 "submit_bh(rw=0x%x, blocknr=%lu (bytenr %llu)," 3053 " size=%lu, data=%p, bdev=%p)\n", 3054 rw, (unsigned long)bh->b_blocknr, 3055 (unsigned long long)dev_bytenr, 3056 (unsigned long)bh->b_size, bh->b_data, 3057 bh->b_bdev); 3058 btrfsic_process_written_block(dev_state, dev_bytenr, 3059 &bh->b_data, 1, NULL, 3060 NULL, bh, rw); 3061 } else if (NULL != dev_state && (rw & REQ_FLUSH)) { 3062 if (dev_state->state->print_mask & 3063 BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH) 3064 printk(KERN_INFO 3065 "submit_bh(rw=0x%x FLUSH, bdev=%p)\n", 3066 rw, bh->b_bdev); 3067 if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) { 3068 if ((dev_state->state->print_mask & 3069 (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH | 3070 BTRFSIC_PRINT_MASK_VERBOSE))) 3071 printk(KERN_INFO 3072 "btrfsic_submit_bh(%s) with FLUSH" 3073 " but dummy block already in use" 3074 " (ignored)!\n", 3075 dev_state->name); 3076 } else { 3077 struct btrfsic_block *const block = 3078 &dev_state->dummy_block_for_bio_bh_flush; 3079 3080 block->is_iodone = 0; 3081 block->never_written = 0; 3082 block->iodone_w_error = 0; 3083 block->flush_gen = dev_state->last_flush_gen + 1; 3084 block->submit_bio_bh_rw = rw; 3085 block->orig_bio_bh_private = bh->b_private; 3086 block->orig_bio_bh_end_io.bh = bh->b_end_io; 3087 block->next_in_same_bio = NULL; 3088 bh->b_private = block; 3089 bh->b_end_io = btrfsic_bh_end_io; 3090 } 3091 } 3092 mutex_unlock(&btrfsic_mutex); 3093 return submit_bh(rw, bh); 3094 } 3095 3096 void btrfsic_submit_bio(int rw, struct bio *bio) 3097 { 3098 struct btrfsic_dev_state *dev_state; 3099 3100 if (!btrfsic_is_initialized) { 3101 submit_bio(rw, bio); 3102 return; 3103 } 3104 3105 mutex_lock(&btrfsic_mutex); 3106 /* since btrfsic_submit_bio() is also called 
before 3107 * btrfsic_mount(), this might return NULL */ 3108 dev_state = btrfsic_dev_state_lookup(bio->bi_bdev); 3109 if (NULL != dev_state && 3110 (rw & WRITE) && NULL != bio->bi_io_vec) { 3111 unsigned int i; 3112 u64 dev_bytenr; 3113 int bio_is_patched; 3114 char **mapped_datav; 3115 3116 dev_bytenr = 512 * bio->bi_sector; 3117 bio_is_patched = 0; 3118 if (dev_state->state->print_mask & 3119 BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH) 3120 printk(KERN_INFO 3121 "submit_bio(rw=0x%x, bi_vcnt=%u," 3122 " bi_sector=%lu (bytenr %llu), bi_bdev=%p)\n", 3123 rw, bio->bi_vcnt, (unsigned long)bio->bi_sector, 3124 (unsigned long long)dev_bytenr, 3125 bio->bi_bdev); 3126 3127 mapped_datav = kmalloc(sizeof(*mapped_datav) * bio->bi_vcnt, 3128 GFP_NOFS); 3129 if (!mapped_datav) 3130 goto leave; 3131 for (i = 0; i < bio->bi_vcnt; i++) { 3132 BUG_ON(bio->bi_io_vec[i].bv_len != PAGE_CACHE_SIZE); 3133 mapped_datav[i] = kmap(bio->bi_io_vec[i].bv_page); 3134 if (!mapped_datav[i]) { 3135 while (i > 0) { 3136 i--; 3137 kunmap(bio->bi_io_vec[i].bv_page); 3138 } 3139 kfree(mapped_datav); 3140 goto leave; 3141 } 3142 if ((BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH | 3143 BTRFSIC_PRINT_MASK_VERBOSE) == 3144 (dev_state->state->print_mask & 3145 (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH | 3146 BTRFSIC_PRINT_MASK_VERBOSE))) 3147 printk(KERN_INFO 3148 "#%u: page=%p, len=%u, offset=%u\n", 3149 i, bio->bi_io_vec[i].bv_page, 3150 bio->bi_io_vec[i].bv_len, 3151 bio->bi_io_vec[i].bv_offset); 3152 } 3153 btrfsic_process_written_block(dev_state, dev_bytenr, 3154 mapped_datav, bio->bi_vcnt, 3155 bio, &bio_is_patched, 3156 NULL, rw); 3157 while (i > 0) { 3158 i--; 3159 kunmap(bio->bi_io_vec[i].bv_page); 3160 } 3161 kfree(mapped_datav); 3162 } else if (NULL != dev_state && (rw & REQ_FLUSH)) { 3163 if (dev_state->state->print_mask & 3164 BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH) 3165 printk(KERN_INFO 3166 "submit_bio(rw=0x%x FLUSH, bdev=%p)\n", 3167 rw, bio->bi_bdev); 3168 if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) { 3169 if 
((dev_state->state->print_mask & 3170 (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH | 3171 BTRFSIC_PRINT_MASK_VERBOSE))) 3172 printk(KERN_INFO 3173 "btrfsic_submit_bio(%s) with FLUSH" 3174 " but dummy block already in use" 3175 " (ignored)!\n", 3176 dev_state->name); 3177 } else { 3178 struct btrfsic_block *const block = 3179 &dev_state->dummy_block_for_bio_bh_flush; 3180 3181 block->is_iodone = 0; 3182 block->never_written = 0; 3183 block->iodone_w_error = 0; 3184 block->flush_gen = dev_state->last_flush_gen + 1; 3185 block->submit_bio_bh_rw = rw; 3186 block->orig_bio_bh_private = bio->bi_private; 3187 block->orig_bio_bh_end_io.bio = bio->bi_end_io; 3188 block->next_in_same_bio = NULL; 3189 bio->bi_private = block; 3190 bio->bi_end_io = btrfsic_bio_end_io; 3191 } 3192 } 3193 leave: 3194 mutex_unlock(&btrfsic_mutex); 3195 3196 submit_bio(rw, bio); 3197 } 3198 3199 int btrfsic_mount(struct btrfs_root *root, 3200 struct btrfs_fs_devices *fs_devices, 3201 int including_extent_data, u32 print_mask) 3202 { 3203 int ret; 3204 struct btrfsic_state *state; 3205 struct list_head *dev_head = &fs_devices->devices; 3206 struct btrfs_device *device; 3207 3208 if (root->nodesize != root->leafsize) { 3209 printk(KERN_INFO 3210 "btrfsic: cannot handle nodesize %d != leafsize %d!\n", 3211 root->nodesize, root->leafsize); 3212 return -1; 3213 } 3214 if (root->nodesize & ((u64)PAGE_CACHE_SIZE - 1)) { 3215 printk(KERN_INFO 3216 "btrfsic: cannot handle nodesize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n", 3217 root->nodesize, (unsigned long)PAGE_CACHE_SIZE); 3218 return -1; 3219 } 3220 if (root->leafsize & ((u64)PAGE_CACHE_SIZE - 1)) { 3221 printk(KERN_INFO 3222 "btrfsic: cannot handle leafsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n", 3223 root->leafsize, (unsigned long)PAGE_CACHE_SIZE); 3224 return -1; 3225 } 3226 if (root->sectorsize & ((u64)PAGE_CACHE_SIZE - 1)) { 3227 printk(KERN_INFO 3228 "btrfsic: cannot handle sectorsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n", 
3229 root->sectorsize, (unsigned long)PAGE_CACHE_SIZE); 3230 return -1; 3231 } 3232 state = kzalloc(sizeof(*state), GFP_NOFS); 3233 if (NULL == state) { 3234 printk(KERN_INFO "btrfs check-integrity: kmalloc() failed!\n"); 3235 return -1; 3236 } 3237 3238 if (!btrfsic_is_initialized) { 3239 mutex_init(&btrfsic_mutex); 3240 btrfsic_dev_state_hashtable_init(&btrfsic_dev_state_hashtable); 3241 btrfsic_is_initialized = 1; 3242 } 3243 mutex_lock(&btrfsic_mutex); 3244 state->root = root; 3245 state->print_mask = print_mask; 3246 state->include_extent_data = including_extent_data; 3247 state->csum_size = 0; 3248 state->metablock_size = root->nodesize; 3249 state->datablock_size = root->sectorsize; 3250 INIT_LIST_HEAD(&state->all_blocks_list); 3251 btrfsic_block_hashtable_init(&state->block_hashtable); 3252 btrfsic_block_link_hashtable_init(&state->block_link_hashtable); 3253 state->max_superblock_generation = 0; 3254 state->latest_superblock = NULL; 3255 3256 list_for_each_entry(device, dev_head, dev_list) { 3257 struct btrfsic_dev_state *ds; 3258 char *p; 3259 3260 if (!device->bdev || !device->name) 3261 continue; 3262 3263 ds = btrfsic_dev_state_alloc(); 3264 if (NULL == ds) { 3265 printk(KERN_INFO 3266 "btrfs check-integrity: kmalloc() failed!\n"); 3267 mutex_unlock(&btrfsic_mutex); 3268 return -1; 3269 } 3270 ds->bdev = device->bdev; 3271 ds->state = state; 3272 bdevname(ds->bdev, ds->name); 3273 ds->name[BDEVNAME_SIZE - 1] = '\0'; 3274 for (p = ds->name; *p != '\0'; p++); 3275 while (p > ds->name && *p != '/') 3276 p--; 3277 if (*p == '/') 3278 p++; 3279 strlcpy(ds->name, p, sizeof(ds->name)); 3280 btrfsic_dev_state_hashtable_add(ds, 3281 &btrfsic_dev_state_hashtable); 3282 } 3283 3284 ret = btrfsic_process_superblock(state, fs_devices); 3285 if (0 != ret) { 3286 mutex_unlock(&btrfsic_mutex); 3287 btrfsic_unmount(root, fs_devices); 3288 return ret; 3289 } 3290 3291 if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_DATABASE) 3292 btrfsic_dump_database(state); 3293 if 
(state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_TREE) 3294 btrfsic_dump_tree(state); 3295 3296 mutex_unlock(&btrfsic_mutex); 3297 return 0; 3298 } 3299 3300 void btrfsic_unmount(struct btrfs_root *root, 3301 struct btrfs_fs_devices *fs_devices) 3302 { 3303 struct list_head *elem_all; 3304 struct list_head *tmp_all; 3305 struct btrfsic_state *state; 3306 struct list_head *dev_head = &fs_devices->devices; 3307 struct btrfs_device *device; 3308 3309 if (!btrfsic_is_initialized) 3310 return; 3311 3312 mutex_lock(&btrfsic_mutex); 3313 3314 state = NULL; 3315 list_for_each_entry(device, dev_head, dev_list) { 3316 struct btrfsic_dev_state *ds; 3317 3318 if (!device->bdev || !device->name) 3319 continue; 3320 3321 ds = btrfsic_dev_state_hashtable_lookup( 3322 device->bdev, 3323 &btrfsic_dev_state_hashtable); 3324 if (NULL != ds) { 3325 state = ds->state; 3326 btrfsic_dev_state_hashtable_remove(ds); 3327 btrfsic_dev_state_free(ds); 3328 } 3329 } 3330 3331 if (NULL == state) { 3332 printk(KERN_INFO 3333 "btrfsic: error, cannot find state information" 3334 " on umount!\n"); 3335 mutex_unlock(&btrfsic_mutex); 3336 return; 3337 } 3338 3339 /* 3340 * Don't care about keeping the lists' state up to date, 3341 * just free all memory that was allocated dynamically. 3342 * Free the blocks and the block_links. 
3343 */ 3344 list_for_each_safe(elem_all, tmp_all, &state->all_blocks_list) { 3345 struct btrfsic_block *const b_all = 3346 list_entry(elem_all, struct btrfsic_block, 3347 all_blocks_node); 3348 struct list_head *elem_ref_to; 3349 struct list_head *tmp_ref_to; 3350 3351 list_for_each_safe(elem_ref_to, tmp_ref_to, 3352 &b_all->ref_to_list) { 3353 struct btrfsic_block_link *const l = 3354 list_entry(elem_ref_to, 3355 struct btrfsic_block_link, 3356 node_ref_to); 3357 3358 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 3359 btrfsic_print_rem_link(state, l); 3360 3361 l->ref_cnt--; 3362 if (0 == l->ref_cnt) 3363 btrfsic_block_link_free(l); 3364 } 3365 3366 if (b_all->is_iodone || b_all->never_written) 3367 btrfsic_block_free(b_all); 3368 else 3369 printk(KERN_INFO "btrfs: attempt to free %c-block" 3370 " @%llu (%s/%llu/%d) on umount which is" 3371 " not yet iodone!\n", 3372 btrfsic_get_block_type(state, b_all), 3373 (unsigned long long)b_all->logical_bytenr, 3374 b_all->dev_state->name, 3375 (unsigned long long)b_all->dev_bytenr, 3376 b_all->mirror_num); 3377 } 3378 3379 mutex_unlock(&btrfsic_mutex); 3380 3381 kfree(state); 3382 } 3383