/*
 * Copyright (C) STRATO AG 2011. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

/*
 * This module can be used to catch cases when the btrfs kernel
 * code executes write requests to the disk that bring the file
 * system into an inconsistent state. In such a state, a power-loss
 * or kernel panic event would cause the data on disk to be
 * lost or at least damaged.
 *
 * Code is added that examines all block write requests during
 * runtime (including writes of the super block). Three rules
 * are verified and an error is printed on violation of the
 * rules:
 * 1. It is not allowed to write a disk block which is
 *    currently referenced by the super block (either directly
 *    or indirectly).
 * 2. When a super block is written, it is verified that all
 *    referenced (directly or indirectly) blocks fulfill the
 *    following requirements:
 *    2a. All referenced blocks have either been present when
 *        the file system was mounted, (i.e., they have been
 *        referenced by the super block) or they have been
 *        written since then and the write completion callback
 *        was called and a FLUSH request to the device where
 *        these blocks are located was received and completed.
 *    2b. All referenced blocks need to have a generation
 *        number which is equal to the parent's number.
 *
 * One issue that was found using this module was that the log
 * tree on disk became temporarily corrupted because disk blocks
 * that had been in use for the log tree had been freed and
 * reused too early, while being referenced by the written super
 * block.
 *
 * The search term in the kernel log that can be used to filter
 * on the existence of detected integrity issues is
 * "btrfs: attempt".
 *
 * The integrity check is enabled via mount options. These
 * mount options are only supported if the integrity check
 * tool is compiled by defining BTRFS_FS_CHECK_INTEGRITY.
 *
 * Example #1, apply integrity checks to all metadata:
 * mount /dev/sdb1 /mnt -o check_int
 *
 * Example #2, apply integrity checks to all metadata and
 * to data extents:
 * mount /dev/sdb1 /mnt -o check_int_data
 *
 * Example #3, apply integrity checks to all metadata and dump
 * the tree that the super block references to kernel messages
 * each time after a super block was written:
 * mount /dev/sdb1 /mnt -o check_int,check_int_print_mask=263
 *
 * If the integrity check tool is included and activated in
 * the mount options, plenty of kernel memory is used, and
 * plenty of additional CPU cycles are spent. Enabling this
 * functionality is not intended for normal use. In most
 * cases, unless you are a btrfs developer who needs to verify
 * the integrity of (super)-block write requests, do not
 * enable the config option BTRFS_FS_CHECK_INTEGRITY to
 * include and compile the integrity check tool.
 */

#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/buffer_head.h>
#include <linux/mutex.h>
#include <linux/crc32c.h>
#include <linux/genhd.h>
#include <linux/blkdev.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "extent_io.h"
#include "volumes.h"
#include "print-tree.h"
#include "locking.h"
#include "check-integrity.h"
#include "rcu-string.h"

/*
 * Number of buckets of the three hashtables. The hash functions in this
 * file reduce the hash value with "& (SIZE - 1)", so each size has to
 * stay a power of two.
 */
#define BTRFSIC_BLOCK_HASHTABLE_SIZE 0x10000
#define BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE 0x10000
#define BTRFSIC_DEV2STATE_HASHTABLE_SIZE 0x100
/*
 * Magic numbers that the *_init()/*_alloc() helpers store in the objects
 * and that the matching *_free() helpers verify with BUG_ON().
 */
#define BTRFSIC_BLOCK_MAGIC_NUMBER 0x14491051
#define BTRFSIC_BLOCK_LINK_MAGIC_NUMBER 0x11070807
#define BTRFSIC_DEV2STATE_MAGIC_NUMBER 0x20111530
#define BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER 20111300
#define BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL (200 - 6)	/* in characters,
							 * excluding " [...]" */
/* initial value of btrfsic_block.generation, see btrfsic_block_init() */
#define BTRFSIC_GENERATION_UNKNOWN ((u64)-1)

/*
 * The definition of the bitmask fields for the print_mask.
 * They are specified with the mount option check_int_print_mask.
 */
#define BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE			0x00000001
#define BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION		0x00000002
#define BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE			0x00000004
#define BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE			0x00000008
#define BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH			0x00000010
#define BTRFSIC_PRINT_MASK_END_IO_BIO_BH			0x00000020
#define BTRFSIC_PRINT_MASK_VERBOSE				0x00000040
#define BTRFSIC_PRINT_MASK_VERY_VERBOSE				0x00000080
#define BTRFSIC_PRINT_MASK_INITIAL_TREE				0x00000100
#define BTRFSIC_PRINT_MASK_INITIAL_ALL_TREES			0x00000200
#define BTRFSIC_PRINT_MASK_INITIAL_DATABASE			0x00000400
#define BTRFSIC_PRINT_MASK_NUM_COPIES				0x00000800
#define BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS		0x00001000

struct btrfsic_dev_state;
struct btrfsic_state;

/*
 * One tracked disk block. Blocks are found in the block hashtable by the
 * pair (dev_state->bdev, dev_bytenr) and are additionally chained on
 * btrfsic_state.all_blocks_list via all_blocks_node.
 */
struct btrfsic_block {
	u32 magic_num;		/* only used for debug purposes */
	unsigned int is_metadata:1;	/* if it is meta-data, not data-data */
	unsigned int is_superblock:1;	/* if it is one of the superblocks */
	unsigned int is_iodone:1;	/* if I/O is done by lower subsystem */
	unsigned int iodone_w_error:1;	/* error was indicated to endio */
	unsigned int never_written:1;	/* block was added because it was
					 * referenced, not because it was
					 * written */
	unsigned int mirror_num:2;	/* large enough to hold
					 * BTRFS_SUPER_MIRROR_MAX */
	struct btrfsic_dev_state *dev_state;
	u64 dev_bytenr;		/* key, physical byte num on disk */
	u64 logical_bytenr;	/* logical byte num on disk */
	u64 generation;
	struct btrfs_disk_key disk_key;	/* extra info to print in case of
					 * issues, will not always be correct */
	struct list_head collision_resolving_node;	/* list node */
	struct list_head all_blocks_node;	/* list node */

	/* the following two lists contain block_link items */
	struct list_head ref_to_list;	/* list */
	struct list_head ref_from_list;	/* list */
	struct btrfsic_block *next_in_same_bio;
	void *orig_bio_bh_private;
	union {
		bio_end_io_t *bio;
		bh_end_io_t *bh;
	} orig_bio_bh_end_io;
	int submit_bio_bh_rw;
	u64 flush_gen; /* only valid if !never_written */
};

/*
 * Elements of this type are allocated dynamically and required because
 * each block object can refer to and can be referred from multiple blocks.
 * The key to look them up in the hashtable is the dev_bytenr of
 * the block referred to plus the one of the block referred from.
 * The fact that they are searchable via a hashtable and that a
 * ref_cnt is maintained is not required for the btrfs integrity
 * check algorithm itself, it is only used to make the output more
 * beautiful in case that an error is detected (an error is defined
 * as a write operation to a block while that block is still referenced).
 */
struct btrfsic_block_link {
	u32 magic_num;		/* only used for debug purposes */
	u32 ref_cnt;
	struct list_head node_ref_to;	/* list node */
	struct list_head node_ref_from;	/* list node */
	struct list_head collision_resolving_node;	/* list node */
	struct btrfsic_block *block_ref_to;
	struct btrfsic_block *block_ref_from;
	u64 parent_generation;
};

/*
 * State kept per block device; found by bdev in the file-scope
 * btrfsic_dev_state_hashtable. btrfsic_dev_state_init() sets up
 * dummy_block_for_bio_bh_flush as a block with is_iodone set that points
 * back to this device state.
 */
struct btrfsic_dev_state {
	u32 magic_num;		/* only used for debug purposes */
	struct block_device *bdev;
	struct btrfsic_state *state;
	struct list_head collision_resolving_node;	/* list node */
	struct btrfsic_block dummy_block_for_bio_bh_flush;
	u64 last_flush_gen;
	char name[BDEVNAME_SIZE];
};

/*
 * The hashtables are plain arrays of list heads; the elements of one
 * bucket are chained via their collision_resolving_node member.
 */
struct btrfsic_block_hashtable {
	struct list_head table[BTRFSIC_BLOCK_HASHTABLE_SIZE];
};

struct btrfsic_block_link_hashtable {
	struct list_head table[BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE];
};

struct btrfsic_dev_state_hashtable {
	struct list_head table[BTRFSIC_DEV2STATE_HASHTABLE_SIZE];
};

/*
 * Describes where a block lives (logical start, device and physical
 * offset, length). It is the output parameter of btrfsic_map_block() and
 * btrfsic_map_superblock() and is passed on to btrfsic_read_block() and
 * btrfsic_release_block_ctx().
 * NOTE(review): datav/pagev/mem_to_free presumably hold the per-page data
 * of the block once it was read — confirm in btrfsic_read_block().
 */
struct btrfsic_block_data_ctx {
	u64 start;		/* virtual bytenr */
	u64 dev_bytenr;		/* physical bytenr on device */
	u32 len;
	struct btrfsic_dev_state *dev;
	char **datav;
	struct page **pagev;
	void *mem_to_free;
};

/* This structure is used to implement recursion without occupying
 * any stack space, refer to btrfsic_process_metablock() */
struct btrfsic_stack_frame {
	u32 magic;
	u32 nr;
	int error;
	int i;
	int limit_nesting;
	int num_copies;
	int mirror_num;
	struct btrfsic_block *block;
	struct btrfsic_block_data_ctx *block_ctx;
	struct btrfsic_block *next_block;
	struct btrfsic_block_data_ctx next_block_ctx;
	struct btrfs_header *hdr;
	struct btrfsic_stack_frame *prev;
};

/* Some state per mounted filesystem */
struct btrfsic_state {
	u32 print_mask;		/* BTRFSIC_PRINT_MASK_* bits */
	int include_extent_data;
	int csum_size;		/* taken from the selected super block */
	struct list_head all_blocks_list;
	struct btrfsic_block_hashtable block_hashtable;
	struct btrfsic_block_link_hashtable block_link_hashtable;
	struct btrfs_root *root;
	/* highest super block generation seen and the block that had it */
	u64 max_superblock_generation;
	struct btrfsic_block *latest_superblock;
	u32 metablock_size;	/* compared to the super block's node and
				 * leaf size */
	u32 datablock_size;	/* compared to the super block's sector size */
};

/* forward declarations of all file-local helpers */
static void btrfsic_block_init(struct btrfsic_block *b);
static struct btrfsic_block *btrfsic_block_alloc(void);
static void btrfsic_block_free(struct btrfsic_block *b);
static void btrfsic_block_link_init(struct btrfsic_block_link *n);
static struct btrfsic_block_link *btrfsic_block_link_alloc(void);
static void btrfsic_block_link_free(struct btrfsic_block_link *n);
static void btrfsic_dev_state_init(struct btrfsic_dev_state *ds);
static struct btrfsic_dev_state *btrfsic_dev_state_alloc(void);
static void btrfsic_dev_state_free(struct btrfsic_dev_state *ds);
static void btrfsic_block_hashtable_init(struct btrfsic_block_hashtable *h);
static void btrfsic_block_hashtable_add(struct btrfsic_block *b,
					struct btrfsic_block_hashtable *h);
static void btrfsic_block_hashtable_remove(struct btrfsic_block *b);
static struct btrfsic_block *btrfsic_block_hashtable_lookup(
		struct block_device *bdev,
		u64 dev_bytenr,
		struct btrfsic_block_hashtable *h);
static void btrfsic_block_link_hashtable_init(
		struct btrfsic_block_link_hashtable *h);
static void btrfsic_block_link_hashtable_add(
		struct btrfsic_block_link *l,
		struct btrfsic_block_link_hashtable *h);
static void btrfsic_block_link_hashtable_remove(struct btrfsic_block_link *l);
static struct btrfsic_block_link *btrfsic_block_link_hashtable_lookup(
		struct block_device *bdev_ref_to,
		u64 dev_bytenr_ref_to,
		struct block_device *bdev_ref_from,
		u64 dev_bytenr_ref_from,
		struct btrfsic_block_link_hashtable *h);
static void btrfsic_dev_state_hashtable_init(
		struct btrfsic_dev_state_hashtable *h);
static void btrfsic_dev_state_hashtable_add(
		struct btrfsic_dev_state *ds,
		struct btrfsic_dev_state_hashtable *h);
static void btrfsic_dev_state_hashtable_remove(struct btrfsic_dev_state *ds);
static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup(
		struct block_device *bdev,
		struct btrfsic_dev_state_hashtable *h);
static struct btrfsic_stack_frame *btrfsic_stack_frame_alloc(void);
static void btrfsic_stack_frame_free(struct btrfsic_stack_frame *sf);
static int btrfsic_process_superblock(struct btrfsic_state *state,
				      struct btrfs_fs_devices *fs_devices);
static int btrfsic_process_metablock(struct btrfsic_state *state,
				     struct btrfsic_block *block,
				     struct btrfsic_block_data_ctx *block_ctx,
				     int limit_nesting, int force_iodone_flag);
static void btrfsic_read_from_block_data(
	struct btrfsic_block_data_ctx *block_ctx,
	void *dst, u32 offset, size_t len);
static int btrfsic_create_link_to_next_block(
		struct btrfsic_state *state,
		struct btrfsic_block *block,
		struct btrfsic_block_data_ctx
		*block_ctx, u64 next_bytenr,
		int limit_nesting,
		struct btrfsic_block_data_ctx *next_block_ctx,
		struct btrfsic_block **next_blockp,
		int force_iodone_flag,
		int *num_copiesp, int *mirror_nump,
		struct btrfs_disk_key *disk_key,
		u64 parent_generation);
static int btrfsic_handle_extent_data(struct btrfsic_state *state,
				      struct btrfsic_block *block,
				      struct btrfsic_block_data_ctx *block_ctx,
				      u32 item_offset, int force_iodone_flag);
static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len,
			     struct btrfsic_block_data_ctx *block_ctx_out,
			     int mirror_num);
static int btrfsic_map_superblock(struct btrfsic_state *state, u64 bytenr,
				  u32 len, struct block_device *bdev,
				  struct btrfsic_block_data_ctx *block_ctx_out);
static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx);
static int btrfsic_read_block(struct btrfsic_state *state,
			      struct btrfsic_block_data_ctx *block_ctx);
static void btrfsic_dump_database(struct btrfsic_state *state);
static void btrfsic_complete_bio_end_io(struct bio *bio, int err);
static int btrfsic_test_for_metadata(struct btrfsic_state *state,
				     char **datav, unsigned int num_pages);
static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
					  u64 dev_bytenr, char **mapped_datav,
					  unsigned int num_pages,
					  struct bio *bio, int *bio_is_patched,
					  struct buffer_head *bh,
					  int submit_bio_bh_rw);
static int btrfsic_process_written_superblock(
		struct btrfsic_state *state,
		struct btrfsic_block *const block,
		struct btrfs_super_block *const super_hdr);
static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status);
static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate);
static int btrfsic_is_block_ref_by_superblock(const struct btrfsic_state *state,
					      const struct btrfsic_block *block,
					      int recursion_level);
static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state,
					struct btrfsic_block *const block,
					int recursion_level);
static void btrfsic_print_add_link(const struct btrfsic_state *state,
				   const struct btrfsic_block_link *l);
static void btrfsic_print_rem_link(const struct btrfsic_state *state,
				   const struct btrfsic_block_link *l);
static char btrfsic_get_block_type(const struct btrfsic_state *state,
				   const struct btrfsic_block *block);
static void btrfsic_dump_tree(const struct btrfsic_state *state);
static void btrfsic_dump_tree_sub(const struct btrfsic_state *state,
				  const struct btrfsic_block *block,
				  int indent_level);
static struct btrfsic_block_link *btrfsic_block_link_lookup_or_add(
		struct btrfsic_state *state,
		struct btrfsic_block_data_ctx *next_block_ctx,
		struct btrfsic_block *next_block,
		struct btrfsic_block *from_block,
		u64 parent_generation);
static struct btrfsic_block *btrfsic_block_lookup_or_add(
		struct btrfsic_state *state,
		struct btrfsic_block_data_ctx *block_ctx,
		const char *additional_string,
		int is_metadata,
		int is_iodone,
		int never_written,
		int mirror_num,
		int *was_created);
static int btrfsic_process_superblock_dev_mirror(
		struct btrfsic_state *state,
		struct btrfsic_dev_state *dev_state,
		struct btrfs_device *device,
		int superblock_mirror_num,
		struct btrfsic_dev_state **selected_dev_state,
		struct btrfs_super_block *selected_super);
static struct btrfsic_dev_state *btrfsic_dev_state_lookup(
		struct block_device *bdev);
static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
					   u64 bytenr,
					   struct btrfsic_dev_state *dev_state,
					   u64 dev_bytenr);

/*
 * File-scope state.
 * NOTE(review): btrfsic_mutex presumably serializes access to the data
 * below and btrfsic_is_initialized presumably guards a one-time setup;
 * their users are not part of this excerpt — confirm.
 */
static struct mutex btrfsic_mutex;
static int btrfsic_is_initialized;
/* maps a struct block_device pointer to its btrfsic_dev_state */
static struct btrfsic_dev_state_hashtable btrfsic_dev_state_hashtable;


/*
 * Bring all members of a block descriptor into their initial state:
 * magic number set, generation unknown, flags cleared, list heads empty.
 */
static void btrfsic_block_init(struct btrfsic_block *b)
{
	b->magic_num = BTRFSIC_BLOCK_MAGIC_NUMBER;
	b->dev_state = NULL;
394 b->dev_bytenr = 0; 395 b->logical_bytenr = 0; 396 b->generation = BTRFSIC_GENERATION_UNKNOWN; 397 b->disk_key.objectid = 0; 398 b->disk_key.type = 0; 399 b->disk_key.offset = 0; 400 b->is_metadata = 0; 401 b->is_superblock = 0; 402 b->is_iodone = 0; 403 b->iodone_w_error = 0; 404 b->never_written = 0; 405 b->mirror_num = 0; 406 b->next_in_same_bio = NULL; 407 b->orig_bio_bh_private = NULL; 408 b->orig_bio_bh_end_io.bio = NULL; 409 INIT_LIST_HEAD(&b->collision_resolving_node); 410 INIT_LIST_HEAD(&b->all_blocks_node); 411 INIT_LIST_HEAD(&b->ref_to_list); 412 INIT_LIST_HEAD(&b->ref_from_list); 413 b->submit_bio_bh_rw = 0; 414 b->flush_gen = 0; 415 } 416 417 static struct btrfsic_block *btrfsic_block_alloc(void) 418 { 419 struct btrfsic_block *b; 420 421 b = kzalloc(sizeof(*b), GFP_NOFS); 422 if (NULL != b) 423 btrfsic_block_init(b); 424 425 return b; 426 } 427 428 static void btrfsic_block_free(struct btrfsic_block *b) 429 { 430 BUG_ON(!(NULL == b || BTRFSIC_BLOCK_MAGIC_NUMBER == b->magic_num)); 431 kfree(b); 432 } 433 434 static void btrfsic_block_link_init(struct btrfsic_block_link *l) 435 { 436 l->magic_num = BTRFSIC_BLOCK_LINK_MAGIC_NUMBER; 437 l->ref_cnt = 1; 438 INIT_LIST_HEAD(&l->node_ref_to); 439 INIT_LIST_HEAD(&l->node_ref_from); 440 INIT_LIST_HEAD(&l->collision_resolving_node); 441 l->block_ref_to = NULL; 442 l->block_ref_from = NULL; 443 } 444 445 static struct btrfsic_block_link *btrfsic_block_link_alloc(void) 446 { 447 struct btrfsic_block_link *l; 448 449 l = kzalloc(sizeof(*l), GFP_NOFS); 450 if (NULL != l) 451 btrfsic_block_link_init(l); 452 453 return l; 454 } 455 456 static void btrfsic_block_link_free(struct btrfsic_block_link *l) 457 { 458 BUG_ON(!(NULL == l || BTRFSIC_BLOCK_LINK_MAGIC_NUMBER == l->magic_num)); 459 kfree(l); 460 } 461 462 static void btrfsic_dev_state_init(struct btrfsic_dev_state *ds) 463 { 464 ds->magic_num = BTRFSIC_DEV2STATE_MAGIC_NUMBER; 465 ds->bdev = NULL; 466 ds->state = NULL; 467 ds->name[0] = '\0'; 468 
INIT_LIST_HEAD(&ds->collision_resolving_node); 469 ds->last_flush_gen = 0; 470 btrfsic_block_init(&ds->dummy_block_for_bio_bh_flush); 471 ds->dummy_block_for_bio_bh_flush.is_iodone = 1; 472 ds->dummy_block_for_bio_bh_flush.dev_state = ds; 473 } 474 475 static struct btrfsic_dev_state *btrfsic_dev_state_alloc(void) 476 { 477 struct btrfsic_dev_state *ds; 478 479 ds = kzalloc(sizeof(*ds), GFP_NOFS); 480 if (NULL != ds) 481 btrfsic_dev_state_init(ds); 482 483 return ds; 484 } 485 486 static void btrfsic_dev_state_free(struct btrfsic_dev_state *ds) 487 { 488 BUG_ON(!(NULL == ds || 489 BTRFSIC_DEV2STATE_MAGIC_NUMBER == ds->magic_num)); 490 kfree(ds); 491 } 492 493 static void btrfsic_block_hashtable_init(struct btrfsic_block_hashtable *h) 494 { 495 int i; 496 497 for (i = 0; i < BTRFSIC_BLOCK_HASHTABLE_SIZE; i++) 498 INIT_LIST_HEAD(h->table + i); 499 } 500 501 static void btrfsic_block_hashtable_add(struct btrfsic_block *b, 502 struct btrfsic_block_hashtable *h) 503 { 504 const unsigned int hashval = 505 (((unsigned int)(b->dev_bytenr >> 16)) ^ 506 ((unsigned int)((uintptr_t)b->dev_state->bdev))) & 507 (BTRFSIC_BLOCK_HASHTABLE_SIZE - 1); 508 509 list_add(&b->collision_resolving_node, h->table + hashval); 510 } 511 512 static void btrfsic_block_hashtable_remove(struct btrfsic_block *b) 513 { 514 list_del(&b->collision_resolving_node); 515 } 516 517 static struct btrfsic_block *btrfsic_block_hashtable_lookup( 518 struct block_device *bdev, 519 u64 dev_bytenr, 520 struct btrfsic_block_hashtable *h) 521 { 522 const unsigned int hashval = 523 (((unsigned int)(dev_bytenr >> 16)) ^ 524 ((unsigned int)((uintptr_t)bdev))) & 525 (BTRFSIC_BLOCK_HASHTABLE_SIZE - 1); 526 struct list_head *elem; 527 528 list_for_each(elem, h->table + hashval) { 529 struct btrfsic_block *const b = 530 list_entry(elem, struct btrfsic_block, 531 collision_resolving_node); 532 533 if (b->dev_state->bdev == bdev && b->dev_bytenr == dev_bytenr) 534 return b; 535 } 536 537 return NULL; 538 } 539 540 static 
void btrfsic_block_link_hashtable_init( 541 struct btrfsic_block_link_hashtable *h) 542 { 543 int i; 544 545 for (i = 0; i < BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE; i++) 546 INIT_LIST_HEAD(h->table + i); 547 } 548 549 static void btrfsic_block_link_hashtable_add( 550 struct btrfsic_block_link *l, 551 struct btrfsic_block_link_hashtable *h) 552 { 553 const unsigned int hashval = 554 (((unsigned int)(l->block_ref_to->dev_bytenr >> 16)) ^ 555 ((unsigned int)(l->block_ref_from->dev_bytenr >> 16)) ^ 556 ((unsigned int)((uintptr_t)l->block_ref_to->dev_state->bdev)) ^ 557 ((unsigned int)((uintptr_t)l->block_ref_from->dev_state->bdev))) 558 & (BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE - 1); 559 560 BUG_ON(NULL == l->block_ref_to); 561 BUG_ON(NULL == l->block_ref_from); 562 list_add(&l->collision_resolving_node, h->table + hashval); 563 } 564 565 static void btrfsic_block_link_hashtable_remove(struct btrfsic_block_link *l) 566 { 567 list_del(&l->collision_resolving_node); 568 } 569 570 static struct btrfsic_block_link *btrfsic_block_link_hashtable_lookup( 571 struct block_device *bdev_ref_to, 572 u64 dev_bytenr_ref_to, 573 struct block_device *bdev_ref_from, 574 u64 dev_bytenr_ref_from, 575 struct btrfsic_block_link_hashtable *h) 576 { 577 const unsigned int hashval = 578 (((unsigned int)(dev_bytenr_ref_to >> 16)) ^ 579 ((unsigned int)(dev_bytenr_ref_from >> 16)) ^ 580 ((unsigned int)((uintptr_t)bdev_ref_to)) ^ 581 ((unsigned int)((uintptr_t)bdev_ref_from))) & 582 (BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE - 1); 583 struct list_head *elem; 584 585 list_for_each(elem, h->table + hashval) { 586 struct btrfsic_block_link *const l = 587 list_entry(elem, struct btrfsic_block_link, 588 collision_resolving_node); 589 590 BUG_ON(NULL == l->block_ref_to); 591 BUG_ON(NULL == l->block_ref_from); 592 if (l->block_ref_to->dev_state->bdev == bdev_ref_to && 593 l->block_ref_to->dev_bytenr == dev_bytenr_ref_to && 594 l->block_ref_from->dev_state->bdev == bdev_ref_from && 595 l->block_ref_from->dev_bytenr == 
dev_bytenr_ref_from) 596 return l; 597 } 598 599 return NULL; 600 } 601 602 static void btrfsic_dev_state_hashtable_init( 603 struct btrfsic_dev_state_hashtable *h) 604 { 605 int i; 606 607 for (i = 0; i < BTRFSIC_DEV2STATE_HASHTABLE_SIZE; i++) 608 INIT_LIST_HEAD(h->table + i); 609 } 610 611 static void btrfsic_dev_state_hashtable_add( 612 struct btrfsic_dev_state *ds, 613 struct btrfsic_dev_state_hashtable *h) 614 { 615 const unsigned int hashval = 616 (((unsigned int)((uintptr_t)ds->bdev)) & 617 (BTRFSIC_DEV2STATE_HASHTABLE_SIZE - 1)); 618 619 list_add(&ds->collision_resolving_node, h->table + hashval); 620 } 621 622 static void btrfsic_dev_state_hashtable_remove(struct btrfsic_dev_state *ds) 623 { 624 list_del(&ds->collision_resolving_node); 625 } 626 627 static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup( 628 struct block_device *bdev, 629 struct btrfsic_dev_state_hashtable *h) 630 { 631 const unsigned int hashval = 632 (((unsigned int)((uintptr_t)bdev)) & 633 (BTRFSIC_DEV2STATE_HASHTABLE_SIZE - 1)); 634 struct list_head *elem; 635 636 list_for_each(elem, h->table + hashval) { 637 struct btrfsic_dev_state *const ds = 638 list_entry(elem, struct btrfsic_dev_state, 639 collision_resolving_node); 640 641 if (ds->bdev == bdev) 642 return ds; 643 } 644 645 return NULL; 646 } 647 648 static int btrfsic_process_superblock(struct btrfsic_state *state, 649 struct btrfs_fs_devices *fs_devices) 650 { 651 int ret = 0; 652 struct btrfs_super_block *selected_super; 653 struct list_head *dev_head = &fs_devices->devices; 654 struct btrfs_device *device; 655 struct btrfsic_dev_state *selected_dev_state = NULL; 656 int pass; 657 658 BUG_ON(NULL == state); 659 selected_super = kzalloc(sizeof(*selected_super), GFP_NOFS); 660 if (NULL == selected_super) { 661 printk(KERN_INFO "btrfsic: error, kmalloc failed!\n"); 662 return -1; 663 } 664 665 list_for_each_entry(device, dev_head, dev_list) { 666 int i; 667 struct btrfsic_dev_state *dev_state; 668 669 if 
(!device->bdev || !device->name) 670 continue; 671 672 dev_state = btrfsic_dev_state_lookup(device->bdev); 673 BUG_ON(NULL == dev_state); 674 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { 675 ret = btrfsic_process_superblock_dev_mirror( 676 state, dev_state, device, i, 677 &selected_dev_state, selected_super); 678 if (0 != ret && 0 == i) { 679 kfree(selected_super); 680 return ret; 681 } 682 } 683 } 684 685 if (NULL == state->latest_superblock) { 686 printk(KERN_INFO "btrfsic: no superblock found!\n"); 687 kfree(selected_super); 688 return -1; 689 } 690 691 state->csum_size = btrfs_super_csum_size(selected_super); 692 693 for (pass = 0; pass < 3; pass++) { 694 int num_copies; 695 int mirror_num; 696 u64 next_bytenr; 697 698 switch (pass) { 699 case 0: 700 next_bytenr = btrfs_super_root(selected_super); 701 if (state->print_mask & 702 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) 703 printk(KERN_INFO "root@%llu\n", 704 (unsigned long long)next_bytenr); 705 break; 706 case 1: 707 next_bytenr = btrfs_super_chunk_root(selected_super); 708 if (state->print_mask & 709 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) 710 printk(KERN_INFO "chunk@%llu\n", 711 (unsigned long long)next_bytenr); 712 break; 713 case 2: 714 next_bytenr = btrfs_super_log_root(selected_super); 715 if (0 == next_bytenr) 716 continue; 717 if (state->print_mask & 718 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) 719 printk(KERN_INFO "log@%llu\n", 720 (unsigned long long)next_bytenr); 721 break; 722 } 723 724 num_copies = 725 btrfs_num_copies(&state->root->fs_info->mapping_tree, 726 next_bytenr, state->metablock_size); 727 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) 728 printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", 729 (unsigned long long)next_bytenr, num_copies); 730 731 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { 732 struct btrfsic_block *next_block; 733 struct btrfsic_block_data_ctx tmp_next_block_ctx; 734 struct btrfsic_block_link *l; 735 736 ret = 
btrfsic_map_block(state, next_bytenr, 737 state->metablock_size, 738 &tmp_next_block_ctx, 739 mirror_num); 740 if (ret) { 741 printk(KERN_INFO "btrfsic:" 742 " btrfsic_map_block(root @%llu," 743 " mirror %d) failed!\n", 744 (unsigned long long)next_bytenr, 745 mirror_num); 746 kfree(selected_super); 747 return -1; 748 } 749 750 next_block = btrfsic_block_hashtable_lookup( 751 tmp_next_block_ctx.dev->bdev, 752 tmp_next_block_ctx.dev_bytenr, 753 &state->block_hashtable); 754 BUG_ON(NULL == next_block); 755 756 l = btrfsic_block_link_hashtable_lookup( 757 tmp_next_block_ctx.dev->bdev, 758 tmp_next_block_ctx.dev_bytenr, 759 state->latest_superblock->dev_state-> 760 bdev, 761 state->latest_superblock->dev_bytenr, 762 &state->block_link_hashtable); 763 BUG_ON(NULL == l); 764 765 ret = btrfsic_read_block(state, &tmp_next_block_ctx); 766 if (ret < (int)PAGE_CACHE_SIZE) { 767 printk(KERN_INFO 768 "btrfsic: read @logical %llu failed!\n", 769 (unsigned long long) 770 tmp_next_block_ctx.start); 771 btrfsic_release_block_ctx(&tmp_next_block_ctx); 772 kfree(selected_super); 773 return -1; 774 } 775 776 ret = btrfsic_process_metablock(state, 777 next_block, 778 &tmp_next_block_ctx, 779 BTRFS_MAX_LEVEL + 3, 1); 780 btrfsic_release_block_ctx(&tmp_next_block_ctx); 781 } 782 } 783 784 kfree(selected_super); 785 return ret; 786 } 787 788 static int btrfsic_process_superblock_dev_mirror( 789 struct btrfsic_state *state, 790 struct btrfsic_dev_state *dev_state, 791 struct btrfs_device *device, 792 int superblock_mirror_num, 793 struct btrfsic_dev_state **selected_dev_state, 794 struct btrfs_super_block *selected_super) 795 { 796 struct btrfs_super_block *super_tmp; 797 u64 dev_bytenr; 798 struct buffer_head *bh; 799 struct btrfsic_block *superblock_tmp; 800 int pass; 801 struct block_device *const superblock_bdev = device->bdev; 802 803 /* super block bytenr is always the unmapped device bytenr */ 804 dev_bytenr = btrfs_sb_offset(superblock_mirror_num); 805 if (dev_bytenr + 
	    BTRFS_SUPER_INFO_SIZE > device->total_bytes)
		return -1;	/* this mirror would lie beyond the device */
	/*
	 * Read the buffer that contains the super block.
	 * NOTE(review): block number and in-buffer offset are calculated
	 * with 4096, which assumes BTRFS_SUPER_INFO_SIZE == 4096 — confirm.
	 */
	bh = __bread(superblock_bdev, dev_bytenr / 4096,
		     BTRFS_SUPER_INFO_SIZE);
	if (NULL == bh)
		return -1;
	super_tmp = (struct btrfs_super_block *)
	    (bh->b_data + (dev_bytenr & 4095));

	/*
	 * Silently skip (return 0) anything that is not a super block of
	 * this device with the expected block sizes.
	 */
	if (btrfs_super_bytenr(super_tmp) != dev_bytenr ||
	    strncmp((char *)(&(super_tmp->magic)), BTRFS_MAGIC,
		    sizeof(super_tmp->magic)) ||
	    memcmp(device->uuid, super_tmp->dev_item.uuid, BTRFS_UUID_SIZE) ||
	    btrfs_super_nodesize(super_tmp) != state->metablock_size ||
	    btrfs_super_leafsize(super_tmp) != state->metablock_size ||
	    btrfs_super_sectorsize(super_tmp) != state->datablock_size) {
		brelse(bh);
		return 0;
	}

	/* create the block descriptor of the super block if it is new */
	superblock_tmp =
	    btrfsic_block_hashtable_lookup(superblock_bdev,
					   dev_bytenr,
					   &state->block_hashtable);
	if (NULL == superblock_tmp) {
		superblock_tmp = btrfsic_block_alloc();
		if (NULL == superblock_tmp) {
			printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
			brelse(bh);
			return -1;
		}
		/* for superblock, only the dev_bytenr makes sense */
		superblock_tmp->dev_bytenr = dev_bytenr;
		superblock_tmp->dev_state = dev_state;
		superblock_tmp->logical_bytenr = dev_bytenr;
		superblock_tmp->generation = btrfs_super_generation(super_tmp);
		superblock_tmp->is_metadata = 1;
		superblock_tmp->is_superblock = 1;
		superblock_tmp->is_iodone = 1;
		superblock_tmp->never_written = 0;
		/* mirror numbers of blocks are 1-based */
		superblock_tmp->mirror_num = 1 + superblock_mirror_num;
		if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
			printk_in_rcu(KERN_INFO "New initial S-block (bdev %p, %s)"
				     " @%llu (%s/%llu/%d)\n",
				     superblock_bdev,
				     rcu_str_deref(device->name),
				     (unsigned long long)dev_bytenr,
				     dev_state->name,
				     (unsigned long long)dev_bytenr,
				     superblock_mirror_num);
		list_add(&superblock_tmp->all_blocks_node,
			 &state->all_blocks_list);
		btrfsic_block_hashtable_add(superblock_tmp,
					    &state->block_hashtable);
	}

	/* select the one with the highest generation field */
	if (btrfs_super_generation(super_tmp) >
	    state->max_superblock_generation ||
	    0 == state->max_superblock_generation) {
		memcpy(selected_super, super_tmp, sizeof(*selected_super));
		*selected_dev_state = dev_state;
		state->max_superblock_generation =
		    btrfs_super_generation(super_tmp);
		state->latest_superblock = superblock_tmp;
	}

	/*
	 * Register the tree roots this super block refers to and link
	 * them to the super block.
	 * pass 0: root tree, pass 1: chunk tree, pass 2: log tree
	 */
	for (pass = 0; pass < 3; pass++) {
		u64 next_bytenr;
		int num_copies;
		int mirror_num;
		const char *additional_string = NULL;
		struct btrfs_disk_key tmp_disk_key;

		tmp_disk_key.type = BTRFS_ROOT_ITEM_KEY;
		tmp_disk_key.offset = 0;
		switch (pass) {
		case 0:
			tmp_disk_key.objectid =
			    cpu_to_le64(BTRFS_ROOT_TREE_OBJECTID);
			additional_string = "initial root ";
			next_bytenr = btrfs_super_root(super_tmp);
			break;
		case 1:
			tmp_disk_key.objectid =
			    cpu_to_le64(BTRFS_CHUNK_TREE_OBJECTID);
			additional_string = "initial chunk ";
			next_bytenr = btrfs_super_chunk_root(super_tmp);
			break;
		case 2:
			tmp_disk_key.objectid =
			    cpu_to_le64(BTRFS_TREE_LOG_OBJECTID);
			additional_string = "initial log ";
			next_bytenr = btrfs_super_log_root(super_tmp);
			/* a log root of 0 means there is no log tree */
			if (0 == next_bytenr)
				continue;
			break;
		}

		num_copies =
		    btrfs_num_copies(&state->root->fs_info->mapping_tree,
				     next_bytenr, state->metablock_size);
		if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
			printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
			       (unsigned long long)next_bytenr, num_copies);
		for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
			struct btrfsic_block *next_block;
			struct btrfsic_block_data_ctx tmp_next_block_ctx;
			struct btrfsic_block_link *l;

			if (btrfsic_map_block(state, next_bytenr,
					      state->metablock_size,
					      &tmp_next_block_ctx,
					      mirror_num)) {
				printk(KERN_INFO "btrfsic: btrfsic_map_block("
				       "bytenr @%llu, mirror %d) failed!\n",
				       (unsigned long long)next_bytenr,
				       mirror_num);
				brelse(bh);
				return -1;
			}

			/*
			 * According to the prototype, the arguments 1, 1, 0
			 * are is_metadata, is_iodone and never_written.
			 */
			next_block = btrfsic_block_lookup_or_add(
					state, &tmp_next_block_ctx,
					additional_string, 1, 1, 0,
					mirror_num, NULL);
			if (NULL == next_block) {
				btrfsic_release_block_ctx(&tmp_next_block_ctx);
				brelse(bh);
				return -1;
			}

			next_block->disk_key = tmp_disk_key;
			next_block->generation = BTRFSIC_GENERATION_UNKNOWN;
			l = btrfsic_block_link_lookup_or_add(
					state, &tmp_next_block_ctx,
					next_block, superblock_tmp,
					BTRFSIC_GENERATION_UNKNOWN);
			/* the context is not needed anymore, even on error */
			btrfsic_release_block_ctx(&tmp_next_block_ctx);
			if (NULL == l) {
				brelse(bh);
				return -1;
			}
		}
	}
	if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_ALL_TREES)
		btrfsic_dump_tree_sub(state, superblock_tmp, 0);

	brelse(bh);
	return 0;
}

/*
 * Allocate a zeroed stack frame with GFP_NOFS and set its magic number.
 * A failed allocation is logged and NULL is returned.
 */
static struct btrfsic_stack_frame *btrfsic_stack_frame_alloc(void)
{
	struct btrfsic_stack_frame *sf;

	sf = kzalloc(sizeof(*sf), GFP_NOFS);
	if (NULL == sf)
		printk(KERN_INFO "btrfsic: alloc memory failed!\n");
	else
		sf->magic = BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER;
	return sf;
}

/*
 * Release a stack frame. NULL is accepted; any other pointer has to carry
 * the stack frame magic number, otherwise BUG_ON() fires.
 */
static void btrfsic_stack_frame_free(struct btrfsic_stack_frame *sf)
{
	BUG_ON(!(NULL == sf ||
		 BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER == sf->magic));
	kfree(sf);
}

/*
 * Process a metadata block, starting at first_block whose data is
 * described by first_block_ctx. Instead of calling itself, the function
 * works on a chain of btrfsic_stack_frame objects (see the comment at the
 * definition of that structure).
 */
static int btrfsic_process_metablock(
		struct btrfsic_state *state,
		struct btrfsic_block *const first_block,
		struct btrfsic_block_data_ctx *const first_block_ctx,
		int first_limit_nesting, int force_iodone_flag)
{
	/* the outermost frame lives on the stack, not on the heap */
	struct btrfsic_stack_frame initial_stack_frame = { 0 };
	struct btrfsic_stack_frame *sf;
	struct btrfsic_stack_frame *next_stack;
	/* the block header is located at the start of the block data */
	struct btrfs_header *const first_hdr =
		(struct btrfs_header *)first_block_ctx->datav[0];

	BUG_ON(!first_hdr);
	sf = &initial_stack_frame;
	sf->error = 0;
	/* -1 marks a frame whose item count was not read yet */
	sf->i = -1;
sf->limit_nesting = first_limit_nesting; 993 sf->block = first_block; 994 sf->block_ctx = first_block_ctx; 995 sf->next_block = NULL; 996 sf->hdr = first_hdr; 997 sf->prev = NULL; 998 999 continue_with_new_stack_frame: 1000 sf->block->generation = le64_to_cpu(sf->hdr->generation); 1001 if (0 == sf->hdr->level) { 1002 struct btrfs_leaf *const leafhdr = 1003 (struct btrfs_leaf *)sf->hdr; 1004 1005 if (-1 == sf->i) { 1006 sf->nr = le32_to_cpu(leafhdr->header.nritems); 1007 1008 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 1009 printk(KERN_INFO 1010 "leaf %llu items %d generation %llu" 1011 " owner %llu\n", 1012 (unsigned long long) 1013 sf->block_ctx->start, 1014 sf->nr, 1015 (unsigned long long) 1016 le64_to_cpu(leafhdr->header.generation), 1017 (unsigned long long) 1018 le64_to_cpu(leafhdr->header.owner)); 1019 } 1020 1021 continue_with_current_leaf_stack_frame: 1022 if (0 == sf->num_copies || sf->mirror_num > sf->num_copies) { 1023 sf->i++; 1024 sf->num_copies = 0; 1025 } 1026 1027 if (sf->i < sf->nr) { 1028 struct btrfs_item disk_item; 1029 u32 disk_item_offset = 1030 (uintptr_t)(leafhdr->items + sf->i) - 1031 (uintptr_t)leafhdr; 1032 struct btrfs_disk_key *disk_key; 1033 u8 type; 1034 u32 item_offset; 1035 u32 item_size; 1036 1037 if (disk_item_offset + sizeof(struct btrfs_item) > 1038 sf->block_ctx->len) { 1039 leaf_item_out_of_bounce_error: 1040 printk(KERN_INFO 1041 "btrfsic: leaf item out of bounce at logical %llu, dev %s\n", 1042 sf->block_ctx->start, 1043 sf->block_ctx->dev->name); 1044 goto one_stack_frame_backwards; 1045 } 1046 btrfsic_read_from_block_data(sf->block_ctx, 1047 &disk_item, 1048 disk_item_offset, 1049 sizeof(struct btrfs_item)); 1050 item_offset = le32_to_cpu(disk_item.offset); 1051 item_size = le32_to_cpu(disk_item.size); 1052 disk_key = &disk_item.key; 1053 type = disk_key->type; 1054 1055 if (BTRFS_ROOT_ITEM_KEY == type) { 1056 struct btrfs_root_item root_item; 1057 u32 root_item_offset; 1058 u64 next_bytenr; 1059 1060 
root_item_offset = item_offset + 1061 offsetof(struct btrfs_leaf, items); 1062 if (root_item_offset + item_size > 1063 sf->block_ctx->len) 1064 goto leaf_item_out_of_bounce_error; 1065 btrfsic_read_from_block_data( 1066 sf->block_ctx, &root_item, 1067 root_item_offset, 1068 item_size); 1069 next_bytenr = le64_to_cpu(root_item.bytenr); 1070 1071 sf->error = 1072 btrfsic_create_link_to_next_block( 1073 state, 1074 sf->block, 1075 sf->block_ctx, 1076 next_bytenr, 1077 sf->limit_nesting, 1078 &sf->next_block_ctx, 1079 &sf->next_block, 1080 force_iodone_flag, 1081 &sf->num_copies, 1082 &sf->mirror_num, 1083 disk_key, 1084 le64_to_cpu(root_item. 1085 generation)); 1086 if (sf->error) 1087 goto one_stack_frame_backwards; 1088 1089 if (NULL != sf->next_block) { 1090 struct btrfs_header *const next_hdr = 1091 (struct btrfs_header *) 1092 sf->next_block_ctx.datav[0]; 1093 1094 next_stack = 1095 btrfsic_stack_frame_alloc(); 1096 if (NULL == next_stack) { 1097 btrfsic_release_block_ctx( 1098 &sf-> 1099 next_block_ctx); 1100 goto one_stack_frame_backwards; 1101 } 1102 1103 next_stack->i = -1; 1104 next_stack->block = sf->next_block; 1105 next_stack->block_ctx = 1106 &sf->next_block_ctx; 1107 next_stack->next_block = NULL; 1108 next_stack->hdr = next_hdr; 1109 next_stack->limit_nesting = 1110 sf->limit_nesting - 1; 1111 next_stack->prev = sf; 1112 sf = next_stack; 1113 goto continue_with_new_stack_frame; 1114 } 1115 } else if (BTRFS_EXTENT_DATA_KEY == type && 1116 state->include_extent_data) { 1117 sf->error = btrfsic_handle_extent_data( 1118 state, 1119 sf->block, 1120 sf->block_ctx, 1121 item_offset, 1122 force_iodone_flag); 1123 if (sf->error) 1124 goto one_stack_frame_backwards; 1125 } 1126 1127 goto continue_with_current_leaf_stack_frame; 1128 } 1129 } else { 1130 struct btrfs_node *const nodehdr = (struct btrfs_node *)sf->hdr; 1131 1132 if (-1 == sf->i) { 1133 sf->nr = le32_to_cpu(nodehdr->header.nritems); 1134 1135 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 1136 
printk(KERN_INFO "node %llu level %d items %d" 1137 " generation %llu owner %llu\n", 1138 (unsigned long long) 1139 sf->block_ctx->start, 1140 nodehdr->header.level, sf->nr, 1141 (unsigned long long) 1142 le64_to_cpu(nodehdr->header.generation), 1143 (unsigned long long) 1144 le64_to_cpu(nodehdr->header.owner)); 1145 } 1146 1147 continue_with_current_node_stack_frame: 1148 if (0 == sf->num_copies || sf->mirror_num > sf->num_copies) { 1149 sf->i++; 1150 sf->num_copies = 0; 1151 } 1152 1153 if (sf->i < sf->nr) { 1154 struct btrfs_key_ptr key_ptr; 1155 u32 key_ptr_offset; 1156 u64 next_bytenr; 1157 1158 key_ptr_offset = (uintptr_t)(nodehdr->ptrs + sf->i) - 1159 (uintptr_t)nodehdr; 1160 if (key_ptr_offset + sizeof(struct btrfs_key_ptr) > 1161 sf->block_ctx->len) { 1162 printk(KERN_INFO 1163 "btrfsic: node item out of bounce at logical %llu, dev %s\n", 1164 sf->block_ctx->start, 1165 sf->block_ctx->dev->name); 1166 goto one_stack_frame_backwards; 1167 } 1168 btrfsic_read_from_block_data( 1169 sf->block_ctx, &key_ptr, key_ptr_offset, 1170 sizeof(struct btrfs_key_ptr)); 1171 next_bytenr = le64_to_cpu(key_ptr.blockptr); 1172 1173 sf->error = btrfsic_create_link_to_next_block( 1174 state, 1175 sf->block, 1176 sf->block_ctx, 1177 next_bytenr, 1178 sf->limit_nesting, 1179 &sf->next_block_ctx, 1180 &sf->next_block, 1181 force_iodone_flag, 1182 &sf->num_copies, 1183 &sf->mirror_num, 1184 &key_ptr.key, 1185 le64_to_cpu(key_ptr.generation)); 1186 if (sf->error) 1187 goto one_stack_frame_backwards; 1188 1189 if (NULL != sf->next_block) { 1190 struct btrfs_header *const next_hdr = 1191 (struct btrfs_header *) 1192 sf->next_block_ctx.datav[0]; 1193 1194 next_stack = btrfsic_stack_frame_alloc(); 1195 if (NULL == next_stack) 1196 goto one_stack_frame_backwards; 1197 1198 next_stack->i = -1; 1199 next_stack->block = sf->next_block; 1200 next_stack->block_ctx = &sf->next_block_ctx; 1201 next_stack->next_block = NULL; 1202 next_stack->hdr = next_hdr; 1203 next_stack->limit_nesting = 1204 
sf->limit_nesting - 1; 1205 next_stack->prev = sf; 1206 sf = next_stack; 1207 goto continue_with_new_stack_frame; 1208 } 1209 1210 goto continue_with_current_node_stack_frame; 1211 } 1212 } 1213 1214 one_stack_frame_backwards: 1215 if (NULL != sf->prev) { 1216 struct btrfsic_stack_frame *const prev = sf->prev; 1217 1218 /* the one for the initial block is freed in the caller */ 1219 btrfsic_release_block_ctx(sf->block_ctx); 1220 1221 if (sf->error) { 1222 prev->error = sf->error; 1223 btrfsic_stack_frame_free(sf); 1224 sf = prev; 1225 goto one_stack_frame_backwards; 1226 } 1227 1228 btrfsic_stack_frame_free(sf); 1229 sf = prev; 1230 goto continue_with_new_stack_frame; 1231 } else { 1232 BUG_ON(&initial_stack_frame != sf); 1233 } 1234 1235 return sf->error; 1236 } 1237 1238 static void btrfsic_read_from_block_data( 1239 struct btrfsic_block_data_ctx *block_ctx, 1240 void *dstv, u32 offset, size_t len) 1241 { 1242 size_t cur; 1243 size_t offset_in_page; 1244 char *kaddr; 1245 char *dst = (char *)dstv; 1246 size_t start_offset = block_ctx->start & ((u64)PAGE_CACHE_SIZE - 1); 1247 unsigned long i = (start_offset + offset) >> PAGE_CACHE_SHIFT; 1248 1249 WARN_ON(offset + len > block_ctx->len); 1250 offset_in_page = (start_offset + offset) & 1251 ((unsigned long)PAGE_CACHE_SIZE - 1); 1252 1253 while (len > 0) { 1254 cur = min(len, ((size_t)PAGE_CACHE_SIZE - offset_in_page)); 1255 BUG_ON(i >= (block_ctx->len + PAGE_CACHE_SIZE - 1) >> 1256 PAGE_CACHE_SHIFT); 1257 kaddr = block_ctx->datav[i]; 1258 memcpy(dst, kaddr + offset_in_page, cur); 1259 1260 dst += cur; 1261 len -= cur; 1262 offset_in_page = 0; 1263 i++; 1264 } 1265 } 1266 1267 static int btrfsic_create_link_to_next_block( 1268 struct btrfsic_state *state, 1269 struct btrfsic_block *block, 1270 struct btrfsic_block_data_ctx *block_ctx, 1271 u64 next_bytenr, 1272 int limit_nesting, 1273 struct btrfsic_block_data_ctx *next_block_ctx, 1274 struct btrfsic_block **next_blockp, 1275 int force_iodone_flag, 1276 int 
*num_copiesp, int *mirror_nump, 1277 struct btrfs_disk_key *disk_key, 1278 u64 parent_generation) 1279 { 1280 struct btrfsic_block *next_block = NULL; 1281 int ret; 1282 struct btrfsic_block_link *l; 1283 int did_alloc_block_link; 1284 int block_was_created; 1285 1286 *next_blockp = NULL; 1287 if (0 == *num_copiesp) { 1288 *num_copiesp = 1289 btrfs_num_copies(&state->root->fs_info->mapping_tree, 1290 next_bytenr, state->metablock_size); 1291 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) 1292 printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", 1293 (unsigned long long)next_bytenr, *num_copiesp); 1294 *mirror_nump = 1; 1295 } 1296 1297 if (*mirror_nump > *num_copiesp) 1298 return 0; 1299 1300 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 1301 printk(KERN_INFO 1302 "btrfsic_create_link_to_next_block(mirror_num=%d)\n", 1303 *mirror_nump); 1304 ret = btrfsic_map_block(state, next_bytenr, 1305 state->metablock_size, 1306 next_block_ctx, *mirror_nump); 1307 if (ret) { 1308 printk(KERN_INFO 1309 "btrfsic: btrfsic_map_block(@%llu, mirror=%d) failed!\n", 1310 (unsigned long long)next_bytenr, *mirror_nump); 1311 btrfsic_release_block_ctx(next_block_ctx); 1312 *next_blockp = NULL; 1313 return -1; 1314 } 1315 1316 next_block = btrfsic_block_lookup_or_add(state, 1317 next_block_ctx, "referenced ", 1318 1, force_iodone_flag, 1319 !force_iodone_flag, 1320 *mirror_nump, 1321 &block_was_created); 1322 if (NULL == next_block) { 1323 btrfsic_release_block_ctx(next_block_ctx); 1324 *next_blockp = NULL; 1325 return -1; 1326 } 1327 if (block_was_created) { 1328 l = NULL; 1329 next_block->generation = BTRFSIC_GENERATION_UNKNOWN; 1330 } else { 1331 if (next_block->logical_bytenr != next_bytenr && 1332 !(!next_block->is_metadata && 1333 0 == next_block->logical_bytenr)) { 1334 printk(KERN_INFO 1335 "Referenced block @%llu (%s/%llu/%d)" 1336 " found in hash table, %c," 1337 " bytenr mismatch (!= stored %llu).\n", 1338 (unsigned long long)next_bytenr, 1339 
next_block_ctx->dev->name, 1340 (unsigned long long)next_block_ctx->dev_bytenr, 1341 *mirror_nump, 1342 btrfsic_get_block_type(state, next_block), 1343 (unsigned long long)next_block->logical_bytenr); 1344 } else if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 1345 printk(KERN_INFO 1346 "Referenced block @%llu (%s/%llu/%d)" 1347 " found in hash table, %c.\n", 1348 (unsigned long long)next_bytenr, 1349 next_block_ctx->dev->name, 1350 (unsigned long long)next_block_ctx->dev_bytenr, 1351 *mirror_nump, 1352 btrfsic_get_block_type(state, next_block)); 1353 next_block->logical_bytenr = next_bytenr; 1354 1355 next_block->mirror_num = *mirror_nump; 1356 l = btrfsic_block_link_hashtable_lookup( 1357 next_block_ctx->dev->bdev, 1358 next_block_ctx->dev_bytenr, 1359 block_ctx->dev->bdev, 1360 block_ctx->dev_bytenr, 1361 &state->block_link_hashtable); 1362 } 1363 1364 next_block->disk_key = *disk_key; 1365 if (NULL == l) { 1366 l = btrfsic_block_link_alloc(); 1367 if (NULL == l) { 1368 printk(KERN_INFO "btrfsic: error, kmalloc failed!\n"); 1369 btrfsic_release_block_ctx(next_block_ctx); 1370 *next_blockp = NULL; 1371 return -1; 1372 } 1373 1374 did_alloc_block_link = 1; 1375 l->block_ref_to = next_block; 1376 l->block_ref_from = block; 1377 l->ref_cnt = 1; 1378 l->parent_generation = parent_generation; 1379 1380 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 1381 btrfsic_print_add_link(state, l); 1382 1383 list_add(&l->node_ref_to, &block->ref_to_list); 1384 list_add(&l->node_ref_from, &next_block->ref_from_list); 1385 1386 btrfsic_block_link_hashtable_add(l, 1387 &state->block_link_hashtable); 1388 } else { 1389 did_alloc_block_link = 0; 1390 if (0 == limit_nesting) { 1391 l->ref_cnt++; 1392 l->parent_generation = parent_generation; 1393 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 1394 btrfsic_print_add_link(state, l); 1395 } 1396 } 1397 1398 if (limit_nesting > 0 && did_alloc_block_link) { 1399 ret = btrfsic_read_block(state, next_block_ctx); 1400 if (ret < 
(int)next_block_ctx->len) { 1401 printk(KERN_INFO 1402 "btrfsic: read block @logical %llu failed!\n", 1403 (unsigned long long)next_bytenr); 1404 btrfsic_release_block_ctx(next_block_ctx); 1405 *next_blockp = NULL; 1406 return -1; 1407 } 1408 1409 *next_blockp = next_block; 1410 } else { 1411 *next_blockp = NULL; 1412 } 1413 (*mirror_nump)++; 1414 1415 return 0; 1416 } 1417 1418 static int btrfsic_handle_extent_data( 1419 struct btrfsic_state *state, 1420 struct btrfsic_block *block, 1421 struct btrfsic_block_data_ctx *block_ctx, 1422 u32 item_offset, int force_iodone_flag) 1423 { 1424 int ret; 1425 struct btrfs_file_extent_item file_extent_item; 1426 u64 file_extent_item_offset; 1427 u64 next_bytenr; 1428 u64 num_bytes; 1429 u64 generation; 1430 struct btrfsic_block_link *l; 1431 1432 file_extent_item_offset = offsetof(struct btrfs_leaf, items) + 1433 item_offset; 1434 if (file_extent_item_offset + 1435 offsetof(struct btrfs_file_extent_item, disk_num_bytes) > 1436 block_ctx->len) { 1437 printk(KERN_INFO 1438 "btrfsic: file item out of bounce at logical %llu, dev %s\n", 1439 block_ctx->start, block_ctx->dev->name); 1440 return -1; 1441 } 1442 1443 btrfsic_read_from_block_data(block_ctx, &file_extent_item, 1444 file_extent_item_offset, 1445 offsetof(struct btrfs_file_extent_item, disk_num_bytes)); 1446 if (BTRFS_FILE_EXTENT_REG != file_extent_item.type || 1447 ((u64)0) == le64_to_cpu(file_extent_item.disk_bytenr)) { 1448 if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE) 1449 printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu\n", 1450 file_extent_item.type, 1451 (unsigned long long) 1452 le64_to_cpu(file_extent_item.disk_bytenr)); 1453 return 0; 1454 } 1455 1456 if (file_extent_item_offset + sizeof(struct btrfs_file_extent_item) > 1457 block_ctx->len) { 1458 printk(KERN_INFO 1459 "btrfsic: file item out of bounce at logical %llu, dev %s\n", 1460 block_ctx->start, block_ctx->dev->name); 1461 return -1; 1462 } 1463 
btrfsic_read_from_block_data(block_ctx, &file_extent_item, 1464 file_extent_item_offset, 1465 sizeof(struct btrfs_file_extent_item)); 1466 next_bytenr = le64_to_cpu(file_extent_item.disk_bytenr) + 1467 le64_to_cpu(file_extent_item.offset); 1468 generation = le64_to_cpu(file_extent_item.generation); 1469 num_bytes = le64_to_cpu(file_extent_item.num_bytes); 1470 generation = le64_to_cpu(file_extent_item.generation); 1471 1472 if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE) 1473 printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu," 1474 " offset = %llu, num_bytes = %llu\n", 1475 file_extent_item.type, 1476 (unsigned long long) 1477 le64_to_cpu(file_extent_item.disk_bytenr), 1478 (unsigned long long)le64_to_cpu(file_extent_item.offset), 1479 (unsigned long long)num_bytes); 1480 while (num_bytes > 0) { 1481 u32 chunk_len; 1482 int num_copies; 1483 int mirror_num; 1484 1485 if (num_bytes > state->datablock_size) 1486 chunk_len = state->datablock_size; 1487 else 1488 chunk_len = num_bytes; 1489 1490 num_copies = 1491 btrfs_num_copies(&state->root->fs_info->mapping_tree, 1492 next_bytenr, state->datablock_size); 1493 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) 1494 printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", 1495 (unsigned long long)next_bytenr, num_copies); 1496 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { 1497 struct btrfsic_block_data_ctx next_block_ctx; 1498 struct btrfsic_block *next_block; 1499 int block_was_created; 1500 1501 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 1502 printk(KERN_INFO "btrfsic_handle_extent_data(" 1503 "mirror_num=%d)\n", mirror_num); 1504 if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE) 1505 printk(KERN_INFO 1506 "\tdisk_bytenr = %llu, num_bytes %u\n", 1507 (unsigned long long)next_bytenr, 1508 chunk_len); 1509 ret = btrfsic_map_block(state, next_bytenr, 1510 chunk_len, &next_block_ctx, 1511 mirror_num); 1512 if (ret) { 1513 printk(KERN_INFO 1514 "btrfsic: 
btrfsic_map_block(@%llu," 1515 " mirror=%d) failed!\n", 1516 (unsigned long long)next_bytenr, 1517 mirror_num); 1518 return -1; 1519 } 1520 1521 next_block = btrfsic_block_lookup_or_add( 1522 state, 1523 &next_block_ctx, 1524 "referenced ", 1525 0, 1526 force_iodone_flag, 1527 !force_iodone_flag, 1528 mirror_num, 1529 &block_was_created); 1530 if (NULL == next_block) { 1531 printk(KERN_INFO 1532 "btrfsic: error, kmalloc failed!\n"); 1533 btrfsic_release_block_ctx(&next_block_ctx); 1534 return -1; 1535 } 1536 if (!block_was_created) { 1537 if (next_block->logical_bytenr != next_bytenr && 1538 !(!next_block->is_metadata && 1539 0 == next_block->logical_bytenr)) { 1540 printk(KERN_INFO 1541 "Referenced block" 1542 " @%llu (%s/%llu/%d)" 1543 " found in hash table, D," 1544 " bytenr mismatch" 1545 " (!= stored %llu).\n", 1546 (unsigned long long)next_bytenr, 1547 next_block_ctx.dev->name, 1548 (unsigned long long) 1549 next_block_ctx.dev_bytenr, 1550 mirror_num, 1551 (unsigned long long) 1552 next_block->logical_bytenr); 1553 } 1554 next_block->logical_bytenr = next_bytenr; 1555 next_block->mirror_num = mirror_num; 1556 } 1557 1558 l = btrfsic_block_link_lookup_or_add(state, 1559 &next_block_ctx, 1560 next_block, block, 1561 generation); 1562 btrfsic_release_block_ctx(&next_block_ctx); 1563 if (NULL == l) 1564 return -1; 1565 } 1566 1567 next_bytenr += chunk_len; 1568 num_bytes -= chunk_len; 1569 } 1570 1571 return 0; 1572 } 1573 1574 static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len, 1575 struct btrfsic_block_data_ctx *block_ctx_out, 1576 int mirror_num) 1577 { 1578 int ret; 1579 u64 length; 1580 struct btrfs_bio *multi = NULL; 1581 struct btrfs_device *device; 1582 1583 length = len; 1584 ret = btrfs_map_block(&state->root->fs_info->mapping_tree, READ, 1585 bytenr, &length, &multi, mirror_num); 1586 1587 device = multi->stripes[0].dev; 1588 block_ctx_out->dev = btrfsic_dev_state_lookup(device->bdev); 1589 block_ctx_out->dev_bytenr = 
multi->stripes[0].physical; 1590 block_ctx_out->start = bytenr; 1591 block_ctx_out->len = len; 1592 block_ctx_out->datav = NULL; 1593 block_ctx_out->pagev = NULL; 1594 block_ctx_out->mem_to_free = NULL; 1595 1596 if (0 == ret) 1597 kfree(multi); 1598 if (NULL == block_ctx_out->dev) { 1599 ret = -ENXIO; 1600 printk(KERN_INFO "btrfsic: error, cannot lookup dev (#1)!\n"); 1601 } 1602 1603 return ret; 1604 } 1605 1606 static int btrfsic_map_superblock(struct btrfsic_state *state, u64 bytenr, 1607 u32 len, struct block_device *bdev, 1608 struct btrfsic_block_data_ctx *block_ctx_out) 1609 { 1610 block_ctx_out->dev = btrfsic_dev_state_lookup(bdev); 1611 block_ctx_out->dev_bytenr = bytenr; 1612 block_ctx_out->start = bytenr; 1613 block_ctx_out->len = len; 1614 block_ctx_out->datav = NULL; 1615 block_ctx_out->pagev = NULL; 1616 block_ctx_out->mem_to_free = NULL; 1617 if (NULL != block_ctx_out->dev) { 1618 return 0; 1619 } else { 1620 printk(KERN_INFO "btrfsic: error, cannot lookup dev (#2)!\n"); 1621 return -ENXIO; 1622 } 1623 } 1624 1625 static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx) 1626 { 1627 if (block_ctx->mem_to_free) { 1628 unsigned int num_pages; 1629 1630 BUG_ON(!block_ctx->datav); 1631 BUG_ON(!block_ctx->pagev); 1632 num_pages = (block_ctx->len + (u64)PAGE_CACHE_SIZE - 1) >> 1633 PAGE_CACHE_SHIFT; 1634 while (num_pages > 0) { 1635 num_pages--; 1636 if (block_ctx->datav[num_pages]) { 1637 kunmap(block_ctx->pagev[num_pages]); 1638 block_ctx->datav[num_pages] = NULL; 1639 } 1640 if (block_ctx->pagev[num_pages]) { 1641 __free_page(block_ctx->pagev[num_pages]); 1642 block_ctx->pagev[num_pages] = NULL; 1643 } 1644 } 1645 1646 kfree(block_ctx->mem_to_free); 1647 block_ctx->mem_to_free = NULL; 1648 block_ctx->pagev = NULL; 1649 block_ctx->datav = NULL; 1650 } 1651 } 1652 1653 static int btrfsic_read_block(struct btrfsic_state *state, 1654 struct btrfsic_block_data_ctx *block_ctx) 1655 { 1656 unsigned int num_pages; 1657 unsigned int i; 
1658 u64 dev_bytenr; 1659 int ret; 1660 1661 BUG_ON(block_ctx->datav); 1662 BUG_ON(block_ctx->pagev); 1663 BUG_ON(block_ctx->mem_to_free); 1664 if (block_ctx->dev_bytenr & ((u64)PAGE_CACHE_SIZE - 1)) { 1665 printk(KERN_INFO 1666 "btrfsic: read_block() with unaligned bytenr %llu\n", 1667 (unsigned long long)block_ctx->dev_bytenr); 1668 return -1; 1669 } 1670 1671 num_pages = (block_ctx->len + (u64)PAGE_CACHE_SIZE - 1) >> 1672 PAGE_CACHE_SHIFT; 1673 block_ctx->mem_to_free = kzalloc((sizeof(*block_ctx->datav) + 1674 sizeof(*block_ctx->pagev)) * 1675 num_pages, GFP_NOFS); 1676 if (!block_ctx->mem_to_free) 1677 return -1; 1678 block_ctx->datav = block_ctx->mem_to_free; 1679 block_ctx->pagev = (struct page **)(block_ctx->datav + num_pages); 1680 for (i = 0; i < num_pages; i++) { 1681 block_ctx->pagev[i] = alloc_page(GFP_NOFS); 1682 if (!block_ctx->pagev[i]) 1683 return -1; 1684 } 1685 1686 dev_bytenr = block_ctx->dev_bytenr; 1687 for (i = 0; i < num_pages;) { 1688 struct bio *bio; 1689 unsigned int j; 1690 DECLARE_COMPLETION_ONSTACK(complete); 1691 1692 bio = bio_alloc(GFP_NOFS, num_pages - i); 1693 if (!bio) { 1694 printk(KERN_INFO 1695 "btrfsic: bio_alloc() for %u pages failed!\n", 1696 num_pages - i); 1697 return -1; 1698 } 1699 bio->bi_bdev = block_ctx->dev->bdev; 1700 bio->bi_sector = dev_bytenr >> 9; 1701 bio->bi_end_io = btrfsic_complete_bio_end_io; 1702 bio->bi_private = &complete; 1703 1704 for (j = i; j < num_pages; j++) { 1705 ret = bio_add_page(bio, block_ctx->pagev[j], 1706 PAGE_CACHE_SIZE, 0); 1707 if (PAGE_CACHE_SIZE != ret) 1708 break; 1709 } 1710 if (j == i) { 1711 printk(KERN_INFO 1712 "btrfsic: error, failed to add a single page!\n"); 1713 return -1; 1714 } 1715 submit_bio(READ, bio); 1716 1717 /* this will also unplug the queue */ 1718 wait_for_completion(&complete); 1719 1720 if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) { 1721 printk(KERN_INFO 1722 "btrfsic: read error at logical %llu dev %s!\n", 1723 block_ctx->start, block_ctx->dev->name); 1724 
bio_put(bio); 1725 return -1; 1726 } 1727 bio_put(bio); 1728 dev_bytenr += (j - i) * PAGE_CACHE_SIZE; 1729 i = j; 1730 } 1731 for (i = 0; i < num_pages; i++) { 1732 block_ctx->datav[i] = kmap(block_ctx->pagev[i]); 1733 if (!block_ctx->datav[i]) { 1734 printk(KERN_INFO "btrfsic: kmap() failed (dev %s)!\n", 1735 block_ctx->dev->name); 1736 return -1; 1737 } 1738 } 1739 1740 return block_ctx->len; 1741 } 1742 1743 static void btrfsic_complete_bio_end_io(struct bio *bio, int err) 1744 { 1745 complete((struct completion *)bio->bi_private); 1746 } 1747 1748 static void btrfsic_dump_database(struct btrfsic_state *state) 1749 { 1750 struct list_head *elem_all; 1751 1752 BUG_ON(NULL == state); 1753 1754 printk(KERN_INFO "all_blocks_list:\n"); 1755 list_for_each(elem_all, &state->all_blocks_list) { 1756 const struct btrfsic_block *const b_all = 1757 list_entry(elem_all, struct btrfsic_block, 1758 all_blocks_node); 1759 struct list_head *elem_ref_to; 1760 struct list_head *elem_ref_from; 1761 1762 printk(KERN_INFO "%c-block @%llu (%s/%llu/%d)\n", 1763 btrfsic_get_block_type(state, b_all), 1764 (unsigned long long)b_all->logical_bytenr, 1765 b_all->dev_state->name, 1766 (unsigned long long)b_all->dev_bytenr, 1767 b_all->mirror_num); 1768 1769 list_for_each(elem_ref_to, &b_all->ref_to_list) { 1770 const struct btrfsic_block_link *const l = 1771 list_entry(elem_ref_to, 1772 struct btrfsic_block_link, 1773 node_ref_to); 1774 1775 printk(KERN_INFO " %c @%llu (%s/%llu/%d)" 1776 " refers %u* to" 1777 " %c @%llu (%s/%llu/%d)\n", 1778 btrfsic_get_block_type(state, b_all), 1779 (unsigned long long)b_all->logical_bytenr, 1780 b_all->dev_state->name, 1781 (unsigned long long)b_all->dev_bytenr, 1782 b_all->mirror_num, 1783 l->ref_cnt, 1784 btrfsic_get_block_type(state, l->block_ref_to), 1785 (unsigned long long) 1786 l->block_ref_to->logical_bytenr, 1787 l->block_ref_to->dev_state->name, 1788 (unsigned long long)l->block_ref_to->dev_bytenr, 1789 l->block_ref_to->mirror_num); 1790 } 1791 
1792 list_for_each(elem_ref_from, &b_all->ref_from_list) { 1793 const struct btrfsic_block_link *const l = 1794 list_entry(elem_ref_from, 1795 struct btrfsic_block_link, 1796 node_ref_from); 1797 1798 printk(KERN_INFO " %c @%llu (%s/%llu/%d)" 1799 " is ref %u* from" 1800 " %c @%llu (%s/%llu/%d)\n", 1801 btrfsic_get_block_type(state, b_all), 1802 (unsigned long long)b_all->logical_bytenr, 1803 b_all->dev_state->name, 1804 (unsigned long long)b_all->dev_bytenr, 1805 b_all->mirror_num, 1806 l->ref_cnt, 1807 btrfsic_get_block_type(state, l->block_ref_from), 1808 (unsigned long long) 1809 l->block_ref_from->logical_bytenr, 1810 l->block_ref_from->dev_state->name, 1811 (unsigned long long) 1812 l->block_ref_from->dev_bytenr, 1813 l->block_ref_from->mirror_num); 1814 } 1815 1816 printk(KERN_INFO "\n"); 1817 } 1818 } 1819 1820 /* 1821 * Test whether the disk block contains a tree block (leaf or node) 1822 * (note that this test fails for the super block) 1823 */ 1824 static int btrfsic_test_for_metadata(struct btrfsic_state *state, 1825 char **datav, unsigned int num_pages) 1826 { 1827 struct btrfs_header *h; 1828 u8 csum[BTRFS_CSUM_SIZE]; 1829 u32 crc = ~(u32)0; 1830 unsigned int i; 1831 1832 if (num_pages * PAGE_CACHE_SIZE < state->metablock_size) 1833 return 1; /* not metadata */ 1834 num_pages = state->metablock_size >> PAGE_CACHE_SHIFT; 1835 h = (struct btrfs_header *)datav[0]; 1836 1837 if (memcmp(h->fsid, state->root->fs_info->fsid, BTRFS_UUID_SIZE)) 1838 return 1; 1839 1840 for (i = 0; i < num_pages; i++) { 1841 u8 *data = i ? datav[i] : (datav[i] + BTRFS_CSUM_SIZE); 1842 size_t sublen = i ? 
PAGE_CACHE_SIZE : 1843 (PAGE_CACHE_SIZE - BTRFS_CSUM_SIZE); 1844 1845 crc = crc32c(crc, data, sublen); 1846 } 1847 btrfs_csum_final(crc, csum); 1848 if (memcmp(csum, h->csum, state->csum_size)) 1849 return 1; 1850 1851 return 0; /* is metadata */ 1852 } 1853 1854 static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state, 1855 u64 dev_bytenr, char **mapped_datav, 1856 unsigned int num_pages, 1857 struct bio *bio, int *bio_is_patched, 1858 struct buffer_head *bh, 1859 int submit_bio_bh_rw) 1860 { 1861 int is_metadata; 1862 struct btrfsic_block *block; 1863 struct btrfsic_block_data_ctx block_ctx; 1864 int ret; 1865 struct btrfsic_state *state = dev_state->state; 1866 struct block_device *bdev = dev_state->bdev; 1867 unsigned int processed_len; 1868 1869 if (NULL != bio_is_patched) 1870 *bio_is_patched = 0; 1871 1872 again: 1873 if (num_pages == 0) 1874 return; 1875 1876 processed_len = 0; 1877 is_metadata = (0 == btrfsic_test_for_metadata(state, mapped_datav, 1878 num_pages)); 1879 1880 block = btrfsic_block_hashtable_lookup(bdev, dev_bytenr, 1881 &state->block_hashtable); 1882 if (NULL != block) { 1883 u64 bytenr = 0; 1884 struct list_head *elem_ref_to; 1885 struct list_head *tmp_ref_to; 1886 1887 if (block->is_superblock) { 1888 bytenr = le64_to_cpu(((struct btrfs_super_block *) 1889 mapped_datav[0])->bytenr); 1890 if (num_pages * PAGE_CACHE_SIZE < 1891 BTRFS_SUPER_INFO_SIZE) { 1892 printk(KERN_INFO 1893 "btrfsic: cannot work with too short bios!\n"); 1894 return; 1895 } 1896 is_metadata = 1; 1897 BUG_ON(BTRFS_SUPER_INFO_SIZE & (PAGE_CACHE_SIZE - 1)); 1898 processed_len = BTRFS_SUPER_INFO_SIZE; 1899 if (state->print_mask & 1900 BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE) { 1901 printk(KERN_INFO 1902 "[before new superblock is written]:\n"); 1903 btrfsic_dump_tree_sub(state, block, 0); 1904 } 1905 } 1906 if (is_metadata) { 1907 if (!block->is_superblock) { 1908 if (num_pages * PAGE_CACHE_SIZE < 1909 state->metablock_size) { 1910 printk(KERN_INFO 
1911 "btrfsic: cannot work with too short bios!\n"); 1912 return; 1913 } 1914 processed_len = state->metablock_size; 1915 bytenr = le64_to_cpu(((struct btrfs_header *) 1916 mapped_datav[0])->bytenr); 1917 btrfsic_cmp_log_and_dev_bytenr(state, bytenr, 1918 dev_state, 1919 dev_bytenr); 1920 } 1921 if (block->logical_bytenr != bytenr) { 1922 printk(KERN_INFO 1923 "Written block @%llu (%s/%llu/%d)" 1924 " found in hash table, %c," 1925 " bytenr mismatch" 1926 " (!= stored %llu).\n", 1927 (unsigned long long)bytenr, 1928 dev_state->name, 1929 (unsigned long long)dev_bytenr, 1930 block->mirror_num, 1931 btrfsic_get_block_type(state, block), 1932 (unsigned long long) 1933 block->logical_bytenr); 1934 block->logical_bytenr = bytenr; 1935 } else if (state->print_mask & 1936 BTRFSIC_PRINT_MASK_VERBOSE) 1937 printk(KERN_INFO 1938 "Written block @%llu (%s/%llu/%d)" 1939 " found in hash table, %c.\n", 1940 (unsigned long long)bytenr, 1941 dev_state->name, 1942 (unsigned long long)dev_bytenr, 1943 block->mirror_num, 1944 btrfsic_get_block_type(state, block)); 1945 } else { 1946 if (num_pages * PAGE_CACHE_SIZE < 1947 state->datablock_size) { 1948 printk(KERN_INFO 1949 "btrfsic: cannot work with too short bios!\n"); 1950 return; 1951 } 1952 processed_len = state->datablock_size; 1953 bytenr = block->logical_bytenr; 1954 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 1955 printk(KERN_INFO 1956 "Written block @%llu (%s/%llu/%d)" 1957 " found in hash table, %c.\n", 1958 (unsigned long long)bytenr, 1959 dev_state->name, 1960 (unsigned long long)dev_bytenr, 1961 block->mirror_num, 1962 btrfsic_get_block_type(state, block)); 1963 } 1964 1965 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 1966 printk(KERN_INFO 1967 "ref_to_list: %cE, ref_from_list: %cE\n", 1968 list_empty(&block->ref_to_list) ? ' ' : '!', 1969 list_empty(&block->ref_from_list) ? 
' ' : '!'); 1970 if (btrfsic_is_block_ref_by_superblock(state, block, 0)) { 1971 printk(KERN_INFO "btrfs: attempt to overwrite %c-block" 1972 " @%llu (%s/%llu/%d), old(gen=%llu," 1973 " objectid=%llu, type=%d, offset=%llu)," 1974 " new(gen=%llu)," 1975 " which is referenced by most recent superblock" 1976 " (superblockgen=%llu)!\n", 1977 btrfsic_get_block_type(state, block), 1978 (unsigned long long)bytenr, 1979 dev_state->name, 1980 (unsigned long long)dev_bytenr, 1981 block->mirror_num, 1982 (unsigned long long)block->generation, 1983 (unsigned long long) 1984 le64_to_cpu(block->disk_key.objectid), 1985 block->disk_key.type, 1986 (unsigned long long) 1987 le64_to_cpu(block->disk_key.offset), 1988 (unsigned long long) 1989 le64_to_cpu(((struct btrfs_header *) 1990 mapped_datav[0])->generation), 1991 (unsigned long long) 1992 state->max_superblock_generation); 1993 btrfsic_dump_tree(state); 1994 } 1995 1996 if (!block->is_iodone && !block->never_written) { 1997 printk(KERN_INFO "btrfs: attempt to overwrite %c-block" 1998 " @%llu (%s/%llu/%d), oldgen=%llu, newgen=%llu," 1999 " which is not yet iodone!\n", 2000 btrfsic_get_block_type(state, block), 2001 (unsigned long long)bytenr, 2002 dev_state->name, 2003 (unsigned long long)dev_bytenr, 2004 block->mirror_num, 2005 (unsigned long long)block->generation, 2006 (unsigned long long) 2007 le64_to_cpu(((struct btrfs_header *) 2008 mapped_datav[0])->generation)); 2009 /* it would not be safe to go on */ 2010 btrfsic_dump_tree(state); 2011 goto continue_loop; 2012 } 2013 2014 /* 2015 * Clear all references of this block. Do not free 2016 * the block itself even if is not referenced anymore 2017 * because it still carries valueable information 2018 * like whether it was ever written and IO completed. 
2019 */ 2020 list_for_each_safe(elem_ref_to, tmp_ref_to, 2021 &block->ref_to_list) { 2022 struct btrfsic_block_link *const l = 2023 list_entry(elem_ref_to, 2024 struct btrfsic_block_link, 2025 node_ref_to); 2026 2027 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2028 btrfsic_print_rem_link(state, l); 2029 l->ref_cnt--; 2030 if (0 == l->ref_cnt) { 2031 list_del(&l->node_ref_to); 2032 list_del(&l->node_ref_from); 2033 btrfsic_block_link_hashtable_remove(l); 2034 btrfsic_block_link_free(l); 2035 } 2036 } 2037 2038 if (block->is_superblock) 2039 ret = btrfsic_map_superblock(state, bytenr, 2040 processed_len, 2041 bdev, &block_ctx); 2042 else 2043 ret = btrfsic_map_block(state, bytenr, processed_len, 2044 &block_ctx, 0); 2045 if (ret) { 2046 printk(KERN_INFO 2047 "btrfsic: btrfsic_map_block(root @%llu)" 2048 " failed!\n", (unsigned long long)bytenr); 2049 goto continue_loop; 2050 } 2051 block_ctx.datav = mapped_datav; 2052 /* the following is required in case of writes to mirrors, 2053 * use the same that was used for the lookup */ 2054 block_ctx.dev = dev_state; 2055 block_ctx.dev_bytenr = dev_bytenr; 2056 2057 if (is_metadata || state->include_extent_data) { 2058 block->never_written = 0; 2059 block->iodone_w_error = 0; 2060 if (NULL != bio) { 2061 block->is_iodone = 0; 2062 BUG_ON(NULL == bio_is_patched); 2063 if (!*bio_is_patched) { 2064 block->orig_bio_bh_private = 2065 bio->bi_private; 2066 block->orig_bio_bh_end_io.bio = 2067 bio->bi_end_io; 2068 block->next_in_same_bio = NULL; 2069 bio->bi_private = block; 2070 bio->bi_end_io = btrfsic_bio_end_io; 2071 *bio_is_patched = 1; 2072 } else { 2073 struct btrfsic_block *chained_block = 2074 (struct btrfsic_block *) 2075 bio->bi_private; 2076 2077 BUG_ON(NULL == chained_block); 2078 block->orig_bio_bh_private = 2079 chained_block->orig_bio_bh_private; 2080 block->orig_bio_bh_end_io.bio = 2081 chained_block->orig_bio_bh_end_io. 
2082 bio; 2083 block->next_in_same_bio = chained_block; 2084 bio->bi_private = block; 2085 } 2086 } else if (NULL != bh) { 2087 block->is_iodone = 0; 2088 block->orig_bio_bh_private = bh->b_private; 2089 block->orig_bio_bh_end_io.bh = bh->b_end_io; 2090 block->next_in_same_bio = NULL; 2091 bh->b_private = block; 2092 bh->b_end_io = btrfsic_bh_end_io; 2093 } else { 2094 block->is_iodone = 1; 2095 block->orig_bio_bh_private = NULL; 2096 block->orig_bio_bh_end_io.bio = NULL; 2097 block->next_in_same_bio = NULL; 2098 } 2099 } 2100 2101 block->flush_gen = dev_state->last_flush_gen + 1; 2102 block->submit_bio_bh_rw = submit_bio_bh_rw; 2103 if (is_metadata) { 2104 block->logical_bytenr = bytenr; 2105 block->is_metadata = 1; 2106 if (block->is_superblock) { 2107 BUG_ON(PAGE_CACHE_SIZE != 2108 BTRFS_SUPER_INFO_SIZE); 2109 ret = btrfsic_process_written_superblock( 2110 state, 2111 block, 2112 (struct btrfs_super_block *) 2113 mapped_datav[0]); 2114 if (state->print_mask & 2115 BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE) { 2116 printk(KERN_INFO 2117 "[after new superblock is written]:\n"); 2118 btrfsic_dump_tree_sub(state, block, 0); 2119 } 2120 } else { 2121 block->mirror_num = 0; /* unknown */ 2122 ret = btrfsic_process_metablock( 2123 state, 2124 block, 2125 &block_ctx, 2126 0, 0); 2127 } 2128 if (ret) 2129 printk(KERN_INFO 2130 "btrfsic: btrfsic_process_metablock" 2131 "(root @%llu) failed!\n", 2132 (unsigned long long)dev_bytenr); 2133 } else { 2134 block->is_metadata = 0; 2135 block->mirror_num = 0; /* unknown */ 2136 block->generation = BTRFSIC_GENERATION_UNKNOWN; 2137 if (!state->include_extent_data 2138 && list_empty(&block->ref_from_list)) { 2139 /* 2140 * disk block is overwritten with extent 2141 * data (not meta data) and we are configured 2142 * to not include extent data: take the 2143 * chance and free the block's memory 2144 */ 2145 btrfsic_block_hashtable_remove(block); 2146 list_del(&block->all_blocks_node); 2147 btrfsic_block_free(block); 2148 } 2149 } 2150 
btrfsic_release_block_ctx(&block_ctx); 2151 } else { 2152 /* block has not been found in hash table */ 2153 u64 bytenr; 2154 2155 if (!is_metadata) { 2156 processed_len = state->datablock_size; 2157 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2158 printk(KERN_INFO "Written block (%s/%llu/?)" 2159 " !found in hash table, D.\n", 2160 dev_state->name, 2161 (unsigned long long)dev_bytenr); 2162 if (!state->include_extent_data) { 2163 /* ignore that written D block */ 2164 goto continue_loop; 2165 } 2166 2167 /* this is getting ugly for the 2168 * include_extent_data case... */ 2169 bytenr = 0; /* unknown */ 2170 block_ctx.start = bytenr; 2171 block_ctx.len = processed_len; 2172 block_ctx.mem_to_free = NULL; 2173 block_ctx.pagev = NULL; 2174 } else { 2175 processed_len = state->metablock_size; 2176 bytenr = le64_to_cpu(((struct btrfs_header *) 2177 mapped_datav[0])->bytenr); 2178 btrfsic_cmp_log_and_dev_bytenr(state, bytenr, dev_state, 2179 dev_bytenr); 2180 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2181 printk(KERN_INFO 2182 "Written block @%llu (%s/%llu/?)" 2183 " !found in hash table, M.\n", 2184 (unsigned long long)bytenr, 2185 dev_state->name, 2186 (unsigned long long)dev_bytenr); 2187 2188 ret = btrfsic_map_block(state, bytenr, processed_len, 2189 &block_ctx, 0); 2190 if (ret) { 2191 printk(KERN_INFO 2192 "btrfsic: btrfsic_map_block(root @%llu)" 2193 " failed!\n", 2194 (unsigned long long)dev_bytenr); 2195 goto continue_loop; 2196 } 2197 } 2198 block_ctx.datav = mapped_datav; 2199 /* the following is required in case of writes to mirrors, 2200 * use the same that was used for the lookup */ 2201 block_ctx.dev = dev_state; 2202 block_ctx.dev_bytenr = dev_bytenr; 2203 2204 block = btrfsic_block_alloc(); 2205 if (NULL == block) { 2206 printk(KERN_INFO "btrfsic: error, kmalloc failed!\n"); 2207 btrfsic_release_block_ctx(&block_ctx); 2208 goto continue_loop; 2209 } 2210 block->dev_state = dev_state; 2211 block->dev_bytenr = dev_bytenr; 2212 
block->logical_bytenr = bytenr; 2213 block->is_metadata = is_metadata; 2214 block->never_written = 0; 2215 block->iodone_w_error = 0; 2216 block->mirror_num = 0; /* unknown */ 2217 block->flush_gen = dev_state->last_flush_gen + 1; 2218 block->submit_bio_bh_rw = submit_bio_bh_rw; 2219 if (NULL != bio) { 2220 block->is_iodone = 0; 2221 BUG_ON(NULL == bio_is_patched); 2222 if (!*bio_is_patched) { 2223 block->orig_bio_bh_private = bio->bi_private; 2224 block->orig_bio_bh_end_io.bio = bio->bi_end_io; 2225 block->next_in_same_bio = NULL; 2226 bio->bi_private = block; 2227 bio->bi_end_io = btrfsic_bio_end_io; 2228 *bio_is_patched = 1; 2229 } else { 2230 struct btrfsic_block *chained_block = 2231 (struct btrfsic_block *) 2232 bio->bi_private; 2233 2234 BUG_ON(NULL == chained_block); 2235 block->orig_bio_bh_private = 2236 chained_block->orig_bio_bh_private; 2237 block->orig_bio_bh_end_io.bio = 2238 chained_block->orig_bio_bh_end_io.bio; 2239 block->next_in_same_bio = chained_block; 2240 bio->bi_private = block; 2241 } 2242 } else if (NULL != bh) { 2243 block->is_iodone = 0; 2244 block->orig_bio_bh_private = bh->b_private; 2245 block->orig_bio_bh_end_io.bh = bh->b_end_io; 2246 block->next_in_same_bio = NULL; 2247 bh->b_private = block; 2248 bh->b_end_io = btrfsic_bh_end_io; 2249 } else { 2250 block->is_iodone = 1; 2251 block->orig_bio_bh_private = NULL; 2252 block->orig_bio_bh_end_io.bio = NULL; 2253 block->next_in_same_bio = NULL; 2254 } 2255 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2256 printk(KERN_INFO 2257 "New written %c-block @%llu (%s/%llu/%d)\n", 2258 is_metadata ? 
'M' : 'D', 2259 (unsigned long long)block->logical_bytenr, 2260 block->dev_state->name, 2261 (unsigned long long)block->dev_bytenr, 2262 block->mirror_num); 2263 list_add(&block->all_blocks_node, &state->all_blocks_list); 2264 btrfsic_block_hashtable_add(block, &state->block_hashtable); 2265 2266 if (is_metadata) { 2267 ret = btrfsic_process_metablock(state, block, 2268 &block_ctx, 0, 0); 2269 if (ret) 2270 printk(KERN_INFO 2271 "btrfsic: process_metablock(root @%llu)" 2272 " failed!\n", 2273 (unsigned long long)dev_bytenr); 2274 } 2275 btrfsic_release_block_ctx(&block_ctx); 2276 } 2277 2278 continue_loop: 2279 BUG_ON(!processed_len); 2280 dev_bytenr += processed_len; 2281 mapped_datav += processed_len >> PAGE_CACHE_SHIFT; 2282 num_pages -= processed_len >> PAGE_CACHE_SHIFT; 2283 goto again; 2284 } 2285 2286 static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status) 2287 { 2288 struct btrfsic_block *block = (struct btrfsic_block *)bp->bi_private; 2289 int iodone_w_error; 2290 2291 /* mutex is not held! 
This is not save if IO is not yet completed 2292 * on umount */ 2293 iodone_w_error = 0; 2294 if (bio_error_status) 2295 iodone_w_error = 1; 2296 2297 BUG_ON(NULL == block); 2298 bp->bi_private = block->orig_bio_bh_private; 2299 bp->bi_end_io = block->orig_bio_bh_end_io.bio; 2300 2301 do { 2302 struct btrfsic_block *next_block; 2303 struct btrfsic_dev_state *const dev_state = block->dev_state; 2304 2305 if ((dev_state->state->print_mask & 2306 BTRFSIC_PRINT_MASK_END_IO_BIO_BH)) 2307 printk(KERN_INFO 2308 "bio_end_io(err=%d) for %c @%llu (%s/%llu/%d)\n", 2309 bio_error_status, 2310 btrfsic_get_block_type(dev_state->state, block), 2311 (unsigned long long)block->logical_bytenr, 2312 dev_state->name, 2313 (unsigned long long)block->dev_bytenr, 2314 block->mirror_num); 2315 next_block = block->next_in_same_bio; 2316 block->iodone_w_error = iodone_w_error; 2317 if (block->submit_bio_bh_rw & REQ_FLUSH) { 2318 dev_state->last_flush_gen++; 2319 if ((dev_state->state->print_mask & 2320 BTRFSIC_PRINT_MASK_END_IO_BIO_BH)) 2321 printk(KERN_INFO 2322 "bio_end_io() new %s flush_gen=%llu\n", 2323 dev_state->name, 2324 (unsigned long long) 2325 dev_state->last_flush_gen); 2326 } 2327 if (block->submit_bio_bh_rw & REQ_FUA) 2328 block->flush_gen = 0; /* FUA completed means block is 2329 * on disk */ 2330 block->is_iodone = 1; /* for FLUSH, this releases the block */ 2331 block = next_block; 2332 } while (NULL != block); 2333 2334 bp->bi_end_io(bp, bio_error_status); 2335 } 2336 2337 static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate) 2338 { 2339 struct btrfsic_block *block = (struct btrfsic_block *)bh->b_private; 2340 int iodone_w_error = !uptodate; 2341 struct btrfsic_dev_state *dev_state; 2342 2343 BUG_ON(NULL == block); 2344 dev_state = block->dev_state; 2345 if ((dev_state->state->print_mask & BTRFSIC_PRINT_MASK_END_IO_BIO_BH)) 2346 printk(KERN_INFO 2347 "bh_end_io(error=%d) for %c @%llu (%s/%llu/%d)\n", 2348 iodone_w_error, 2349 
btrfsic_get_block_type(dev_state->state, block), 2350 (unsigned long long)block->logical_bytenr, 2351 block->dev_state->name, 2352 (unsigned long long)block->dev_bytenr, 2353 block->mirror_num); 2354 2355 block->iodone_w_error = iodone_w_error; 2356 if (block->submit_bio_bh_rw & REQ_FLUSH) { 2357 dev_state->last_flush_gen++; 2358 if ((dev_state->state->print_mask & 2359 BTRFSIC_PRINT_MASK_END_IO_BIO_BH)) 2360 printk(KERN_INFO 2361 "bh_end_io() new %s flush_gen=%llu\n", 2362 dev_state->name, 2363 (unsigned long long)dev_state->last_flush_gen); 2364 } 2365 if (block->submit_bio_bh_rw & REQ_FUA) 2366 block->flush_gen = 0; /* FUA completed means block is on disk */ 2367 2368 bh->b_private = block->orig_bio_bh_private; 2369 bh->b_end_io = block->orig_bio_bh_end_io.bh; 2370 block->is_iodone = 1; /* for FLUSH, this releases the block */ 2371 bh->b_end_io(bh, uptodate); 2372 } 2373 2374 static int btrfsic_process_written_superblock( 2375 struct btrfsic_state *state, 2376 struct btrfsic_block *const superblock, 2377 struct btrfs_super_block *const super_hdr) 2378 { 2379 int pass; 2380 2381 superblock->generation = btrfs_super_generation(super_hdr); 2382 if (!(superblock->generation > state->max_superblock_generation || 2383 0 == state->max_superblock_generation)) { 2384 if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE) 2385 printk(KERN_INFO 2386 "btrfsic: superblock @%llu (%s/%llu/%d)" 2387 " with old gen %llu <= %llu\n", 2388 (unsigned long long)superblock->logical_bytenr, 2389 superblock->dev_state->name, 2390 (unsigned long long)superblock->dev_bytenr, 2391 superblock->mirror_num, 2392 (unsigned long long) 2393 btrfs_super_generation(super_hdr), 2394 (unsigned long long) 2395 state->max_superblock_generation); 2396 } else { 2397 if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE) 2398 printk(KERN_INFO 2399 "btrfsic: got new superblock @%llu (%s/%llu/%d)" 2400 " with new gen %llu > %llu\n", 2401 (unsigned long long)superblock->logical_bytenr, 2402 
superblock->dev_state->name, 2403 (unsigned long long)superblock->dev_bytenr, 2404 superblock->mirror_num, 2405 (unsigned long long) 2406 btrfs_super_generation(super_hdr), 2407 (unsigned long long) 2408 state->max_superblock_generation); 2409 2410 state->max_superblock_generation = 2411 btrfs_super_generation(super_hdr); 2412 state->latest_superblock = superblock; 2413 } 2414 2415 for (pass = 0; pass < 3; pass++) { 2416 int ret; 2417 u64 next_bytenr; 2418 struct btrfsic_block *next_block; 2419 struct btrfsic_block_data_ctx tmp_next_block_ctx; 2420 struct btrfsic_block_link *l; 2421 int num_copies; 2422 int mirror_num; 2423 const char *additional_string = NULL; 2424 struct btrfs_disk_key tmp_disk_key; 2425 2426 tmp_disk_key.type = BTRFS_ROOT_ITEM_KEY; 2427 tmp_disk_key.offset = 0; 2428 2429 switch (pass) { 2430 case 0: 2431 tmp_disk_key.objectid = 2432 cpu_to_le64(BTRFS_ROOT_TREE_OBJECTID); 2433 additional_string = "root "; 2434 next_bytenr = btrfs_super_root(super_hdr); 2435 if (state->print_mask & 2436 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) 2437 printk(KERN_INFO "root@%llu\n", 2438 (unsigned long long)next_bytenr); 2439 break; 2440 case 1: 2441 tmp_disk_key.objectid = 2442 cpu_to_le64(BTRFS_CHUNK_TREE_OBJECTID); 2443 additional_string = "chunk "; 2444 next_bytenr = btrfs_super_chunk_root(super_hdr); 2445 if (state->print_mask & 2446 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) 2447 printk(KERN_INFO "chunk@%llu\n", 2448 (unsigned long long)next_bytenr); 2449 break; 2450 case 2: 2451 tmp_disk_key.objectid = 2452 cpu_to_le64(BTRFS_TREE_LOG_OBJECTID); 2453 additional_string = "log "; 2454 next_bytenr = btrfs_super_log_root(super_hdr); 2455 if (0 == next_bytenr) 2456 continue; 2457 if (state->print_mask & 2458 BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION) 2459 printk(KERN_INFO "log@%llu\n", 2460 (unsigned long long)next_bytenr); 2461 break; 2462 } 2463 2464 num_copies = 2465 btrfs_num_copies(&state->root->fs_info->mapping_tree, 2466 next_bytenr, 
BTRFS_SUPER_INFO_SIZE); 2467 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) 2468 printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", 2469 (unsigned long long)next_bytenr, num_copies); 2470 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { 2471 int was_created; 2472 2473 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2474 printk(KERN_INFO 2475 "btrfsic_process_written_superblock(" 2476 "mirror_num=%d)\n", mirror_num); 2477 ret = btrfsic_map_block(state, next_bytenr, 2478 BTRFS_SUPER_INFO_SIZE, 2479 &tmp_next_block_ctx, 2480 mirror_num); 2481 if (ret) { 2482 printk(KERN_INFO 2483 "btrfsic: btrfsic_map_block(@%llu," 2484 " mirror=%d) failed!\n", 2485 (unsigned long long)next_bytenr, 2486 mirror_num); 2487 return -1; 2488 } 2489 2490 next_block = btrfsic_block_lookup_or_add( 2491 state, 2492 &tmp_next_block_ctx, 2493 additional_string, 2494 1, 0, 1, 2495 mirror_num, 2496 &was_created); 2497 if (NULL == next_block) { 2498 printk(KERN_INFO 2499 "btrfsic: error, kmalloc failed!\n"); 2500 btrfsic_release_block_ctx(&tmp_next_block_ctx); 2501 return -1; 2502 } 2503 2504 next_block->disk_key = tmp_disk_key; 2505 if (was_created) 2506 next_block->generation = 2507 BTRFSIC_GENERATION_UNKNOWN; 2508 l = btrfsic_block_link_lookup_or_add( 2509 state, 2510 &tmp_next_block_ctx, 2511 next_block, 2512 superblock, 2513 BTRFSIC_GENERATION_UNKNOWN); 2514 btrfsic_release_block_ctx(&tmp_next_block_ctx); 2515 if (NULL == l) 2516 return -1; 2517 } 2518 } 2519 2520 if (-1 == btrfsic_check_all_ref_blocks(state, superblock, 0)) { 2521 WARN_ON(1); 2522 btrfsic_dump_tree(state); 2523 } 2524 2525 return 0; 2526 } 2527 2528 static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state, 2529 struct btrfsic_block *const block, 2530 int recursion_level) 2531 { 2532 struct list_head *elem_ref_to; 2533 int ret = 0; 2534 2535 if (recursion_level >= 3 + BTRFS_MAX_LEVEL) { 2536 /* 2537 * Note that this situation can happen and does not 2538 * indicate an error in regular 
cases. It happens 2539 * when disk blocks are freed and later reused. 2540 * The check-integrity module is not aware of any 2541 * block free operations, it just recognizes block 2542 * write operations. Therefore it keeps the linkage 2543 * information for a block until a block is 2544 * rewritten. This can temporarily cause incorrect 2545 * and even circular linkage informations. This 2546 * causes no harm unless such blocks are referenced 2547 * by the most recent super block. 2548 */ 2549 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2550 printk(KERN_INFO 2551 "btrfsic: abort cyclic linkage (case 1).\n"); 2552 2553 return ret; 2554 } 2555 2556 /* 2557 * This algorithm is recursive because the amount of used stack 2558 * space is very small and the max recursion depth is limited. 2559 */ 2560 list_for_each(elem_ref_to, &block->ref_to_list) { 2561 const struct btrfsic_block_link *const l = 2562 list_entry(elem_ref_to, struct btrfsic_block_link, 2563 node_ref_to); 2564 2565 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2566 printk(KERN_INFO 2567 "rl=%d, %c @%llu (%s/%llu/%d)" 2568 " %u* refers to %c @%llu (%s/%llu/%d)\n", 2569 recursion_level, 2570 btrfsic_get_block_type(state, block), 2571 (unsigned long long)block->logical_bytenr, 2572 block->dev_state->name, 2573 (unsigned long long)block->dev_bytenr, 2574 block->mirror_num, 2575 l->ref_cnt, 2576 btrfsic_get_block_type(state, l->block_ref_to), 2577 (unsigned long long) 2578 l->block_ref_to->logical_bytenr, 2579 l->block_ref_to->dev_state->name, 2580 (unsigned long long)l->block_ref_to->dev_bytenr, 2581 l->block_ref_to->mirror_num); 2582 if (l->block_ref_to->never_written) { 2583 printk(KERN_INFO "btrfs: attempt to write superblock" 2584 " which references block %c @%llu (%s/%llu/%d)" 2585 " which is never written!\n", 2586 btrfsic_get_block_type(state, l->block_ref_to), 2587 (unsigned long long) 2588 l->block_ref_to->logical_bytenr, 2589 l->block_ref_to->dev_state->name, 2590 (unsigned long 
long)l->block_ref_to->dev_bytenr, 2591 l->block_ref_to->mirror_num); 2592 ret = -1; 2593 } else if (!l->block_ref_to->is_iodone) { 2594 printk(KERN_INFO "btrfs: attempt to write superblock" 2595 " which references block %c @%llu (%s/%llu/%d)" 2596 " which is not yet iodone!\n", 2597 btrfsic_get_block_type(state, l->block_ref_to), 2598 (unsigned long long) 2599 l->block_ref_to->logical_bytenr, 2600 l->block_ref_to->dev_state->name, 2601 (unsigned long long)l->block_ref_to->dev_bytenr, 2602 l->block_ref_to->mirror_num); 2603 ret = -1; 2604 } else if (l->parent_generation != 2605 l->block_ref_to->generation && 2606 BTRFSIC_GENERATION_UNKNOWN != 2607 l->parent_generation && 2608 BTRFSIC_GENERATION_UNKNOWN != 2609 l->block_ref_to->generation) { 2610 printk(KERN_INFO "btrfs: attempt to write superblock" 2611 " which references block %c @%llu (%s/%llu/%d)" 2612 " with generation %llu !=" 2613 " parent generation %llu!\n", 2614 btrfsic_get_block_type(state, l->block_ref_to), 2615 (unsigned long long) 2616 l->block_ref_to->logical_bytenr, 2617 l->block_ref_to->dev_state->name, 2618 (unsigned long long)l->block_ref_to->dev_bytenr, 2619 l->block_ref_to->mirror_num, 2620 (unsigned long long)l->block_ref_to->generation, 2621 (unsigned long long)l->parent_generation); 2622 ret = -1; 2623 } else if (l->block_ref_to->flush_gen > 2624 l->block_ref_to->dev_state->last_flush_gen) { 2625 printk(KERN_INFO "btrfs: attempt to write superblock" 2626 " which references block %c @%llu (%s/%llu/%d)" 2627 " which is not flushed out of disk's write cache" 2628 " (block flush_gen=%llu," 2629 " dev->flush_gen=%llu)!\n", 2630 btrfsic_get_block_type(state, l->block_ref_to), 2631 (unsigned long long) 2632 l->block_ref_to->logical_bytenr, 2633 l->block_ref_to->dev_state->name, 2634 (unsigned long long)l->block_ref_to->dev_bytenr, 2635 l->block_ref_to->mirror_num, 2636 (unsigned long long)block->flush_gen, 2637 (unsigned long long) 2638 l->block_ref_to->dev_state->last_flush_gen); 2639 ret = -1; 2640 
} else if (-1 == btrfsic_check_all_ref_blocks(state, 2641 l->block_ref_to, 2642 recursion_level + 2643 1)) { 2644 ret = -1; 2645 } 2646 } 2647 2648 return ret; 2649 } 2650 2651 static int btrfsic_is_block_ref_by_superblock( 2652 const struct btrfsic_state *state, 2653 const struct btrfsic_block *block, 2654 int recursion_level) 2655 { 2656 struct list_head *elem_ref_from; 2657 2658 if (recursion_level >= 3 + BTRFS_MAX_LEVEL) { 2659 /* refer to comment at "abort cyclic linkage (case 1)" */ 2660 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2661 printk(KERN_INFO 2662 "btrfsic: abort cyclic linkage (case 2).\n"); 2663 2664 return 0; 2665 } 2666 2667 /* 2668 * This algorithm is recursive because the amount of used stack space 2669 * is very small and the max recursion depth is limited. 2670 */ 2671 list_for_each(elem_ref_from, &block->ref_from_list) { 2672 const struct btrfsic_block_link *const l = 2673 list_entry(elem_ref_from, struct btrfsic_block_link, 2674 node_ref_from); 2675 2676 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2677 printk(KERN_INFO 2678 "rl=%d, %c @%llu (%s/%llu/%d)" 2679 " is ref %u* from %c @%llu (%s/%llu/%d)\n", 2680 recursion_level, 2681 btrfsic_get_block_type(state, block), 2682 (unsigned long long)block->logical_bytenr, 2683 block->dev_state->name, 2684 (unsigned long long)block->dev_bytenr, 2685 block->mirror_num, 2686 l->ref_cnt, 2687 btrfsic_get_block_type(state, l->block_ref_from), 2688 (unsigned long long) 2689 l->block_ref_from->logical_bytenr, 2690 l->block_ref_from->dev_state->name, 2691 (unsigned long long) 2692 l->block_ref_from->dev_bytenr, 2693 l->block_ref_from->mirror_num); 2694 if (l->block_ref_from->is_superblock && 2695 state->latest_superblock->dev_bytenr == 2696 l->block_ref_from->dev_bytenr && 2697 state->latest_superblock->dev_state->bdev == 2698 l->block_ref_from->dev_state->bdev) 2699 return 1; 2700 else if (btrfsic_is_block_ref_by_superblock(state, 2701 l->block_ref_from, 2702 recursion_level + 2703 1)) 
2704 return 1; 2705 } 2706 2707 return 0; 2708 } 2709 2710 static void btrfsic_print_add_link(const struct btrfsic_state *state, 2711 const struct btrfsic_block_link *l) 2712 { 2713 printk(KERN_INFO 2714 "Add %u* link from %c @%llu (%s/%llu/%d)" 2715 " to %c @%llu (%s/%llu/%d).\n", 2716 l->ref_cnt, 2717 btrfsic_get_block_type(state, l->block_ref_from), 2718 (unsigned long long)l->block_ref_from->logical_bytenr, 2719 l->block_ref_from->dev_state->name, 2720 (unsigned long long)l->block_ref_from->dev_bytenr, 2721 l->block_ref_from->mirror_num, 2722 btrfsic_get_block_type(state, l->block_ref_to), 2723 (unsigned long long)l->block_ref_to->logical_bytenr, 2724 l->block_ref_to->dev_state->name, 2725 (unsigned long long)l->block_ref_to->dev_bytenr, 2726 l->block_ref_to->mirror_num); 2727 } 2728 2729 static void btrfsic_print_rem_link(const struct btrfsic_state *state, 2730 const struct btrfsic_block_link *l) 2731 { 2732 printk(KERN_INFO 2733 "Rem %u* link from %c @%llu (%s/%llu/%d)" 2734 " to %c @%llu (%s/%llu/%d).\n", 2735 l->ref_cnt, 2736 btrfsic_get_block_type(state, l->block_ref_from), 2737 (unsigned long long)l->block_ref_from->logical_bytenr, 2738 l->block_ref_from->dev_state->name, 2739 (unsigned long long)l->block_ref_from->dev_bytenr, 2740 l->block_ref_from->mirror_num, 2741 btrfsic_get_block_type(state, l->block_ref_to), 2742 (unsigned long long)l->block_ref_to->logical_bytenr, 2743 l->block_ref_to->dev_state->name, 2744 (unsigned long long)l->block_ref_to->dev_bytenr, 2745 l->block_ref_to->mirror_num); 2746 } 2747 2748 static char btrfsic_get_block_type(const struct btrfsic_state *state, 2749 const struct btrfsic_block *block) 2750 { 2751 if (block->is_superblock && 2752 state->latest_superblock->dev_bytenr == block->dev_bytenr && 2753 state->latest_superblock->dev_state->bdev == block->dev_state->bdev) 2754 return 'S'; 2755 else if (block->is_superblock) 2756 return 's'; 2757 else if (block->is_metadata) 2758 return 'M'; 2759 else 2760 return 'D'; 2761 } 2762 
2763 static void btrfsic_dump_tree(const struct btrfsic_state *state) 2764 { 2765 btrfsic_dump_tree_sub(state, state->latest_superblock, 0); 2766 } 2767 2768 static void btrfsic_dump_tree_sub(const struct btrfsic_state *state, 2769 const struct btrfsic_block *block, 2770 int indent_level) 2771 { 2772 struct list_head *elem_ref_to; 2773 int indent_add; 2774 static char buf[80]; 2775 int cursor_position; 2776 2777 /* 2778 * Should better fill an on-stack buffer with a complete line and 2779 * dump it at once when it is time to print a newline character. 2780 */ 2781 2782 /* 2783 * This algorithm is recursive because the amount of used stack space 2784 * is very small and the max recursion depth is limited. 2785 */ 2786 indent_add = sprintf(buf, "%c-%llu(%s/%llu/%d)", 2787 btrfsic_get_block_type(state, block), 2788 (unsigned long long)block->logical_bytenr, 2789 block->dev_state->name, 2790 (unsigned long long)block->dev_bytenr, 2791 block->mirror_num); 2792 if (indent_level + indent_add > BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL) { 2793 printk("[...]\n"); 2794 return; 2795 } 2796 printk(buf); 2797 indent_level += indent_add; 2798 if (list_empty(&block->ref_to_list)) { 2799 printk("\n"); 2800 return; 2801 } 2802 if (block->mirror_num > 1 && 2803 !(state->print_mask & BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS)) { 2804 printk(" [...]\n"); 2805 return; 2806 } 2807 2808 cursor_position = indent_level; 2809 list_for_each(elem_ref_to, &block->ref_to_list) { 2810 const struct btrfsic_block_link *const l = 2811 list_entry(elem_ref_to, struct btrfsic_block_link, 2812 node_ref_to); 2813 2814 while (cursor_position < indent_level) { 2815 printk(" "); 2816 cursor_position++; 2817 } 2818 if (l->ref_cnt > 1) 2819 indent_add = sprintf(buf, " %d*--> ", l->ref_cnt); 2820 else 2821 indent_add = sprintf(buf, " --> "); 2822 if (indent_level + indent_add > 2823 BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL) { 2824 printk("[...]\n"); 2825 cursor_position = 0; 2826 continue; 2827 } 2828 2829 printk(buf); 
2830 2831 btrfsic_dump_tree_sub(state, l->block_ref_to, 2832 indent_level + indent_add); 2833 cursor_position = 0; 2834 } 2835 } 2836 2837 static struct btrfsic_block_link *btrfsic_block_link_lookup_or_add( 2838 struct btrfsic_state *state, 2839 struct btrfsic_block_data_ctx *next_block_ctx, 2840 struct btrfsic_block *next_block, 2841 struct btrfsic_block *from_block, 2842 u64 parent_generation) 2843 { 2844 struct btrfsic_block_link *l; 2845 2846 l = btrfsic_block_link_hashtable_lookup(next_block_ctx->dev->bdev, 2847 next_block_ctx->dev_bytenr, 2848 from_block->dev_state->bdev, 2849 from_block->dev_bytenr, 2850 &state->block_link_hashtable); 2851 if (NULL == l) { 2852 l = btrfsic_block_link_alloc(); 2853 if (NULL == l) { 2854 printk(KERN_INFO 2855 "btrfsic: error, kmalloc" " failed!\n"); 2856 return NULL; 2857 } 2858 2859 l->block_ref_to = next_block; 2860 l->block_ref_from = from_block; 2861 l->ref_cnt = 1; 2862 l->parent_generation = parent_generation; 2863 2864 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2865 btrfsic_print_add_link(state, l); 2866 2867 list_add(&l->node_ref_to, &from_block->ref_to_list); 2868 list_add(&l->node_ref_from, &next_block->ref_from_list); 2869 2870 btrfsic_block_link_hashtable_add(l, 2871 &state->block_link_hashtable); 2872 } else { 2873 l->ref_cnt++; 2874 l->parent_generation = parent_generation; 2875 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2876 btrfsic_print_add_link(state, l); 2877 } 2878 2879 return l; 2880 } 2881 2882 static struct btrfsic_block *btrfsic_block_lookup_or_add( 2883 struct btrfsic_state *state, 2884 struct btrfsic_block_data_ctx *block_ctx, 2885 const char *additional_string, 2886 int is_metadata, 2887 int is_iodone, 2888 int never_written, 2889 int mirror_num, 2890 int *was_created) 2891 { 2892 struct btrfsic_block *block; 2893 2894 block = btrfsic_block_hashtable_lookup(block_ctx->dev->bdev, 2895 block_ctx->dev_bytenr, 2896 &state->block_hashtable); 2897 if (NULL == block) { 2898 struct 
btrfsic_dev_state *dev_state; 2899 2900 block = btrfsic_block_alloc(); 2901 if (NULL == block) { 2902 printk(KERN_INFO "btrfsic: error, kmalloc failed!\n"); 2903 return NULL; 2904 } 2905 dev_state = btrfsic_dev_state_lookup(block_ctx->dev->bdev); 2906 if (NULL == dev_state) { 2907 printk(KERN_INFO 2908 "btrfsic: error, lookup dev_state failed!\n"); 2909 btrfsic_block_free(block); 2910 return NULL; 2911 } 2912 block->dev_state = dev_state; 2913 block->dev_bytenr = block_ctx->dev_bytenr; 2914 block->logical_bytenr = block_ctx->start; 2915 block->is_metadata = is_metadata; 2916 block->is_iodone = is_iodone; 2917 block->never_written = never_written; 2918 block->mirror_num = mirror_num; 2919 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 2920 printk(KERN_INFO 2921 "New %s%c-block @%llu (%s/%llu/%d)\n", 2922 additional_string, 2923 btrfsic_get_block_type(state, block), 2924 (unsigned long long)block->logical_bytenr, 2925 dev_state->name, 2926 (unsigned long long)block->dev_bytenr, 2927 mirror_num); 2928 list_add(&block->all_blocks_node, &state->all_blocks_list); 2929 btrfsic_block_hashtable_add(block, &state->block_hashtable); 2930 if (NULL != was_created) 2931 *was_created = 1; 2932 } else { 2933 if (NULL != was_created) 2934 *was_created = 0; 2935 } 2936 2937 return block; 2938 } 2939 2940 static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state, 2941 u64 bytenr, 2942 struct btrfsic_dev_state *dev_state, 2943 u64 dev_bytenr) 2944 { 2945 int num_copies; 2946 int mirror_num; 2947 int ret; 2948 struct btrfsic_block_data_ctx block_ctx; 2949 int match = 0; 2950 2951 num_copies = btrfs_num_copies(&state->root->fs_info->mapping_tree, 2952 bytenr, state->metablock_size); 2953 2954 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { 2955 ret = btrfsic_map_block(state, bytenr, state->metablock_size, 2956 &block_ctx, mirror_num); 2957 if (ret) { 2958 printk(KERN_INFO "btrfsic:" 2959 " btrfsic_map_block(logical @%llu," 2960 " mirror %d) failed!\n", 
2961 (unsigned long long)bytenr, mirror_num); 2962 continue; 2963 } 2964 2965 if (dev_state->bdev == block_ctx.dev->bdev && 2966 dev_bytenr == block_ctx.dev_bytenr) { 2967 match++; 2968 btrfsic_release_block_ctx(&block_ctx); 2969 break; 2970 } 2971 btrfsic_release_block_ctx(&block_ctx); 2972 } 2973 2974 if (!match) { 2975 printk(KERN_INFO "btrfs: attempt to write M-block which contains logical bytenr that doesn't map to dev+physical bytenr of submit_bio," 2976 " buffer->log_bytenr=%llu, submit_bio(bdev=%s," 2977 " phys_bytenr=%llu)!\n", 2978 (unsigned long long)bytenr, dev_state->name, 2979 (unsigned long long)dev_bytenr); 2980 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { 2981 ret = btrfsic_map_block(state, bytenr, 2982 state->metablock_size, 2983 &block_ctx, mirror_num); 2984 if (ret) 2985 continue; 2986 2987 printk(KERN_INFO "Read logical bytenr @%llu maps to" 2988 " (%s/%llu/%d)\n", 2989 (unsigned long long)bytenr, 2990 block_ctx.dev->name, 2991 (unsigned long long)block_ctx.dev_bytenr, 2992 mirror_num); 2993 } 2994 WARN_ON(1); 2995 } 2996 } 2997 2998 static struct btrfsic_dev_state *btrfsic_dev_state_lookup( 2999 struct block_device *bdev) 3000 { 3001 struct btrfsic_dev_state *ds; 3002 3003 ds = btrfsic_dev_state_hashtable_lookup(bdev, 3004 &btrfsic_dev_state_hashtable); 3005 return ds; 3006 } 3007 3008 int btrfsic_submit_bh(int rw, struct buffer_head *bh) 3009 { 3010 struct btrfsic_dev_state *dev_state; 3011 3012 if (!btrfsic_is_initialized) 3013 return submit_bh(rw, bh); 3014 3015 mutex_lock(&btrfsic_mutex); 3016 /* since btrfsic_submit_bh() might also be called before 3017 * btrfsic_mount(), this might return NULL */ 3018 dev_state = btrfsic_dev_state_lookup(bh->b_bdev); 3019 3020 /* Only called to write the superblock (incl. 
FLUSH/FUA) */ 3021 if (NULL != dev_state && 3022 (rw & WRITE) && bh->b_size > 0) { 3023 u64 dev_bytenr; 3024 3025 dev_bytenr = 4096 * bh->b_blocknr; 3026 if (dev_state->state->print_mask & 3027 BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH) 3028 printk(KERN_INFO 3029 "submit_bh(rw=0x%x, blocknr=%lu (bytenr %llu)," 3030 " size=%lu, data=%p, bdev=%p)\n", 3031 rw, (unsigned long)bh->b_blocknr, 3032 (unsigned long long)dev_bytenr, 3033 (unsigned long)bh->b_size, bh->b_data, 3034 bh->b_bdev); 3035 btrfsic_process_written_block(dev_state, dev_bytenr, 3036 &bh->b_data, 1, NULL, 3037 NULL, bh, rw); 3038 } else if (NULL != dev_state && (rw & REQ_FLUSH)) { 3039 if (dev_state->state->print_mask & 3040 BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH) 3041 printk(KERN_INFO 3042 "submit_bh(rw=0x%x FLUSH, bdev=%p)\n", 3043 rw, bh->b_bdev); 3044 if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) { 3045 if ((dev_state->state->print_mask & 3046 (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH | 3047 BTRFSIC_PRINT_MASK_VERBOSE))) 3048 printk(KERN_INFO 3049 "btrfsic_submit_bh(%s) with FLUSH" 3050 " but dummy block already in use" 3051 " (ignored)!\n", 3052 dev_state->name); 3053 } else { 3054 struct btrfsic_block *const block = 3055 &dev_state->dummy_block_for_bio_bh_flush; 3056 3057 block->is_iodone = 0; 3058 block->never_written = 0; 3059 block->iodone_w_error = 0; 3060 block->flush_gen = dev_state->last_flush_gen + 1; 3061 block->submit_bio_bh_rw = rw; 3062 block->orig_bio_bh_private = bh->b_private; 3063 block->orig_bio_bh_end_io.bh = bh->b_end_io; 3064 block->next_in_same_bio = NULL; 3065 bh->b_private = block; 3066 bh->b_end_io = btrfsic_bh_end_io; 3067 } 3068 } 3069 mutex_unlock(&btrfsic_mutex); 3070 return submit_bh(rw, bh); 3071 } 3072 3073 void btrfsic_submit_bio(int rw, struct bio *bio) 3074 { 3075 struct btrfsic_dev_state *dev_state; 3076 3077 if (!btrfsic_is_initialized) { 3078 submit_bio(rw, bio); 3079 return; 3080 } 3081 3082 mutex_lock(&btrfsic_mutex); 3083 /* since btrfsic_submit_bio() is also called 
before 3084 * btrfsic_mount(), this might return NULL */ 3085 dev_state = btrfsic_dev_state_lookup(bio->bi_bdev); 3086 if (NULL != dev_state && 3087 (rw & WRITE) && NULL != bio->bi_io_vec) { 3088 unsigned int i; 3089 u64 dev_bytenr; 3090 int bio_is_patched; 3091 char **mapped_datav; 3092 3093 dev_bytenr = 512 * bio->bi_sector; 3094 bio_is_patched = 0; 3095 if (dev_state->state->print_mask & 3096 BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH) 3097 printk(KERN_INFO 3098 "submit_bio(rw=0x%x, bi_vcnt=%u," 3099 " bi_sector=%lu (bytenr %llu), bi_bdev=%p)\n", 3100 rw, bio->bi_vcnt, (unsigned long)bio->bi_sector, 3101 (unsigned long long)dev_bytenr, 3102 bio->bi_bdev); 3103 3104 mapped_datav = kmalloc(sizeof(*mapped_datav) * bio->bi_vcnt, 3105 GFP_NOFS); 3106 if (!mapped_datav) 3107 goto leave; 3108 for (i = 0; i < bio->bi_vcnt; i++) { 3109 BUG_ON(bio->bi_io_vec[i].bv_len != PAGE_CACHE_SIZE); 3110 mapped_datav[i] = kmap(bio->bi_io_vec[i].bv_page); 3111 if (!mapped_datav[i]) { 3112 while (i > 0) { 3113 i--; 3114 kunmap(bio->bi_io_vec[i].bv_page); 3115 } 3116 kfree(mapped_datav); 3117 goto leave; 3118 } 3119 if ((BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH | 3120 BTRFSIC_PRINT_MASK_VERBOSE) == 3121 (dev_state->state->print_mask & 3122 (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH | 3123 BTRFSIC_PRINT_MASK_VERBOSE))) 3124 printk(KERN_INFO 3125 "#%u: page=%p, len=%u, offset=%u\n", 3126 i, bio->bi_io_vec[i].bv_page, 3127 bio->bi_io_vec[i].bv_len, 3128 bio->bi_io_vec[i].bv_offset); 3129 } 3130 btrfsic_process_written_block(dev_state, dev_bytenr, 3131 mapped_datav, bio->bi_vcnt, 3132 bio, &bio_is_patched, 3133 NULL, rw); 3134 while (i > 0) { 3135 i--; 3136 kunmap(bio->bi_io_vec[i].bv_page); 3137 } 3138 kfree(mapped_datav); 3139 } else if (NULL != dev_state && (rw & REQ_FLUSH)) { 3140 if (dev_state->state->print_mask & 3141 BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH) 3142 printk(KERN_INFO 3143 "submit_bio(rw=0x%x FLUSH, bdev=%p)\n", 3144 rw, bio->bi_bdev); 3145 if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) { 3146 if 
((dev_state->state->print_mask & 3147 (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH | 3148 BTRFSIC_PRINT_MASK_VERBOSE))) 3149 printk(KERN_INFO 3150 "btrfsic_submit_bio(%s) with FLUSH" 3151 " but dummy block already in use" 3152 " (ignored)!\n", 3153 dev_state->name); 3154 } else { 3155 struct btrfsic_block *const block = 3156 &dev_state->dummy_block_for_bio_bh_flush; 3157 3158 block->is_iodone = 0; 3159 block->never_written = 0; 3160 block->iodone_w_error = 0; 3161 block->flush_gen = dev_state->last_flush_gen + 1; 3162 block->submit_bio_bh_rw = rw; 3163 block->orig_bio_bh_private = bio->bi_private; 3164 block->orig_bio_bh_end_io.bio = bio->bi_end_io; 3165 block->next_in_same_bio = NULL; 3166 bio->bi_private = block; 3167 bio->bi_end_io = btrfsic_bio_end_io; 3168 } 3169 } 3170 leave: 3171 mutex_unlock(&btrfsic_mutex); 3172 3173 submit_bio(rw, bio); 3174 } 3175 3176 int btrfsic_mount(struct btrfs_root *root, 3177 struct btrfs_fs_devices *fs_devices, 3178 int including_extent_data, u32 print_mask) 3179 { 3180 int ret; 3181 struct btrfsic_state *state; 3182 struct list_head *dev_head = &fs_devices->devices; 3183 struct btrfs_device *device; 3184 3185 if (root->nodesize != root->leafsize) { 3186 printk(KERN_INFO 3187 "btrfsic: cannot handle nodesize %d != leafsize %d!\n", 3188 root->nodesize, root->leafsize); 3189 return -1; 3190 } 3191 if (root->nodesize & ((u64)PAGE_CACHE_SIZE - 1)) { 3192 printk(KERN_INFO 3193 "btrfsic: cannot handle nodesize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n", 3194 root->nodesize, (unsigned long)PAGE_CACHE_SIZE); 3195 return -1; 3196 } 3197 if (root->leafsize & ((u64)PAGE_CACHE_SIZE - 1)) { 3198 printk(KERN_INFO 3199 "btrfsic: cannot handle leafsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n", 3200 root->leafsize, (unsigned long)PAGE_CACHE_SIZE); 3201 return -1; 3202 } 3203 if (root->sectorsize & ((u64)PAGE_CACHE_SIZE - 1)) { 3204 printk(KERN_INFO 3205 "btrfsic: cannot handle sectorsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n", 
3206 root->sectorsize, (unsigned long)PAGE_CACHE_SIZE); 3207 return -1; 3208 } 3209 state = kzalloc(sizeof(*state), GFP_NOFS); 3210 if (NULL == state) { 3211 printk(KERN_INFO "btrfs check-integrity: kmalloc() failed!\n"); 3212 return -1; 3213 } 3214 3215 if (!btrfsic_is_initialized) { 3216 mutex_init(&btrfsic_mutex); 3217 btrfsic_dev_state_hashtable_init(&btrfsic_dev_state_hashtable); 3218 btrfsic_is_initialized = 1; 3219 } 3220 mutex_lock(&btrfsic_mutex); 3221 state->root = root; 3222 state->print_mask = print_mask; 3223 state->include_extent_data = including_extent_data; 3224 state->csum_size = 0; 3225 state->metablock_size = root->nodesize; 3226 state->datablock_size = root->sectorsize; 3227 INIT_LIST_HEAD(&state->all_blocks_list); 3228 btrfsic_block_hashtable_init(&state->block_hashtable); 3229 btrfsic_block_link_hashtable_init(&state->block_link_hashtable); 3230 state->max_superblock_generation = 0; 3231 state->latest_superblock = NULL; 3232 3233 list_for_each_entry(device, dev_head, dev_list) { 3234 struct btrfsic_dev_state *ds; 3235 char *p; 3236 3237 if (!device->bdev || !device->name) 3238 continue; 3239 3240 ds = btrfsic_dev_state_alloc(); 3241 if (NULL == ds) { 3242 printk(KERN_INFO 3243 "btrfs check-integrity: kmalloc() failed!\n"); 3244 mutex_unlock(&btrfsic_mutex); 3245 return -1; 3246 } 3247 ds->bdev = device->bdev; 3248 ds->state = state; 3249 bdevname(ds->bdev, ds->name); 3250 ds->name[BDEVNAME_SIZE - 1] = '\0'; 3251 for (p = ds->name; *p != '\0'; p++); 3252 while (p > ds->name && *p != '/') 3253 p--; 3254 if (*p == '/') 3255 p++; 3256 strlcpy(ds->name, p, sizeof(ds->name)); 3257 btrfsic_dev_state_hashtable_add(ds, 3258 &btrfsic_dev_state_hashtable); 3259 } 3260 3261 ret = btrfsic_process_superblock(state, fs_devices); 3262 if (0 != ret) { 3263 mutex_unlock(&btrfsic_mutex); 3264 btrfsic_unmount(root, fs_devices); 3265 return ret; 3266 } 3267 3268 if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_DATABASE) 3269 btrfsic_dump_database(state); 3270 if 
(state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_TREE) 3271 btrfsic_dump_tree(state); 3272 3273 mutex_unlock(&btrfsic_mutex); 3274 return 0; 3275 } 3276 3277 void btrfsic_unmount(struct btrfs_root *root, 3278 struct btrfs_fs_devices *fs_devices) 3279 { 3280 struct list_head *elem_all; 3281 struct list_head *tmp_all; 3282 struct btrfsic_state *state; 3283 struct list_head *dev_head = &fs_devices->devices; 3284 struct btrfs_device *device; 3285 3286 if (!btrfsic_is_initialized) 3287 return; 3288 3289 mutex_lock(&btrfsic_mutex); 3290 3291 state = NULL; 3292 list_for_each_entry(device, dev_head, dev_list) { 3293 struct btrfsic_dev_state *ds; 3294 3295 if (!device->bdev || !device->name) 3296 continue; 3297 3298 ds = btrfsic_dev_state_hashtable_lookup( 3299 device->bdev, 3300 &btrfsic_dev_state_hashtable); 3301 if (NULL != ds) { 3302 state = ds->state; 3303 btrfsic_dev_state_hashtable_remove(ds); 3304 btrfsic_dev_state_free(ds); 3305 } 3306 } 3307 3308 if (NULL == state) { 3309 printk(KERN_INFO 3310 "btrfsic: error, cannot find state information" 3311 " on umount!\n"); 3312 mutex_unlock(&btrfsic_mutex); 3313 return; 3314 } 3315 3316 /* 3317 * Don't care about keeping the lists' state up to date, 3318 * just free all memory that was allocated dynamically. 3319 * Free the blocks and the block_links. 
3320 */ 3321 list_for_each_safe(elem_all, tmp_all, &state->all_blocks_list) { 3322 struct btrfsic_block *const b_all = 3323 list_entry(elem_all, struct btrfsic_block, 3324 all_blocks_node); 3325 struct list_head *elem_ref_to; 3326 struct list_head *tmp_ref_to; 3327 3328 list_for_each_safe(elem_ref_to, tmp_ref_to, 3329 &b_all->ref_to_list) { 3330 struct btrfsic_block_link *const l = 3331 list_entry(elem_ref_to, 3332 struct btrfsic_block_link, 3333 node_ref_to); 3334 3335 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) 3336 btrfsic_print_rem_link(state, l); 3337 3338 l->ref_cnt--; 3339 if (0 == l->ref_cnt) 3340 btrfsic_block_link_free(l); 3341 } 3342 3343 if (b_all->is_iodone || b_all->never_written) 3344 btrfsic_block_free(b_all); 3345 else 3346 printk(KERN_INFO "btrfs: attempt to free %c-block" 3347 " @%llu (%s/%llu/%d) on umount which is" 3348 " not yet iodone!\n", 3349 btrfsic_get_block_type(state, b_all), 3350 (unsigned long long)b_all->logical_bytenr, 3351 b_all->dev_state->name, 3352 (unsigned long long)b_all->dev_bytenr, 3353 b_all->mirror_num); 3354 } 3355 3356 mutex_unlock(&btrfsic_mutex); 3357 3358 kfree(state); 3359 } 3360