/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */
#include <linux/sched.h>
#include <linux/bio.h>
#include <linux/buffer_head.h>
#include <asm/div64.h>
#include "ctree.h"
#include "extent_map.h"
#include "disk-io.h"
#include "transaction.h"
#include "print-tree.h"
#include "volumes.h"

struct stripe {
	struct btrfs_device *dev;
	u64 physical;
};

struct multi_bio {
	atomic_t stripes;
	bio_end_io_t *end_io;
	void *private;
	int error;
};

struct map_lookup {
	u64 type;
	int io_align;
	int io_width;
	int stripe_len;
	int sector_size;
	int num_stripes;
	struct stripe stripes[];
};

#define map_lookup_size(n) (sizeof(struct map_lookup) + \
			    (sizeof(struct stripe) * (n)))

static DEFINE_MUTEX(uuid_mutex);
static LIST_HEAD(fs_uuids);

int btrfs_cleanup_fs_uuids(void)
{
	struct btrfs_fs_devices *fs_devices;
	struct list_head *uuid_cur;
	struct list_head *devices_cur;
	struct btrfs_device *dev;

	list_for_each(uuid_cur, &fs_uuids) {
		fs_devices = list_entry(uuid_cur, struct btrfs_fs_devices,
					list);
		while(!list_empty(&fs_devices->devices)) {
			devices_cur = fs_devices->devices.next;
			dev = list_entry(devices_cur, struct btrfs_device,
					 dev_list);
			printk("uuid cleanup finds %s\n", dev->name);
			if (dev->bdev) {
				printk("closing\n");
				close_bdev_excl(dev->bdev);
			}
			list_del(&dev->dev_list);
			kfree(dev);
		}
	}
	return 0;
}

static struct btrfs_device *__find_device(struct list_head *head, u64 devid)
{
	struct btrfs_device *dev;
	struct list_head *cur;

	list_for_each(cur, head) {
		dev = list_entry(cur, struct btrfs_device, dev_list);
		if (dev->devid == devid)
			return dev;
	}
	return NULL;
}

static struct btrfs_fs_devices *find_fsid(u8 *fsid)
{
	struct list_head *cur;
	struct btrfs_fs_devices *fs_devices;

	list_for_each(cur, &fs_uuids) {
		fs_devices = list_entry(cur, struct btrfs_fs_devices, list);
		if (memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE) == 0)
			return fs_devices;
	}
	return NULL;
}

static int device_list_add(const char *path,
			   struct btrfs_super_block *disk_super,
			   u64 devid, struct btrfs_fs_devices **fs_devices_ret)
{
	struct btrfs_device *device;
	struct btrfs_fs_devices *fs_devices;
	u64 found_transid = btrfs_super_generation(disk_super);

	fs_devices = find_fsid(disk_super->fsid);
	if (!fs_devices) {
		fs_devices = kmalloc(sizeof(*fs_devices), GFP_NOFS);
		if (!fs_devices)
			return -ENOMEM;
		INIT_LIST_HEAD(&fs_devices->devices);
		list_add(&fs_devices->list, &fs_uuids);
		memcpy(fs_devices->fsid, disk_super->fsid, BTRFS_FSID_SIZE);
		fs_devices->latest_devid = devid;
		fs_devices->latest_trans = found_transid;
		fs_devices->lowest_devid = (u64)-1;
		fs_devices->num_devices = 0;
		device = NULL;
	} else {
		device = __find_device(&fs_devices->devices, devid);
	}
	if (!device) {
		device = kzalloc(sizeof(*device), GFP_NOFS);
		if (!device) {
			/* we can safely leave the fs_devices entry around */
			return -ENOMEM;
		}
		device->devid = devid;
		device->name = kstrdup(path, GFP_NOFS);
		if (!device->name) {
			kfree(device);
			return -ENOMEM;
		}
		list_add(&device->dev_list, &fs_devices->devices);
		fs_devices->num_devices++;
	}

	if (found_transid > fs_devices->latest_trans) {
		fs_devices->latest_devid = devid;
		fs_devices->latest_trans = found_transid;
	}
	if (fs_devices->lowest_devid > devid) {
		fs_devices->lowest_devid = devid;
		printk("lowest devid now %Lu\n", devid);
	}
	*fs_devices_ret = fs_devices;
	return 0;
}

int btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
{
	struct list_head *head = &fs_devices->devices;
	struct list_head *cur;
	struct btrfs_device *device;

	mutex_lock(&uuid_mutex);
	list_for_each(cur, head) {
		device = list_entry(cur, struct btrfs_device, dev_list);
		if (device->bdev) {
			close_bdev_excl(device->bdev);
			printk("close devices closes %s\n", device->name);
		}
		device->bdev = NULL;
	}
	mutex_unlock(&uuid_mutex);
	return 0;
}

int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
		       int flags, void *holder)
{
	struct block_device *bdev;
	struct list_head *head = &fs_devices->devices;
	struct list_head *cur;
	struct btrfs_device *device;
	int ret;

	mutex_lock(&uuid_mutex);
	list_for_each(cur, head) {
		device = list_entry(cur, struct btrfs_device, dev_list);
		bdev = open_bdev_excl(device->name, flags, holder);
		printk("opening %s devid %Lu\n", device->name, device->devid);
		if (IS_ERR(bdev)) {
			printk("open %s failed\n", device->name);
			ret = PTR_ERR(bdev);
			goto fail;
		}
		if (device->devid == fs_devices->latest_devid)
			fs_devices->latest_bdev = bdev;
		if (device->devid == fs_devices->lowest_devid) {
			fs_devices->lowest_bdev = bdev;
			printk("lowest bdev %s\n", device->name);
		}
		device->bdev = bdev;
	}
	mutex_unlock(&uuid_mutex);
	return 0;
fail:
	mutex_unlock(&uuid_mutex);
	btrfs_close_devices(fs_devices);
	return ret;
}

int btrfs_scan_one_device(const char *path, int flags, void *holder,
			  struct btrfs_fs_devices **fs_devices_ret)
{
	struct btrfs_super_block *disk_super;
	struct block_device *bdev;
	struct buffer_head *bh;
	int ret;
	u64 devid;

	mutex_lock(&uuid_mutex);

	printk("scan one opens %s\n", path);
	bdev = open_bdev_excl(path, flags, holder);

	if (IS_ERR(bdev)) {
		printk("open failed\n");
		ret = PTR_ERR(bdev);
		goto error;
	}

	ret = set_blocksize(bdev, 4096);
	if (ret)
		goto error_close;
	bh = __bread(bdev, BTRFS_SUPER_INFO_OFFSET / 4096, 4096);
	if (!bh) {
		ret = -EIO;
		goto error_close;
	}
	disk_super = (struct btrfs_super_block *)bh->b_data;
	if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC,
		    sizeof(disk_super->magic))) {
		printk("no btrfs found on %s\n", path);
		ret = -EINVAL;
		goto error_brelse;
	}
	devid = le64_to_cpu(disk_super->dev_item.devid);
	printk("found device %Lu on %s\n", devid, path);
	ret = device_list_add(path, disk_super, devid, fs_devices_ret);

error_brelse:
	brelse(bh);
error_close:
	close_bdev_excl(bdev);
	printk("scan one closes bdev %s\n", path);
error:
	mutex_unlock(&uuid_mutex);
	return ret;
}

/*
 * this uses a pretty simple search, the expectation is that it is
 * called very infrequently and that a given device has a small number
 * of extents
 */
static int find_free_dev_extent(struct btrfs_trans_handle *trans,
				struct btrfs_device *device,
				struct btrfs_path *path,
				u64 num_bytes, u64 *start)
{
	struct btrfs_key key;
	struct btrfs_root *root = device->dev_root;
	struct btrfs_dev_extent *dev_extent = NULL;
	u64 hole_size = 0;
	u64 last_byte = 0;
	u64 search_start = 0;
	u64 search_end = device->total_bytes;
	int ret;
	int slot = 0;
	int start_found;
	struct extent_buffer *l;

	start_found = 0;
	path->reada = 2;

	/* FIXME use last free of some kind */

	/* we don't want to overwrite the superblock on the drive,
	 * so we make sure to start at an offset of at least 1MB
	 */
	search_start = max((u64)1024 * 1024, search_start);
	key.objectid = device->devid;
	key.offset = search_start;
	key.type = BTRFS_DEV_EXTENT_KEY;
	ret = btrfs_search_slot(trans, root, &key, path, 0, 0);
	if (ret < 0)
		goto error;
	ret = btrfs_previous_item(root, path, 0, key.type);
	if (ret < 0)
		goto error;
	l = path->nodes[0];
	btrfs_item_key_to_cpu(l, &key, path->slots[0]);
	while (1) {
		l = path->nodes[0];
		slot = path->slots[0];
		if (slot >= btrfs_header_nritems(l)) {
			ret = btrfs_next_leaf(root, path);
			if (ret == 0)
				continue;
			if (ret < 0)
				goto error;
no_more_items:
			if (!start_found) {
				if (search_start >= search_end) {
					ret = -ENOSPC;
					goto error;
				}
				*start = search_start;
				start_found = 1;
				goto check_pending;
			}
			*start = last_byte > search_start ?
				last_byte : search_start;
			if (search_end <= *start) {
				ret = -ENOSPC;
				goto error;
			}
			goto check_pending;
		}
		btrfs_item_key_to_cpu(l, &key, slot);

		if (key.objectid < device->devid)
			goto next;

		if (key.objectid > device->devid)
			goto no_more_items;

		if (key.offset >= search_start && key.offset > last_byte &&
		    start_found) {
			if (last_byte < search_start)
				last_byte = search_start;
			hole_size = key.offset - last_byte;
			if (key.offset > last_byte &&
			    hole_size >= num_bytes) {
				*start = last_byte;
				goto check_pending;
			}
		}
		if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY) {
			goto next;
		}

		start_found = 1;
		dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
		last_byte = key.offset + btrfs_dev_extent_length(l, dev_extent);
next:
		path->slots[0]++;
		cond_resched();
	}
check_pending:
	/* we have to make sure we didn't find an extent that has already
	 * been allocated by the map tree or the original allocation
	 */
	btrfs_release_path(root, path);
	BUG_ON(*start < search_start);

	if (*start + num_bytes > search_end) {
		ret = -ENOSPC;
		goto error;
	}
	/* check for pending inserts here */
	return 0;

error:
	btrfs_release_path(root, path);
	return ret;
}

int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
			   struct btrfs_device *device,
			   u64 owner, u64 num_bytes, u64 *start)
{
	int ret;
	struct btrfs_path *path;
	struct btrfs_root *root = device->dev_root;
	struct btrfs_dev_extent *extent;
	struct extent_buffer *leaf;
	struct btrfs_key key;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	ret = find_free_dev_extent(trans, device, path, num_bytes, start);
	if (ret) {
		goto err;
	}

	key.objectid = device->devid;
	key.offset = *start;
	key.type = BTRFS_DEV_EXTENT_KEY;
	ret = btrfs_insert_empty_item(trans, root, path, &key,
				      sizeof(*extent));
	BUG_ON(ret);

	leaf = path->nodes[0];
	extent = btrfs_item_ptr(leaf, path->slots[0],
				struct btrfs_dev_extent);
	btrfs_set_dev_extent_owner(leaf, extent, owner);
	btrfs_set_dev_extent_length(leaf, extent, num_bytes);
	btrfs_mark_buffer_dirty(leaf);
err:
	btrfs_free_path(path);
	return ret;
}

static int find_next_chunk(struct btrfs_root *root, u64 *objectid)
{
	struct btrfs_path *path;
	int ret;
	struct btrfs_key key;
	struct btrfs_key found_key;

	path = btrfs_alloc_path();
	BUG_ON(!path);

	key.objectid = (u64)-1;
	key.offset = (u64)-1;
	key.type = BTRFS_CHUNK_ITEM_KEY;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto error;

	BUG_ON(ret == 0);

	ret = btrfs_previous_item(root, path, 0, BTRFS_CHUNK_ITEM_KEY);
	if (ret) {
		*objectid = 0;
	} else {
		btrfs_item_key_to_cpu(path->nodes[0], &found_key,
				      path->slots[0]);
		*objectid = found_key.objectid + found_key.offset;
	}
	ret = 0;
error:
	btrfs_free_path(path);
	return ret;
}

static int find_next_devid(struct btrfs_root *root, struct btrfs_path *path,
			   u64 *objectid)
{
	int ret;
	struct btrfs_key key;
	struct btrfs_key found_key;

	key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
	key.type = BTRFS_DEV_ITEM_KEY;
	key.offset = (u64)-1;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto error;
	BUG_ON(ret == 0);

	ret = btrfs_previous_item(root, path, BTRFS_DEV_ITEMS_OBJECTID,
				  BTRFS_DEV_ITEM_KEY);
	if (ret) {
		*objectid = 1;
	} else {
		btrfs_item_key_to_cpu(path->nodes[0], &found_key,
				      path->slots[0]);
		*objectid = found_key.offset + 1;
	}
	ret = 0;
error:
	btrfs_release_path(root, path);
	return ret;
}

/*
 * the device information is stored in the chunk root
 * the btrfs_device struct should be fully filled in
 */
int btrfs_add_device(struct btrfs_trans_handle *trans,
		     struct btrfs_root *root,
		     struct btrfs_device *device)
{
	int ret;
	struct btrfs_path *path;
	struct btrfs_dev_item *dev_item;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	unsigned long ptr;
	u64 free_devid;

	root = root->fs_info->chunk_root;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	ret = find_next_devid(root, path, &free_devid);
	if (ret)
		goto out;

	key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
	key.type = BTRFS_DEV_ITEM_KEY;
	key.offset = free_devid;

	ret = btrfs_insert_empty_item(trans, root, path, &key,
				      sizeof(*dev_item));
	if (ret)
		goto out;

	leaf = path->nodes[0];
	dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item);

	device->devid = free_devid;
	btrfs_set_device_id(leaf, dev_item, device->devid);
	btrfs_set_device_type(leaf, dev_item, device->type);
	btrfs_set_device_io_align(leaf, dev_item, device->io_align);
	btrfs_set_device_io_width(leaf, dev_item, device->io_width);
	btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
	btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes);
	btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used);

	ptr = (unsigned long)btrfs_device_uuid(dev_item);
	write_extent_buffer(leaf, device->uuid, ptr, BTRFS_DEV_UUID_SIZE);
	btrfs_mark_buffer_dirty(leaf);
	ret = 0;

out:
	btrfs_free_path(path);
	return ret;
}

int btrfs_update_device(struct btrfs_trans_handle *trans,
			struct btrfs_device *device)
{
	int ret;
	struct btrfs_path *path;
	struct btrfs_root *root;
	struct btrfs_dev_item *dev_item;
	struct extent_buffer *leaf;
	struct btrfs_key key;

	root = device->dev_root->fs_info->chunk_root;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
	key.type = BTRFS_DEV_ITEM_KEY;
	key.offset = device->devid;

	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
	if (ret < 0)
		goto out;

	if (ret > 0) {
		ret = -ENOENT;
		goto out;
	}

	leaf = path->nodes[0];
	dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item);

	btrfs_set_device_id(leaf, dev_item, device->devid);
	btrfs_set_device_type(leaf, dev_item, device->type);
	btrfs_set_device_io_align(leaf, dev_item, device->io_align);
	btrfs_set_device_io_width(leaf, dev_item, device->io_width);
	btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
	btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes);
	btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used);
	btrfs_mark_buffer_dirty(leaf);

out:
	btrfs_free_path(path);
	return ret;
}

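/*
 * The superblock keeps a small copy of the system chunks in
 * sys_chunk_array so the chunk tree itself can be located at mount time.
 * Each entry is a btrfs_disk_key immediately followed by the matching
 * btrfs_chunk item (including its stripes), packed back to back;
 * btrfs_read_sys_array() walks the array with the same layout.
 */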
int btrfs_add_system_chunk(struct btrfs_trans_handle *trans,
			   struct btrfs_root *root,
			   struct btrfs_key *key,
			   struct btrfs_chunk *chunk, int item_size)
{
	struct btrfs_super_block *super_copy = &root->fs_info->super_copy;
	struct btrfs_disk_key disk_key;
	u32 array_size;
	u8 *ptr;

	array_size = btrfs_super_sys_array_size(super_copy);
	if (array_size + item_size > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE)
		return -EFBIG;

	ptr = super_copy->sys_chunk_array + array_size;
	btrfs_cpu_key_to_disk(&disk_key, key);
	memcpy(ptr, &disk_key, sizeof(disk_key));
	ptr += sizeof(disk_key);
	memcpy(ptr, chunk, item_size);
	item_size += sizeof(disk_key);
	btrfs_set_super_sys_array_size(super_copy, array_size + item_size);
	return 0;
}

int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
		      struct btrfs_root *extent_root, u64 *start,
		      u64 *num_bytes, u64 type)
{
	u64 dev_offset;
	struct btrfs_fs_info *info = extent_root->fs_info;
	struct btrfs_root *chunk_root = extent_root->fs_info->chunk_root;
	struct btrfs_stripe *stripes;
	struct btrfs_device *device = NULL;
	struct btrfs_chunk *chunk;
	struct list_head private_devs;
	struct list_head *dev_list = &extent_root->fs_info->fs_devices->devices;
	struct list_head *cur;
	struct extent_map_tree *em_tree;
	struct map_lookup *map;
	struct extent_map *em;
	u64 physical;
	u64 calc_size = 1024 * 1024 * 1024;
	u64 min_free = calc_size;
	u64 avail;
	u64 max_avail = 0;
	int num_stripes = 1;
	int looped = 0;
	int ret;
	int index;
	int stripe_len = 64 * 1024;
	struct btrfs_key key;

	if (list_empty(dev_list))
		return -ENOSPC;

	if (type & (BTRFS_BLOCK_GROUP_RAID0))
		num_stripes = btrfs_super_num_devices(&info->super_copy);
	if (type & (BTRFS_BLOCK_GROUP_DUP))
		num_stripes = 2;
	if (type & (BTRFS_BLOCK_GROUP_RAID1)) {
		num_stripes = min_t(u64, 2,
				    btrfs_super_num_devices(&info->super_copy));
	}
again:
	INIT_LIST_HEAD(&private_devs);
	cur = dev_list->next;
	index = 0;

	if (type & BTRFS_BLOCK_GROUP_DUP)
		min_free = calc_size * 2;

	/* build a private list of devices we will allocate from */
	while(index < num_stripes) {
		device = list_entry(cur, struct btrfs_device, dev_list);

		avail = device->total_bytes - device->bytes_used;
		cur = cur->next;
		if (avail > max_avail)
			max_avail = avail;
		if (avail >= min_free) {
			list_move_tail(&device->dev_list, &private_devs);
			index++;
			if (type & BTRFS_BLOCK_GROUP_DUP)
				index++;
		}
		if (cur == dev_list)
			break;
	}
	if (index < num_stripes) {
		list_splice(&private_devs, dev_list);
		if (!looped && max_avail > 0) {
			looped = 1;
			calc_size = max_avail;
			goto again;
		}
		return -ENOSPC;
	}

	ret = find_next_chunk(chunk_root, &key.objectid);
	if (ret)
		return ret;

	chunk = kmalloc(btrfs_chunk_item_size(num_stripes), GFP_NOFS);
	if (!chunk)
		return -ENOMEM;

	map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
	if (!map) {
		kfree(chunk);
		return -ENOMEM;
	}

	stripes = &chunk->stripe;

	if (type & (BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_DUP))
		*num_bytes = calc_size;
	else
		*num_bytes = calc_size * num_stripes;

	index = 0;
	printk("new chunk type %Lu start %Lu size %Lu\n",
	       type, key.objectid, *num_bytes);
	while(index < num_stripes) {
		BUG_ON(list_empty(&private_devs));
		cur = private_devs.next;
		device = list_entry(cur, struct btrfs_device, dev_list);

		/* loop over this device again if we're doing a dup group */
		if (!(type & BTRFS_BLOCK_GROUP_DUP) ||
		    (index == num_stripes - 1))
			list_move_tail(&device->dev_list, dev_list);

		ret = btrfs_alloc_dev_extent(trans, device,
					     key.objectid,
					     calc_size, &dev_offset);
		BUG_ON(ret);
		printk("alloc chunk start %Lu size %Lu from dev %Lu type %Lu\n",
		       key.objectid, calc_size, device->devid, type);
		device->bytes_used += calc_size;
		ret = btrfs_update_device(trans, device);
		BUG_ON(ret);

		map->stripes[index].dev = device;
		map->stripes[index].physical = dev_offset;
		btrfs_set_stack_stripe_devid(stripes + index, device->devid);
		btrfs_set_stack_stripe_offset(stripes + index, dev_offset);
		physical = dev_offset;
		index++;
	}
	BUG_ON(!list_empty(&private_devs));

	/* key.objectid was set above */
	key.offset = *num_bytes;
	key.type = BTRFS_CHUNK_ITEM_KEY;
	btrfs_set_stack_chunk_owner(chunk, extent_root->root_key.objectid);
	btrfs_set_stack_chunk_stripe_len(chunk, stripe_len);
	btrfs_set_stack_chunk_type(chunk, type);
	btrfs_set_stack_chunk_num_stripes(chunk, num_stripes);
	btrfs_set_stack_chunk_io_align(chunk, stripe_len);
	btrfs_set_stack_chunk_io_width(chunk, stripe_len);
	btrfs_set_stack_chunk_sector_size(chunk, extent_root->sectorsize);
	map->sector_size = extent_root->sectorsize;
	map->stripe_len = stripe_len;
	map->io_align = stripe_len;
	map->io_width = stripe_len;
	map->type = type;
	map->num_stripes = num_stripes;

	ret = btrfs_insert_item(trans, chunk_root, &key, chunk,
				btrfs_chunk_item_size(num_stripes));
	BUG_ON(ret);
	*start = key.objectid;

	em = alloc_extent_map(GFP_NOFS);
	if (!em) {
		/* don't leak the chunk item copy or the map lookup */
		kfree(chunk);
		kfree(map);
		return -ENOMEM;
	}
	em->bdev = (struct block_device *)map;
	em->start = key.objectid;
	em->len = key.offset;
	em->block_start = 0;

	kfree(chunk);

	em_tree = &extent_root->fs_info->mapping_tree.map_tree;
	spin_lock(&em_tree->lock);
	ret = add_extent_mapping(em_tree, em);
	BUG_ON(ret);
	spin_unlock(&em_tree->lock);
	free_extent_map(em);
	return ret;
}

void btrfs_mapping_init(struct btrfs_mapping_tree *tree)
{
	extent_map_tree_init(&tree->map_tree, GFP_NOFS);
}

void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree)
{
	struct extent_map *em;

	while(1) {
		spin_lock(&tree->map_tree.lock);
		em = lookup_extent_mapping(&tree->map_tree, 0, (u64)-1);
		if (em)
			remove_extent_mapping(&tree->map_tree, em);
		spin_unlock(&tree->map_tree.lock);
		if (!em)
			break;
		kfree(em->bdev);
		/* once for us */
		free_extent_map(em);
		/* once for the tree */
		free_extent_map(em);
	}
}

int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
		    int dev_nr, u64 logical, u64 *phys, u64 *length,
		    struct btrfs_device **dev, int *total_devs)
{
	struct extent_map *em;
	struct map_lookup *map;
	struct extent_map_tree *em_tree = &map_tree->map_tree;
	u64 offset;
	u64 stripe_offset;
	u64 stripe_nr;
	int stripe_index;

	spin_lock(&em_tree->lock);
	em = lookup_extent_mapping(em_tree, logical, *length);
	BUG_ON(!em);

	BUG_ON(em->start > logical || em->start + em->len < logical);
	map = (struct map_lookup *)em->bdev;
	offset = logical - em->start;

	stripe_nr = offset;
	/*
	 * stripe_nr counts the total number of stripes we have to stride
	 * to get to this block
	 */
	do_div(stripe_nr, map->stripe_len);

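	/*
	 * for example (illustrative numbers only): with a 64K stripe_len
	 * and offset 200K, the do_div above leaves stripe_nr == 3, so
	 * stripe_offset below becomes 192K and the block sits at
	 * 200K - 192K = 8K into its stripe
	 */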
	stripe_offset = stripe_nr * map->stripe_len;
	BUG_ON(offset < stripe_offset);

	/* stripe_offset is the offset of this block in its stripe */
	stripe_offset = offset - stripe_offset;

	if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
		stripe_index = dev_nr;
		if (rw & (1 << BIO_RW))
			*total_devs = map->num_stripes;
		else {
			int i;
			u64 least = (u64)-1;
			struct btrfs_device *cur;

			for (i = 0; i < map->num_stripes; i++) {
				cur = map->stripes[i].dev;
				spin_lock(&cur->io_lock);
				if (cur->total_ios < least) {
					least = cur->total_ios;
					stripe_index = i;
				}
				spin_unlock(&cur->io_lock);
			}
			*total_devs = 1;
		}
	} else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
		if (rw == WRITE) {
			*total_devs = map->num_stripes;
			stripe_index = dev_nr;
		} else {
			stripe_index = 0;
			*total_devs = 1;
		}
	} else {
		/*
		 * after this do_div call, stripe_nr is the number of stripes
		 * on this device we have to walk to find the data, and
		 * stripe_index is the number of our device in the stripe array
		 */
		stripe_index = do_div(stripe_nr, map->num_stripes);
	}
	BUG_ON(stripe_index >= map->num_stripes);
	*phys = map->stripes[stripe_index].physical + stripe_offset +
		stripe_nr * map->stripe_len;

	if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 |
			 BTRFS_BLOCK_GROUP_DUP)) {
		/* we limit the length of each bio to what fits in a stripe */
		*length = min_t(u64, em->len - offset,
				map->stripe_len - stripe_offset);
	} else {
		*length = em->len - offset;
	}
	*dev = map->stripes[stripe_index].dev;
	free_extent_map(em);
	spin_unlock(&em_tree->lock);
	return 0;
}

#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
static void end_bio_multi_stripe(struct bio *bio, int err)
#else
static int end_bio_multi_stripe(struct bio *bio,
				unsigned int bytes_done, int err)
#endif
{
	struct multi_bio *multi = bio->bi_private;

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
	if (bio->bi_size)
		return 1;
#endif
	if (err)
		multi->error = err;

	if (atomic_dec_and_test(&multi->stripes)) {
		bio->bi_private = multi->private;
		bio->bi_end_io = multi->end_io;

		if (!err && multi->error)
			err = multi->error;
		kfree(multi);

		bio_endio(bio, err);
	} else {
		bio_put(bio);
	}
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
	return 0;
#endif
}

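/*
 * map a logical bio onto the physical devices.  For mirrored block
 * groups (RAID1/DUP writes), btrfs_map_block() reports more than one
 * target device; the bio is cloned once per extra stripe and all the
 * copies share one multi_bio, so the original end_io is only run after
 * the last copy completes (see end_bio_multi_stripe above).
 */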
int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio)
{
	struct btrfs_mapping_tree *map_tree;
	struct btrfs_device *dev;
	struct bio *first_bio = bio;
	u64 logical = bio->bi_sector << 9;
	u64 physical;
	u64 length = 0;
	u64 map_length;
	struct bio_vec *bvec;
	struct multi_bio *multi = NULL;
	int i;
	int ret;
	int dev_nr = 0;
	int total_devs = 1;

	bio_for_each_segment(bvec, bio, i) {
		length += bvec->bv_len;
	}

	map_tree = &root->fs_info->mapping_tree;
	map_length = length;
	while(dev_nr < total_devs) {
		ret = btrfs_map_block(map_tree, rw, dev_nr, logical,
				      &physical, &map_length, &dev,
				      &total_devs);
		if (map_length < length) {
			printk("mapping failed logical %Lu bio len %Lu "
			       "physical %Lu len %Lu\n", logical, length,
			       physical, map_length);
			BUG();
		}
		BUG_ON(map_length < length);
		if (total_devs > 1) {
			if (!multi) {
				multi = kmalloc(sizeof(*multi), GFP_NOFS);
				atomic_set(&multi->stripes, 1);
				multi->end_io = bio->bi_end_io;
				multi->private = first_bio->bi_private;
				multi->error = 0;
			} else {
				atomic_inc(&multi->stripes);
			}
			if (dev_nr < total_devs - 1) {
				bio = bio_clone(first_bio, GFP_NOFS);
				BUG_ON(!bio);
			} else {
				bio = first_bio;
			}
			bio->bi_private = multi;
			bio->bi_end_io = end_bio_multi_stripe;
		}
		bio->bi_sector = physical >> 9;
		bio->bi_bdev = dev->bdev;
		spin_lock(&dev->io_lock);
		dev->total_ios++;
		spin_unlock(&dev->io_lock);
		submit_bio(rw, bio);
		dev_nr++;
	}
	return 0;
}

struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid)
{
	struct list_head *head = &root->fs_info->fs_devices->devices;

	return __find_device(head, devid);
}

static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
			  struct extent_buffer *leaf,
			  struct btrfs_chunk *chunk)
{
	struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
	struct map_lookup *map;
	struct extent_map *em;
	u64 logical;
	u64 length;
	u64 devid;
	int num_stripes;
	int ret;
	int i;

	logical = key->objectid;
	length = key->offset;
	spin_lock(&map_tree->map_tree.lock);
	em = lookup_extent_mapping(&map_tree->map_tree, logical, 1);

	/* already mapped? */
	if (em && em->start <= logical && em->start + em->len > logical) {
		free_extent_map(em);
		spin_unlock(&map_tree->map_tree.lock);
		return 0;
	} else if (em) {
		free_extent_map(em);
	}
	spin_unlock(&map_tree->map_tree.lock);

	em = alloc_extent_map(GFP_NOFS);
	if (!em)
		return -ENOMEM;
	num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
	map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
	if (!map) {
		free_extent_map(em);
		return -ENOMEM;
	}

	em->bdev = (struct block_device *)map;
	em->start = logical;
	em->len = length;
	em->block_start = 0;

	map->num_stripes = num_stripes;
	map->io_width = btrfs_chunk_io_width(leaf, chunk);
	map->io_align = btrfs_chunk_io_align(leaf, chunk);
	map->sector_size = btrfs_chunk_sector_size(leaf, chunk);
	map->stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
	map->type = btrfs_chunk_type(leaf, chunk);
	for (i = 0; i < num_stripes; i++) {
		map->stripes[i].physical =
			btrfs_stripe_offset_nr(leaf, chunk, i);
		devid = btrfs_stripe_devid_nr(leaf, chunk, i);
		map->stripes[i].dev = btrfs_find_device(root, devid);
		if (!map->stripes[i].dev) {
			kfree(map);
			free_extent_map(em);
			return -EIO;
		}
	}

	spin_lock(&map_tree->map_tree.lock);
	ret = add_extent_mapping(&map_tree->map_tree, em);
	BUG_ON(ret);
	spin_unlock(&map_tree->map_tree.lock);
	free_extent_map(em);

	return 0;
}

static int fill_device_from_item(struct extent_buffer *leaf,
				 struct btrfs_dev_item *dev_item,
				 struct btrfs_device *device)
{
	unsigned long ptr;

	device->devid = btrfs_device_id(leaf, dev_item);
	device->total_bytes = btrfs_device_total_bytes(leaf, dev_item);
	device->bytes_used = btrfs_device_bytes_used(leaf, dev_item);
	device->type = btrfs_device_type(leaf, dev_item);
	device->io_align = btrfs_device_io_align(leaf, dev_item);
	device->io_width = btrfs_device_io_width(leaf, dev_item);
	device->sector_size = btrfs_device_sector_size(leaf, dev_item);

	ptr = (unsigned long)btrfs_device_uuid(dev_item);
	read_extent_buffer(leaf, device->uuid, ptr, BTRFS_DEV_UUID_SIZE);

	return 0;
}

static int read_one_dev(struct btrfs_root *root,
			struct extent_buffer *leaf,
			struct btrfs_dev_item *dev_item)
{
	struct btrfs_device *device;
	u64 devid;
	int ret;

	devid = btrfs_device_id(leaf, dev_item);
	device = btrfs_find_device(root, devid);
	if (!device) {
		printk("warning devid %Lu not found already\n", devid);
		device = kmalloc(sizeof(*device), GFP_NOFS);
		if (!device)
			return -ENOMEM;
		list_add(&device->dev_list,
			 &root->fs_info->fs_devices->devices);
		device->total_ios = 0;
		spin_lock_init(&device->io_lock);
	}

	fill_device_from_item(leaf, dev_item, device);
	device->dev_root = root->fs_info->dev_root;
	ret = 0;
#if 0
	ret = btrfs_open_device(device);
	if (ret) {
		kfree(device);
	}
#endif
	return ret;
}

int btrfs_read_super_device(struct btrfs_root *root, struct extent_buffer *buf)
{
	struct btrfs_dev_item *dev_item;

	dev_item = (struct btrfs_dev_item *)offsetof(struct btrfs_super_block,
						     dev_item);
	return read_one_dev(root, buf, dev_item);
}

int btrfs_read_sys_array(struct btrfs_root *root)
{
	struct btrfs_super_block *super_copy = &root->fs_info->super_copy;
	struct extent_buffer *sb = root->fs_info->sb_buffer;
	struct btrfs_disk_key *disk_key;
	struct btrfs_chunk *chunk;
	struct btrfs_key key;
	u32 num_stripes;
	u32 array_size;
	u32 len = 0;
	u8 *ptr;
	unsigned long sb_ptr;
	u32 cur;
	int ret;

	array_size = btrfs_super_sys_array_size(super_copy);

	/*
	 * we do this loop twice, once for the device items and
	 * once for all of the chunks.  This way there are device
	 * structs filled in for every chunk
	 */
	ptr = super_copy->sys_chunk_array;
	sb_ptr = offsetof(struct btrfs_super_block, sys_chunk_array);
	cur = 0;

	while (cur < array_size) {
		disk_key = (struct btrfs_disk_key *)ptr;
		btrfs_disk_key_to_cpu(&key, disk_key);

		len = sizeof(*disk_key);
		ptr += len;
		sb_ptr += len;
		cur += len;

		if (key.type == BTRFS_CHUNK_ITEM_KEY) {
			chunk = (struct btrfs_chunk *)sb_ptr;
			ret = read_one_chunk(root, &key, sb, chunk);
			BUG_ON(ret);
			num_stripes = btrfs_chunk_num_stripes(sb, chunk);
			len = btrfs_chunk_item_size(num_stripes);
		} else {
			BUG();
		}
		ptr += len;
		sb_ptr += len;
		cur += len;
	}
	return 0;
}

int btrfs_read_chunk_tree(struct btrfs_root *root)
{
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	struct btrfs_key found_key;
	int ret;
	int slot;

	root = root->fs_info->chunk_root;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	/* first we search for all of the device items, and then we
	 * read in all of the chunk items.
	 * This way we can create chunk mappings that reference all of the
	 * devices that are found
	 */
	key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
	key.offset = 0;
	key.type = 0;
again:
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	while(1) {
		leaf = path->nodes[0];
		slot = path->slots[0];
		if (slot >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root, path);
			if (ret == 0)
				continue;
			if (ret < 0)
				goto error;
			break;
		}
		btrfs_item_key_to_cpu(leaf, &found_key, slot);
		if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) {
			if (found_key.objectid != BTRFS_DEV_ITEMS_OBJECTID)
				break;
			if (found_key.type == BTRFS_DEV_ITEM_KEY) {
				struct btrfs_dev_item *dev_item;
				dev_item = btrfs_item_ptr(leaf, slot,
						  struct btrfs_dev_item);
				ret = read_one_dev(root, leaf, dev_item);
				BUG_ON(ret);
			}
		} else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) {
			struct btrfs_chunk *chunk;
			chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
			ret = read_one_chunk(root, &found_key, leaf, chunk);
		}
		path->slots[0]++;
	}
	if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) {
		key.objectid = 0;
		btrfs_release_path(root, path);
		goto again;
	}

	ret = 0;
error:
	btrfs_free_path(path);
	return ret;
}