1 /* 2 * Copyright (C) 2007 Oracle. All rights reserved. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public 6 * License v2 as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public 14 * License along with this program; if not, write to the 15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 16 * Boston, MA 021110-1307, USA. 17 */ 18 #include <linux/sched.h> 19 #include <linux/bio.h> 20 #include <linux/buffer_head.h> 21 #include <asm/div64.h> 22 #include "ctree.h" 23 #include "extent_map.h" 24 #include "disk-io.h" 25 #include "transaction.h" 26 #include "print-tree.h" 27 #include "volumes.h" 28 29 struct stripe { 30 struct btrfs_device *dev; 31 u64 physical; 32 }; 33 34 struct map_lookup { 35 u64 type; 36 int io_align; 37 int io_width; 38 int stripe_len; 39 int sector_size; 40 int num_stripes; 41 struct stripe stripes[]; 42 }; 43 44 #define map_lookup_size(n) (sizeof(struct map_lookup) + \ 45 (sizeof(struct stripe) * (n))) 46 47 static DEFINE_MUTEX(uuid_mutex); 48 static LIST_HEAD(fs_uuids); 49 50 int btrfs_cleanup_fs_uuids(void) 51 { 52 struct btrfs_fs_devices *fs_devices; 53 struct list_head *uuid_cur; 54 struct list_head *devices_cur; 55 struct btrfs_device *dev; 56 57 list_for_each(uuid_cur, &fs_uuids) { 58 fs_devices = list_entry(uuid_cur, struct btrfs_fs_devices, 59 list); 60 while(!list_empty(&fs_devices->devices)) { 61 devices_cur = fs_devices->devices.next; 62 dev = list_entry(devices_cur, struct btrfs_device, 63 dev_list); 64 printk("uuid cleanup finds %s\n", dev->name); 65 if (dev->bdev) { 66 printk("closing\n"); 67 close_bdev_excl(dev->bdev); 68 } 69 list_del(&dev->dev_list); 70 kfree(dev); 71 } 72 } 73 return 0; 74 } 75 76 static struct btrfs_device *__find_device(struct list_head *head, u64 devid) 77 { 78 struct btrfs_device *dev; 79 struct list_head *cur; 80 81 list_for_each(cur, head) { 82 dev = list_entry(cur, struct btrfs_device, dev_list); 83 if (dev->devid == devid) 84 return dev; 85 } 86 return NULL; 87 } 88 89 static struct btrfs_fs_devices *find_fsid(u8 *fsid) 90 { 91 struct list_head *cur; 92 struct btrfs_fs_devices *fs_devices; 93 94 list_for_each(cur, &fs_uuids) { 95 fs_devices = list_entry(cur, struct btrfs_fs_devices, list); 96 if (memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE) == 0) 97 return fs_devices; 98 } 99 return NULL; 100 } 101 102 static int device_list_add(const char *path, 103 struct btrfs_super_block *disk_super, 104 u64 devid, struct btrfs_fs_devices **fs_devices_ret) 105 { 106 struct btrfs_device *device; 107 struct btrfs_fs_devices *fs_devices; 108 u64 found_transid = btrfs_super_generation(disk_super); 109 110 fs_devices = find_fsid(disk_super->fsid); 111 if (!fs_devices) { 112 fs_devices = kmalloc(sizeof(*fs_devices), GFP_NOFS); 113 if (!fs_devices) 114 return -ENOMEM; 115 INIT_LIST_HEAD(&fs_devices->devices); 116 list_add(&fs_devices->list, &fs_uuids); 117 memcpy(fs_devices->fsid, disk_super->fsid, BTRFS_FSID_SIZE); 118 fs_devices->latest_devid = devid; 119 fs_devices->latest_trans = found_transid; 120 fs_devices->lowest_devid = (u64)-1; 121 fs_devices->num_devices = 0; 122 device = NULL; 123 } else { 124 device = __find_device(&fs_devices->devices, devid); 125 } 126 if (!device) { 127 device = kzalloc(sizeof(*device), GFP_NOFS); 128 if (!device) { 129 /* we can safely leave the fs_devices entry around */ 130 return -ENOMEM; 131 } 132 device->devid = devid; 133 device->name = kstrdup(path, GFP_NOFS); 134 if (!device->name) { 135 kfree(device); 136 return -ENOMEM; 137 } 138 list_add(&device->dev_list, &fs_devices->devices); 139 fs_devices->num_devices++; 140 } 141 142 if (found_transid > fs_devices->latest_trans) { 143 fs_devices->latest_devid = devid; 144 fs_devices->latest_trans = found_transid; 145 } 146 if (fs_devices->lowest_devid > devid) { 147 fs_devices->lowest_devid = devid; 148 printk("lowest devid now %Lu\n", devid); 149 } 150 *fs_devices_ret = fs_devices; 151 return 0; 152 } 153 154 int btrfs_close_devices(struct btrfs_fs_devices *fs_devices) 155 { 156 struct list_head *head = &fs_devices->devices; 157 struct list_head *cur; 158 struct btrfs_device *device; 159 160 mutex_lock(&uuid_mutex); 161 list_for_each(cur, head) { 162 device = list_entry(cur, struct btrfs_device, dev_list); 163 if (device->bdev) { 164 close_bdev_excl(device->bdev); 165 printk("close devices closes %s\n", device->name); 166 } 167 device->bdev = NULL; 168 } 169 mutex_unlock(&uuid_mutex); 170 return 0; 171 } 172 173 int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, 174 int flags, void *holder) 175 { 176 struct block_device *bdev; 177 struct list_head *head = &fs_devices->devices; 178 struct list_head *cur; 179 struct btrfs_device *device; 180 int ret; 181 182 mutex_lock(&uuid_mutex); 183 list_for_each(cur, head) { 184 device = list_entry(cur, struct btrfs_device, dev_list); 185 bdev = open_bdev_excl(device->name, flags, holder); 186 printk("opening %s devid %Lu\n", device->name, device->devid); 187 if (IS_ERR(bdev)) { 188 printk("open %s failed\n", device->name); 189 ret = PTR_ERR(bdev); 190 goto fail; 191 } 192 if (device->devid == fs_devices->latest_devid) 193 fs_devices->latest_bdev = bdev; 194 if (device->devid == fs_devices->lowest_devid) { 195 fs_devices->lowest_bdev = bdev; 196 printk("lowest bdev %s\n", device->name); 197 } 198 device->bdev = bdev; 199 } 200 mutex_unlock(&uuid_mutex); 201 return 0; 202 fail: 203 mutex_unlock(&uuid_mutex); 204 btrfs_close_devices(fs_devices); 205 return ret; 206 } 207 208 int btrfs_scan_one_device(const char *path, int flags, void *holder, 209 struct btrfs_fs_devices **fs_devices_ret) 210 { 211 struct btrfs_super_block *disk_super; 212 struct block_device *bdev; 213 struct buffer_head *bh; 214 int ret; 215 u64 devid; 216 217 mutex_lock(&uuid_mutex); 218 219 printk("scan one opens %s\n", path); 220 bdev = open_bdev_excl(path, flags, holder); 221 222 if (IS_ERR(bdev)) { 223 printk("open failed\n"); 224 ret = PTR_ERR(bdev); 225 goto error; 226 } 227 228 ret = set_blocksize(bdev, 4096); 229 if (ret) 230 goto error_close; 231 bh = __bread(bdev, BTRFS_SUPER_INFO_OFFSET / 4096, 4096); 232 if (!bh) { 233 ret = -EIO; 234 goto error_close; 235 } 236 disk_super = (struct btrfs_super_block *)bh->b_data; 237 if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC, 238 sizeof(disk_super->magic))) { 239 printk("no btrfs found on %s\n", path); 240 ret = -ENOENT; 241 goto error_brelse; 242 } 243 devid = le64_to_cpu(disk_super->dev_item.devid); 244 printk("found device %Lu on %s\n", devid, path); 245 ret = device_list_add(path, disk_super, devid, fs_devices_ret); 246 247 error_brelse: 248 brelse(bh); 249 error_close: 250 close_bdev_excl(bdev); 251 printk("scan one closes bdev %s\n", path); 252 error: 253 mutex_unlock(&uuid_mutex); 254 return ret; 255 } 256 257 /* 258 * this uses a pretty simple search, the expectation is that it is 259 * called very infrequently and that a given device has a small number 260 * of extents 261 */ 262 static int find_free_dev_extent(struct btrfs_trans_handle *trans, 263 struct btrfs_device *device, 264 struct btrfs_path *path, 265 u64 num_bytes, u64 *start) 266 { 267 struct btrfs_key key; 268 struct btrfs_root *root = device->dev_root; 269 struct btrfs_dev_extent *dev_extent = NULL; 270 u64 hole_size = 0; 271 u64 last_byte = 0; 272 u64 search_start = 0; 273 u64 search_end = device->total_bytes; 274 int ret; 275 int slot = 0; 276 int start_found; 277 struct extent_buffer *l; 278 279 start_found = 0; 280 path->reada = 2; 281 282 /* FIXME use last free of some kind */ 283 284 /* we don't want to overwrite the superblock on the drive, 285 * so we make sure to start at an offset of at least 1MB 286 */ 287 search_start = max((u64)1024 * 1024, search_start); 288 key.objectid = device->devid; 289 key.offset = search_start; 290 key.type = BTRFS_DEV_EXTENT_KEY; 291 ret = btrfs_search_slot(trans, root, &key, path, 0, 0); 292 if (ret < 0) 293 goto error; 294 ret = btrfs_previous_item(root, path, 0, key.type); 295 if (ret < 0) 296 goto error; 297 l = path->nodes[0]; 298 btrfs_item_key_to_cpu(l, &key, path->slots[0]); 299 while (1) { 300 l = path->nodes[0]; 301 slot = path->slots[0]; 302 if (slot >= btrfs_header_nritems(l)) { 303 ret = btrfs_next_leaf(root, path); 304 if (ret == 0) 305 continue; 306 if (ret < 0) 307 goto error; 308 no_more_items: 309 if (!start_found) { 310 if (search_start >= search_end) { 311 ret = -ENOSPC; 312 goto error; 313 } 314 *start = search_start; 315 start_found = 1; 316 goto check_pending; 317 } 318 *start = last_byte > search_start ? 319 last_byte : search_start; 320 if (search_end <= *start) { 321 ret = -ENOSPC; 322 goto error; 323 } 324 goto check_pending; 325 } 326 btrfs_item_key_to_cpu(l, &key, slot); 327 328 if (key.objectid < device->devid) 329 goto next; 330 331 if (key.objectid > device->devid) 332 goto no_more_items; 333 334 if (key.offset >= search_start && key.offset > last_byte && 335 start_found) { 336 if (last_byte < search_start) 337 last_byte = search_start; 338 hole_size = key.offset - last_byte; 339 if (key.offset > last_byte && 340 hole_size >= num_bytes) { 341 *start = last_byte; 342 goto check_pending; 343 } 344 } 345 if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY) { 346 goto next; 347 } 348 349 start_found = 1; 350 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); 351 last_byte = key.offset + btrfs_dev_extent_length(l, dev_extent); 352 next: 353 path->slots[0]++; 354 cond_resched(); 355 } 356 check_pending: 357 /* we have to make sure we didn't find an extent that has already 358 * been allocated by the map tree or the original allocation 359 */ 360 btrfs_release_path(root, path); 361 BUG_ON(*start < search_start); 362 363 if (*start + num_bytes > search_end) { 364 ret = -ENOSPC; 365 goto error; 366 } 367 /* check for pending inserts here */ 368 return 0; 369 370 error: 371 btrfs_release_path(root, path); 372 return ret; 373 } 374 375 int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, 376 struct btrfs_device *device, 377 u64 owner, u64 num_bytes, u64 *start) 378 { 379 int ret; 380 struct btrfs_path *path; 381 struct btrfs_root *root = device->dev_root; 382 struct btrfs_dev_extent *extent; 383 struct extent_buffer *leaf; 384 struct btrfs_key key; 385 386 path = btrfs_alloc_path(); 387 if (!path) 388 return -ENOMEM; 389 390 ret = find_free_dev_extent(trans, device, path, num_bytes, start); 391 if (ret) { 392 goto err; 393 } 394 395 key.objectid = device->devid; 396 key.offset = *start; 397 key.type = BTRFS_DEV_EXTENT_KEY; 398 ret = btrfs_insert_empty_item(trans, root, path, &key, 399 sizeof(*extent)); 400 BUG_ON(ret); 401 402 leaf = path->nodes[0]; 403 extent = btrfs_item_ptr(leaf, path->slots[0], 404 struct btrfs_dev_extent); 405 btrfs_set_dev_extent_owner(leaf, extent, owner); 406 btrfs_set_dev_extent_length(leaf, extent, num_bytes); 407 btrfs_mark_buffer_dirty(leaf); 408 err: 409 btrfs_free_path(path); 410 return ret; 411 } 412 413 static int find_next_chunk(struct btrfs_root *root, u64 *objectid) 414 { 415 struct btrfs_path *path; 416 int ret; 417 struct btrfs_key key; 418 struct btrfs_key found_key; 419 420 path = btrfs_alloc_path(); 421 BUG_ON(!path); 422 423 key.objectid = (u64)-1; 424 key.offset = (u64)-1; 425 key.type = BTRFS_CHUNK_ITEM_KEY; 426 427 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 428 if (ret < 0) 429 goto error; 430 431 BUG_ON(ret == 0); 432 433 ret = btrfs_previous_item(root, path, 0, BTRFS_CHUNK_ITEM_KEY); 434 if (ret) { 435 *objectid = 0; 436 } else { 437 btrfs_item_key_to_cpu(path->nodes[0], &found_key, 438 path->slots[0]); 439 *objectid = found_key.objectid + found_key.offset; 440 } 441 ret = 0; 442 error: 443 btrfs_free_path(path); 444 return ret; 445 } 446 447 static int find_next_devid(struct btrfs_root *root, struct btrfs_path *path, 448 u64 *objectid) 449 { 450 int ret; 451 struct btrfs_key key; 452 struct btrfs_key found_key; 453 454 key.objectid = BTRFS_DEV_ITEMS_OBJECTID; 455 key.type = BTRFS_DEV_ITEM_KEY; 456 key.offset = (u64)-1; 457 458 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 459 if (ret < 0) 460 goto error; 461 462 BUG_ON(ret == 0); 463 464 ret = btrfs_previous_item(root, path, BTRFS_DEV_ITEMS_OBJECTID, 465 BTRFS_DEV_ITEM_KEY); 466 if (ret) { 467 *objectid = 1; 468 } else { 469 btrfs_item_key_to_cpu(path->nodes[0], &found_key, 470 path->slots[0]); 471 *objectid = found_key.offset + 1; 472 } 473 ret = 0; 474 error: 475 btrfs_release_path(root, path); 476 return ret; 477 } 478 479 /* 480 * the device information is stored in the chunk root 481 * the btrfs_device struct should be fully filled in 482 */ 483 int btrfs_add_device(struct btrfs_trans_handle *trans, 484 struct btrfs_root *root, 485 struct btrfs_device *device) 486 { 487 int ret; 488 struct btrfs_path *path; 489 struct btrfs_dev_item *dev_item; 490 struct extent_buffer *leaf; 491 struct btrfs_key key; 492 unsigned long ptr; 493 u64 free_devid; 494 495 root = root->fs_info->chunk_root; 496 497 path = btrfs_alloc_path(); 498 if (!path) 499 return -ENOMEM; 500 501 ret = find_next_devid(root, path, &free_devid); 502 if (ret) 503 goto out; 504 505 key.objectid = BTRFS_DEV_ITEMS_OBJECTID; 506 key.type = BTRFS_DEV_ITEM_KEY; 507 key.offset = free_devid; 508 509 ret = btrfs_insert_empty_item(trans, root, path, &key, 510 sizeof(*dev_item)); 511 if (ret) 512 goto out; 513 514 leaf = path->nodes[0]; 515 dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item); 516 517 device->devid = free_devid; 518 btrfs_set_device_id(leaf, dev_item, device->devid); 519 btrfs_set_device_type(leaf, dev_item, device->type); 520 btrfs_set_device_io_align(leaf, dev_item, device->io_align); 521 btrfs_set_device_io_width(leaf, dev_item, device->io_width); 522 btrfs_set_device_sector_size(leaf, dev_item, device->sector_size); 523 btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes); 524 btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used); 525 526 ptr = (unsigned long)btrfs_device_uuid(dev_item); 527 write_extent_buffer(leaf, device->uuid, ptr, BTRFS_DEV_UUID_SIZE); 528 btrfs_mark_buffer_dirty(leaf); 529 ret = 0; 530 531 out: 532 btrfs_free_path(path); 533 return ret; 534 } 535 int btrfs_update_device(struct btrfs_trans_handle *trans, 536 struct btrfs_device *device) 537 { 538 int ret; 539 struct btrfs_path *path; 540 struct btrfs_root *root; 541 struct btrfs_dev_item *dev_item; 542 struct extent_buffer *leaf; 543 struct btrfs_key key; 544 545 root = device->dev_root->fs_info->chunk_root; 546 547 path = btrfs_alloc_path(); 548 if (!path) 549 return -ENOMEM; 550 551 key.objectid = BTRFS_DEV_ITEMS_OBJECTID; 552 key.type = BTRFS_DEV_ITEM_KEY; 553 key.offset = device->devid; 554 555 ret = btrfs_search_slot(trans, root, &key, path, 0, 1); 556 if (ret < 0) 557 goto out; 558 559 if (ret > 0) { 560 ret = -ENOENT; 561 goto out; 562 } 563 564 leaf = path->nodes[0]; 565 dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item); 566 567 btrfs_set_device_id(leaf, dev_item, device->devid); 568 btrfs_set_device_type(leaf, dev_item, device->type); 569 btrfs_set_device_io_align(leaf, dev_item, device->io_align); 570 btrfs_set_device_io_width(leaf, dev_item, device->io_width); 571 btrfs_set_device_sector_size(leaf, dev_item, device->sector_size); 572 btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes); 573 btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used); 574 btrfs_mark_buffer_dirty(leaf); 575 576 out: 577 btrfs_free_path(path); 578 return ret; 579 } 580 581 int btrfs_add_system_chunk(struct btrfs_trans_handle *trans, 582 struct btrfs_root *root, 583 struct btrfs_key *key, 584 struct btrfs_chunk *chunk, int item_size) 585 { 586 struct btrfs_super_block *super_copy = &root->fs_info->super_copy; 587 struct btrfs_disk_key disk_key; 588 u32 array_size; 589 u8 *ptr; 590 591 array_size = btrfs_super_sys_array_size(super_copy); 592 if (array_size + item_size > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) 593 return -EFBIG; 594 595 ptr = super_copy->sys_chunk_array + array_size; 596 btrfs_cpu_key_to_disk(&disk_key, key); 597 memcpy(ptr, &disk_key, sizeof(disk_key)); 598 ptr += sizeof(disk_key); 599 memcpy(ptr, chunk, item_size); 600 item_size += sizeof(disk_key); 601 btrfs_set_super_sys_array_size(super_copy, array_size + item_size); 602 return 0; 603 } 604 605 int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, 606 struct btrfs_root *extent_root, u64 *start, 607 u64 *num_bytes, u64 type) 608 { 609 u64 dev_offset; 610 struct btrfs_fs_info *info = extent_root->fs_info; 611 struct btrfs_root *chunk_root = extent_root->fs_info->chunk_root; 612 struct btrfs_stripe *stripes; 613 struct btrfs_device *device = NULL; 614 struct btrfs_chunk *chunk; 615 struct list_head private_devs; 616 struct list_head *dev_list = &extent_root->fs_info->fs_devices->devices; 617 struct list_head *cur; 618 struct extent_map_tree *em_tree; 619 struct map_lookup *map; 620 struct extent_map *em; 621 u64 physical; 622 u64 calc_size = 1024 * 1024 * 1024; 623 u64 avail; 624 u64 max_avail = 0; 625 int num_stripes = 1; 626 int looped = 0; 627 int ret; 628 int index; 629 int stripe_len = 64 * 1024; 630 struct btrfs_key key; 631 632 if (list_empty(dev_list)) 633 return -ENOSPC; 634 635 if (type & BTRFS_BLOCK_GROUP_RAID0) 636 num_stripes = btrfs_super_num_devices(&info->super_copy); 637 if (type & BTRFS_BLOCK_GROUP_DATA) 638 stripe_len = 64 * 1024; 639 if (type & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM)) 640 stripe_len = 32 * 1024; 641 again: 642 INIT_LIST_HEAD(&private_devs); 643 cur = dev_list->next; 644 index = 0; 645 /* build a private list of devices we will allocate from */ 646 while(index < num_stripes) { 647 device = list_entry(cur, struct btrfs_device, dev_list); 648 avail = device->total_bytes - device->bytes_used; 649 cur = cur->next; 650 if (avail > max_avail) 651 max_avail = avail; 652 if (avail >= calc_size) { 653 list_move_tail(&device->dev_list, &private_devs); 654 index++; 655 } 656 if (cur == dev_list) 657 break; 658 } 659 if (index < num_stripes) { 660 list_splice(&private_devs, dev_list); 661 if (!looped && max_avail > 0) { 662 looped = 1; 663 calc_size = max_avail; 664 goto again; 665 } 666 return -ENOSPC; 667 } 668 669 ret = find_next_chunk(chunk_root, &key.objectid); 670 if (ret) 671 return ret; 672 673 chunk = kmalloc(btrfs_chunk_item_size(num_stripes), GFP_NOFS); 674 if (!chunk) 675 return -ENOMEM; 676 677 map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS); 678 if (!map) { 679 kfree(chunk); 680 return -ENOMEM; 681 } 682 683 stripes = &chunk->stripe; 684 685 *num_bytes = calc_size * num_stripes; 686 index = 0; 687 while(index < num_stripes) { 688 BUG_ON(list_empty(&private_devs)); 689 cur = private_devs.next; 690 device = list_entry(cur, struct btrfs_device, dev_list); 691 list_move_tail(&device->dev_list, dev_list); 692 693 ret = btrfs_alloc_dev_extent(trans, device, 694 key.objectid, 695 calc_size, &dev_offset); 696 BUG_ON(ret); 697 printk("alloc chunk size %Lu from dev %Lu\n", calc_size, device->devid); 698 device->bytes_used += calc_size; 699 ret = btrfs_update_device(trans, device); 700 BUG_ON(ret); 701 702 map->stripes[index].dev = device; 703 map->stripes[index].physical = dev_offset; 704 btrfs_set_stack_stripe_devid(stripes + index, device->devid); 705 btrfs_set_stack_stripe_offset(stripes + index, dev_offset); 706 physical = dev_offset; 707 index++; 708 } 709 BUG_ON(!list_empty(&private_devs)); 710 711 /* key.objectid was set above */ 712 key.offset = *num_bytes; 713 key.type = BTRFS_CHUNK_ITEM_KEY; 714 btrfs_set_stack_chunk_owner(chunk, extent_root->root_key.objectid); 715 btrfs_set_stack_chunk_stripe_len(chunk, stripe_len); 716 btrfs_set_stack_chunk_type(chunk, type); 717 btrfs_set_stack_chunk_num_stripes(chunk, num_stripes); 718 btrfs_set_stack_chunk_io_align(chunk, stripe_len); 719 btrfs_set_stack_chunk_io_width(chunk, stripe_len); 720 btrfs_set_stack_chunk_sector_size(chunk, extent_root->sectorsize); 721 map->sector_size = extent_root->sectorsize; 722 map->stripe_len = stripe_len; 723 map->io_align = stripe_len; 724 map->io_width = stripe_len; 725 map->type = type; 726 map->num_stripes = num_stripes; 727 728 ret = btrfs_insert_item(trans, chunk_root, &key, chunk, 729 btrfs_chunk_item_size(num_stripes)); 730 BUG_ON(ret); 731 *start = key.objectid; 732 733 em = alloc_extent_map(GFP_NOFS); 734 if (!em) 735 return -ENOMEM; 736 em->bdev = (struct block_device *)map; 737 em->start = key.objectid; 738 em->len = key.offset; 739 em->block_start = 0; 740 741 kfree(chunk); 742 743 em_tree = &extent_root->fs_info->mapping_tree.map_tree; 744 spin_lock(&em_tree->lock); 745 ret = add_extent_mapping(em_tree, em); 746 BUG_ON(ret); 747 spin_unlock(&em_tree->lock); 748 free_extent_map(em); 749 return ret; 750 } 751 752 void btrfs_mapping_init(struct btrfs_mapping_tree *tree) 753 { 754 extent_map_tree_init(&tree->map_tree, GFP_NOFS); 755 } 756 757 void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree) 758 { 759 struct extent_map *em; 760 761 while(1) { 762 spin_lock(&tree->map_tree.lock); 763 em = lookup_extent_mapping(&tree->map_tree, 0, (u64)-1); 764 if (em) 765 remove_extent_mapping(&tree->map_tree, em); 766 spin_unlock(&tree->map_tree.lock); 767 if (!em) 768 break; 769 kfree(em->bdev); 770 /* once for us */ 771 free_extent_map(em); 772 /* once for the tree */ 773 free_extent_map(em); 774 } 775 } 776 777 int btrfs_map_block(struct btrfs_mapping_tree *map_tree, 778 u64 logical, u64 *phys, u64 *length, 779 struct btrfs_device **dev) 780 { 781 struct extent_map *em; 782 struct map_lookup *map; 783 struct extent_map_tree *em_tree = &map_tree->map_tree; 784 u64 offset; 785 u64 stripe_offset; 786 u64 stripe_nr; 787 int stripe_index; 788 789 790 spin_lock(&em_tree->lock); 791 em = lookup_extent_mapping(em_tree, logical, *length); 792 BUG_ON(!em); 793 794 BUG_ON(em->start > logical || em->start + em->len < logical); 795 map = (struct map_lookup *)em->bdev; 796 offset = logical - em->start; 797 798 stripe_nr = offset; 799 /* 800 * stripe_nr counts the total number of stripes we have to stride 801 * to get to this block 802 */ 803 do_div(stripe_nr, map->stripe_len); 804 805 stripe_offset = stripe_nr * map->stripe_len; 806 BUG_ON(offset < stripe_offset); 807 808 /* stripe_offset is the offset of this block in its stripe*/ 809 stripe_offset = offset - stripe_offset; 810 811 /* 812 * after this do_div call, stripe_nr is the number of stripes 813 * on this device we have to walk to find the data, and 814 * stripe_index is the number of our device in the stripe array 815 */ 816 stripe_index = do_div(stripe_nr, map->num_stripes); 817 818 BUG_ON(stripe_index >= map->num_stripes); 819 820 *phys = map->stripes[stripe_index].physical + stripe_offset + 821 stripe_nr * map->stripe_len; 822 823 if (map->type & BTRFS_BLOCK_GROUP_RAID0) { 824 /* we limit the length of each bio to what fits in a stripe */ 825 *length = min_t(u64, em->len - offset, 826 map->stripe_len - stripe_offset); 827 } else { 828 *length = em->len - offset; 829 } 830 *dev = map->stripes[stripe_index].dev; 831 free_extent_map(em); 832 spin_unlock(&em_tree->lock); 833 return 0; 834 } 835 836 int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio) 837 { 838 struct btrfs_mapping_tree *map_tree; 839 struct btrfs_device *dev; 840 u64 logical = bio->bi_sector << 9; 841 u64 physical; 842 u64 length = 0; 843 u64 map_length; 844 struct bio_vec *bvec; 845 int i; 846 int ret; 847 848 bio_for_each_segment(bvec, bio, i) { 849 length += bvec->bv_len; 850 } 851 map_tree = &root->fs_info->mapping_tree; 852 map_length = length; 853 ret = btrfs_map_block(map_tree, logical, &physical, &map_length, &dev); 854 if (map_length < length) { 855 printk("mapping failed logical %Lu bio len %Lu physical %Lu " 856 "len %Lu\n", logical, length, physical, map_length); 857 BUG(); 858 } 859 BUG_ON(map_length < length); 860 bio->bi_sector = physical >> 9; 861 bio->bi_bdev = dev->bdev; 862 submit_bio(rw, bio); 863 return 0; 864 } 865 866 struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid) 867 { 868 struct list_head *head = &root->fs_info->fs_devices->devices; 869 870 return __find_device(head, devid); 871 } 872 873 static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, 874 struct extent_buffer *leaf, 875 struct btrfs_chunk *chunk) 876 { 877 struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree; 878 struct map_lookup *map; 879 struct extent_map *em; 880 u64 logical; 881 u64 length; 882 u64 devid; 883 int num_stripes; 884 int ret; 885 int i; 886 887 logical = key->objectid; 888 length = key->offset; 889 spin_lock(&map_tree->map_tree.lock); 890 em = lookup_extent_mapping(&map_tree->map_tree, logical, 1); 891 892 /* already mapped? */ 893 if (em && em->start <= logical && em->start + em->len > logical) { 894 free_extent_map(em); 895 spin_unlock(&map_tree->map_tree.lock); 896 return 0; 897 } else if (em) { 898 free_extent_map(em); 899 } 900 spin_unlock(&map_tree->map_tree.lock); 901 902 map = kzalloc(sizeof(*map), GFP_NOFS); 903 if (!map) 904 return -ENOMEM; 905 906 em = alloc_extent_map(GFP_NOFS); 907 if (!em) 908 return -ENOMEM; 909 num_stripes = btrfs_chunk_num_stripes(leaf, chunk); 910 map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS); 911 if (!map) { 912 free_extent_map(em); 913 return -ENOMEM; 914 } 915 916 em->bdev = (struct block_device *)map; 917 em->start = logical; 918 em->len = length; 919 em->block_start = 0; 920 921 map->num_stripes = num_stripes; 922 map->io_width = btrfs_chunk_io_width(leaf, chunk); 923 map->io_align = btrfs_chunk_io_align(leaf, chunk); 924 map->sector_size = btrfs_chunk_sector_size(leaf, chunk); 925 map->stripe_len = btrfs_chunk_stripe_len(leaf, chunk); 926 map->type = btrfs_chunk_type(leaf, chunk); 927 for (i = 0; i < num_stripes; i++) { 928 map->stripes[i].physical = 929 btrfs_stripe_offset_nr(leaf, chunk, i); 930 devid = btrfs_stripe_devid_nr(leaf, chunk, i); 931 map->stripes[i].dev = btrfs_find_device(root, devid); 932 if (!map->stripes[i].dev) { 933 kfree(map); 934 free_extent_map(em); 935 return -EIO; 936 } 937 } 938 939 spin_lock(&map_tree->map_tree.lock); 940 ret = add_extent_mapping(&map_tree->map_tree, em); 941 BUG_ON(ret); 942 spin_unlock(&map_tree->map_tree.lock); 943 free_extent_map(em); 944 945 return 0; 946 } 947 948 static int fill_device_from_item(struct extent_buffer *leaf, 949 struct btrfs_dev_item *dev_item, 950 struct btrfs_device *device) 951 { 952 unsigned long ptr; 953 954 device->devid = btrfs_device_id(leaf, dev_item); 955 device->total_bytes = btrfs_device_total_bytes(leaf, dev_item); 956 device->bytes_used = btrfs_device_bytes_used(leaf, dev_item); 957 device->type = btrfs_device_type(leaf, dev_item); 958 device->io_align = btrfs_device_io_align(leaf, dev_item); 959 device->io_width = btrfs_device_io_width(leaf, dev_item); 960 device->sector_size = btrfs_device_sector_size(leaf, dev_item); 961 962 ptr = (unsigned long)btrfs_device_uuid(dev_item); 963 read_extent_buffer(leaf, device->uuid, ptr, BTRFS_DEV_UUID_SIZE); 964 965 return 0; 966 } 967 968 static int read_one_dev(struct btrfs_root *root, 969 struct extent_buffer *leaf, 970 struct btrfs_dev_item *dev_item) 971 { 972 struct btrfs_device *device; 973 u64 devid; 974 int ret; 975 976 devid = btrfs_device_id(leaf, dev_item); 977 device = btrfs_find_device(root, devid); 978 if (!device) { 979 printk("warning devid %Lu not found already\n", devid); 980 device = kmalloc(sizeof(*device), GFP_NOFS); 981 if (!device) 982 return -ENOMEM; 983 list_add(&device->dev_list, 984 &root->fs_info->fs_devices->devices); 985 } 986 987 fill_device_from_item(leaf, dev_item, device); 988 device->dev_root = root->fs_info->dev_root; 989 ret = 0; 990 #if 0 991 ret = btrfs_open_device(device); 992 if (ret) { 993 kfree(device); 994 } 995 #endif 996 return ret; 997 } 998 999 int btrfs_read_super_device(struct btrfs_root *root, struct extent_buffer *buf) 1000 { 1001 struct btrfs_dev_item *dev_item; 1002 1003 dev_item = (struct btrfs_dev_item *)offsetof(struct btrfs_super_block, 1004 dev_item); 1005 return read_one_dev(root, buf, dev_item); 1006 } 1007 1008 int btrfs_read_sys_array(struct btrfs_root *root) 1009 { 1010 struct btrfs_super_block *super_copy = &root->fs_info->super_copy; 1011 struct extent_buffer *sb = root->fs_info->sb_buffer; 1012 struct btrfs_disk_key *disk_key; 1013 struct btrfs_chunk *chunk; 1014 struct btrfs_key key; 1015 u32 num_stripes; 1016 u32 array_size; 1017 u32 len = 0; 1018 u8 *ptr; 1019 unsigned long sb_ptr; 1020 u32 cur; 1021 int ret; 1022 1023 array_size = btrfs_super_sys_array_size(super_copy); 1024 1025 /* 1026 * we do this loop twice, once for the device items and 1027 * once for all of the chunks. This way there are device 1028 * structs filled in for every chunk 1029 */ 1030 ptr = super_copy->sys_chunk_array; 1031 sb_ptr = offsetof(struct btrfs_super_block, sys_chunk_array); 1032 cur = 0; 1033 1034 while (cur < array_size) { 1035 disk_key = (struct btrfs_disk_key *)ptr; 1036 btrfs_disk_key_to_cpu(&key, disk_key); 1037 1038 len = sizeof(*disk_key); 1039 ptr += len; 1040 sb_ptr += len; 1041 cur += len; 1042 1043 if (key.type == BTRFS_CHUNK_ITEM_KEY) { 1044 chunk = (struct btrfs_chunk *)sb_ptr; 1045 ret = read_one_chunk(root, &key, sb, chunk); 1046 BUG_ON(ret); 1047 num_stripes = btrfs_chunk_num_stripes(sb, chunk); 1048 len = btrfs_chunk_item_size(num_stripes); 1049 } else { 1050 BUG(); 1051 } 1052 ptr += len; 1053 sb_ptr += len; 1054 cur += len; 1055 } 1056 return 0; 1057 } 1058 1059 int btrfs_read_chunk_tree(struct btrfs_root *root) 1060 { 1061 struct btrfs_path *path; 1062 struct extent_buffer *leaf; 1063 struct btrfs_key key; 1064 struct btrfs_key found_key; 1065 int ret; 1066 int slot; 1067 1068 root = root->fs_info->chunk_root; 1069 1070 path = btrfs_alloc_path(); 1071 if (!path) 1072 return -ENOMEM; 1073 1074 /* first we search for all of the device items, and then we 1075 * read in all of the chunk items. This way we can create chunk 1076 * mappings that reference all of the devices that are afound 1077 */ 1078 key.objectid = BTRFS_DEV_ITEMS_OBJECTID; 1079 key.offset = 0; 1080 key.type = 0; 1081 again: 1082 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 1083 while(1) { 1084 leaf = path->nodes[0]; 1085 slot = path->slots[0]; 1086 if (slot >= btrfs_header_nritems(leaf)) { 1087 ret = btrfs_next_leaf(root, path); 1088 if (ret == 0) 1089 continue; 1090 if (ret < 0) 1091 goto error; 1092 break; 1093 } 1094 btrfs_item_key_to_cpu(leaf, &found_key, slot); 1095 if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) { 1096 if (found_key.objectid != BTRFS_DEV_ITEMS_OBJECTID) 1097 break; 1098 if (found_key.type == BTRFS_DEV_ITEM_KEY) { 1099 struct btrfs_dev_item *dev_item; 1100 dev_item = btrfs_item_ptr(leaf, slot, 1101 struct btrfs_dev_item); 1102 ret = read_one_dev(root, leaf, dev_item); 1103 BUG_ON(ret); 1104 } 1105 } else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) { 1106 struct btrfs_chunk *chunk; 1107 chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk); 1108 ret = read_one_chunk(root, &found_key, leaf, chunk); 1109 } 1110 path->slots[0]++; 1111 } 1112 if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) { 1113 key.objectid = 0; 1114 btrfs_release_path(root, path); 1115 goto again; 1116 } 1117 1118 btrfs_free_path(path); 1119 ret = 0; 1120 error: 1121 return ret; 1122 } 1123 1124