1 /* 2 * Copyright (C) 2007,2008 Oracle. All rights reserved. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public 6 * License v2 as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public 14 * License along with this program; if not, write to the 15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 16 * Boston, MA 021110-1307, USA. 17 */ 18 19 #include <linux/sched.h> 20 #include "ctree.h" 21 #include "disk-io.h" 22 #include "transaction.h" 23 #include "print-tree.h" 24 #include "locking.h" 25 26 static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root 27 *root, struct btrfs_path *path, int level); 28 static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root 29 *root, struct btrfs_key *ins_key, 30 struct btrfs_path *path, int data_size, int extend); 31 static int push_node_left(struct btrfs_trans_handle *trans, 32 struct btrfs_root *root, struct extent_buffer *dst, 33 struct extent_buffer *src, int empty); 34 static int balance_node_right(struct btrfs_trans_handle *trans, 35 struct btrfs_root *root, 36 struct extent_buffer *dst_buf, 37 struct extent_buffer *src_buf); 38 static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, 39 struct btrfs_path *path, int level, int slot); 40 41 struct btrfs_path *btrfs_alloc_path(void) 42 { 43 struct btrfs_path *path; 44 path = kmem_cache_zalloc(btrfs_path_cachep, GFP_NOFS); 45 if (path) 46 path->reada = 1; 47 return path; 48 } 49 50 /* 51 * set all locked nodes in the path to blocking locks. This should 52 * be done before scheduling 53 */ 54 noinline void btrfs_set_path_blocking(struct btrfs_path *p) 55 { 56 int i; 57 for (i = 0; i < BTRFS_MAX_LEVEL; i++) { 58 if (p->nodes[i] && p->locks[i]) 59 btrfs_set_lock_blocking(p->nodes[i]); 60 } 61 } 62 63 /* 64 * reset all the locked nodes in the patch to spinning locks. 65 */ 66 noinline void btrfs_clear_path_blocking(struct btrfs_path *p) 67 { 68 int i; 69 for (i = 0; i < BTRFS_MAX_LEVEL; i++) { 70 if (p->nodes[i] && p->locks[i]) 71 btrfs_clear_lock_blocking(p->nodes[i]); 72 } 73 } 74 75 /* this also releases the path */ 76 void btrfs_free_path(struct btrfs_path *p) 77 { 78 btrfs_release_path(NULL, p); 79 kmem_cache_free(btrfs_path_cachep, p); 80 } 81 82 /* 83 * path release drops references on the extent buffers in the path 84 * and it drops any locks held by this path 85 * 86 * It is safe to call this on paths that no locks or extent buffers held. 87 */ 88 noinline void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) 89 { 90 int i; 91 92 for (i = 0; i < BTRFS_MAX_LEVEL; i++) { 93 p->slots[i] = 0; 94 if (!p->nodes[i]) 95 continue; 96 if (p->locks[i]) { 97 btrfs_tree_unlock(p->nodes[i]); 98 p->locks[i] = 0; 99 } 100 free_extent_buffer(p->nodes[i]); 101 p->nodes[i] = NULL; 102 } 103 } 104 105 /* 106 * safely gets a reference on the root node of a tree. A lock 107 * is not taken, so a concurrent writer may put a different node 108 * at the root of the tree. See btrfs_lock_root_node for the 109 * looping required. 110 * 111 * The extent buffer returned by this has a reference taken, so 112 * it won't disappear. It may stop being the root of the tree 113 * at any time because there are no locks held. 114 */ 115 struct extent_buffer *btrfs_root_node(struct btrfs_root *root) 116 { 117 struct extent_buffer *eb; 118 spin_lock(&root->node_lock); 119 eb = root->node; 120 extent_buffer_get(eb); 121 spin_unlock(&root->node_lock); 122 return eb; 123 } 124 125 /* loop around taking references on and locking the root node of the 126 * tree until you end up with a lock on the root. A locked buffer 127 * is returned, with a reference held. 128 */ 129 struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root) 130 { 131 struct extent_buffer *eb; 132 133 while (1) { 134 eb = btrfs_root_node(root); 135 btrfs_tree_lock(eb); 136 137 spin_lock(&root->node_lock); 138 if (eb == root->node) { 139 spin_unlock(&root->node_lock); 140 break; 141 } 142 spin_unlock(&root->node_lock); 143 144 btrfs_tree_unlock(eb); 145 free_extent_buffer(eb); 146 } 147 return eb; 148 } 149 150 /* cowonly root (everything not a reference counted cow subvolume), just get 151 * put onto a simple dirty list. transaction.c walks this to make sure they 152 * get properly updated on disk. 153 */ 154 static void add_root_to_dirty_list(struct btrfs_root *root) 155 { 156 if (root->track_dirty && list_empty(&root->dirty_list)) { 157 list_add(&root->dirty_list, 158 &root->fs_info->dirty_cowonly_roots); 159 } 160 } 161 162 /* 163 * used by snapshot creation to make a copy of a root for a tree with 164 * a given objectid. The buffer with the new root node is returned in 165 * cow_ret, and this func returns zero on success or a negative error code. 166 */ 167 int btrfs_copy_root(struct btrfs_trans_handle *trans, 168 struct btrfs_root *root, 169 struct extent_buffer *buf, 170 struct extent_buffer **cow_ret, u64 new_root_objectid) 171 { 172 struct extent_buffer *cow; 173 u32 nritems; 174 int ret = 0; 175 int level; 176 struct btrfs_root *new_root; 177 178 new_root = kmalloc(sizeof(*new_root), GFP_NOFS); 179 if (!new_root) 180 return -ENOMEM; 181 182 memcpy(new_root, root, sizeof(*new_root)); 183 new_root->root_key.objectid = new_root_objectid; 184 185 WARN_ON(root->ref_cows && trans->transid != 186 root->fs_info->running_transaction->transid); 187 WARN_ON(root->ref_cows && trans->transid != root->last_trans); 188 189 level = btrfs_header_level(buf); 190 nritems = btrfs_header_nritems(buf); 191 192 cow = btrfs_alloc_free_block(trans, new_root, buf->len, 0, 193 new_root_objectid, trans->transid, 194 level, buf->start, 0); 195 if (IS_ERR(cow)) { 196 kfree(new_root); 197 return PTR_ERR(cow); 198 } 199 200 copy_extent_buffer(cow, buf, 0, 0, cow->len); 201 btrfs_set_header_bytenr(cow, cow->start); 202 btrfs_set_header_generation(cow, trans->transid); 203 btrfs_set_header_owner(cow, new_root_objectid); 204 btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN); 205 206 write_extent_buffer(cow, root->fs_info->fsid, 207 (unsigned long)btrfs_header_fsid(cow), 208 BTRFS_FSID_SIZE); 209 210 WARN_ON(btrfs_header_generation(buf) > trans->transid); 211 ret = btrfs_inc_ref(trans, new_root, buf, cow, NULL); 212 kfree(new_root); 213 214 if (ret) 215 return ret; 216 217 btrfs_mark_buffer_dirty(cow); 218 *cow_ret = cow; 219 return 0; 220 } 221 222 /* 223 * does the dirty work in cow of a single block. The parent block (if 224 * supplied) is updated to point to the new cow copy. The new buffer is marked 225 * dirty and returned locked. If you modify the block it needs to be marked 226 * dirty again. 227 * 228 * search_start -- an allocation hint for the new block 229 * 230 * empty_size -- a hint that you plan on doing more cow. This is the size in 231 * bytes the allocator should try to find free next to the block it returns. 232 * This is just a hint and may be ignored by the allocator. 233 * 234 * prealloc_dest -- if you have already reserved a destination for the cow, 235 * this uses that block instead of allocating a new one. 236 * btrfs_alloc_reserved_extent is used to finish the allocation. 237 */ 238 static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, 239 struct btrfs_root *root, 240 struct extent_buffer *buf, 241 struct extent_buffer *parent, int parent_slot, 242 struct extent_buffer **cow_ret, 243 u64 search_start, u64 empty_size, 244 u64 prealloc_dest) 245 { 246 u64 parent_start; 247 struct extent_buffer *cow; 248 u32 nritems; 249 int ret = 0; 250 int level; 251 int unlock_orig = 0; 252 253 if (*cow_ret == buf) 254 unlock_orig = 1; 255 256 WARN_ON(!btrfs_tree_locked(buf)); 257 258 if (parent) 259 parent_start = parent->start; 260 else 261 parent_start = 0; 262 263 WARN_ON(root->ref_cows && trans->transid != 264 root->fs_info->running_transaction->transid); 265 WARN_ON(root->ref_cows && trans->transid != root->last_trans); 266 267 level = btrfs_header_level(buf); 268 nritems = btrfs_header_nritems(buf); 269 270 if (prealloc_dest) { 271 struct btrfs_key ins; 272 273 ins.objectid = prealloc_dest; 274 ins.offset = buf->len; 275 ins.type = BTRFS_EXTENT_ITEM_KEY; 276 277 ret = btrfs_alloc_reserved_extent(trans, root, parent_start, 278 root->root_key.objectid, 279 trans->transid, level, &ins); 280 BUG_ON(ret); 281 cow = btrfs_init_new_buffer(trans, root, prealloc_dest, 282 buf->len); 283 } else { 284 cow = btrfs_alloc_free_block(trans, root, buf->len, 285 parent_start, 286 root->root_key.objectid, 287 trans->transid, level, 288 search_start, empty_size); 289 } 290 if (IS_ERR(cow)) 291 return PTR_ERR(cow); 292 293 /* cow is set to blocking by btrfs_init_new_buffer */ 294 295 copy_extent_buffer(cow, buf, 0, 0, cow->len); 296 btrfs_set_header_bytenr(cow, cow->start); 297 btrfs_set_header_generation(cow, trans->transid); 298 btrfs_set_header_owner(cow, root->root_key.objectid); 299 btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN); 300 301 write_extent_buffer(cow, root->fs_info->fsid, 302 (unsigned long)btrfs_header_fsid(cow), 303 BTRFS_FSID_SIZE); 304 305 WARN_ON(btrfs_header_generation(buf) > trans->transid); 306 if (btrfs_header_generation(buf) != trans->transid) { 307 u32 nr_extents; 308 ret = btrfs_inc_ref(trans, root, buf, cow, &nr_extents); 309 if (ret) 310 return ret; 311 312 ret = btrfs_cache_ref(trans, root, buf, nr_extents); 313 WARN_ON(ret); 314 } else if (btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID) { 315 /* 316 * There are only two places that can drop reference to 317 * tree blocks owned by living reloc trees, one is here, 318 * the other place is btrfs_drop_subtree. In both places, 319 * we check reference count while tree block is locked. 320 * Furthermore, if reference count is one, it won't get 321 * increased by someone else. 322 */ 323 u32 refs; 324 ret = btrfs_lookup_extent_ref(trans, root, buf->start, 325 buf->len, &refs); 326 BUG_ON(ret); 327 if (refs == 1) { 328 ret = btrfs_update_ref(trans, root, buf, cow, 329 0, nritems); 330 clean_tree_block(trans, root, buf); 331 } else { 332 ret = btrfs_inc_ref(trans, root, buf, cow, NULL); 333 } 334 BUG_ON(ret); 335 } else { 336 ret = btrfs_update_ref(trans, root, buf, cow, 0, nritems); 337 if (ret) 338 return ret; 339 clean_tree_block(trans, root, buf); 340 } 341 342 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) { 343 ret = btrfs_reloc_tree_cache_ref(trans, root, cow, buf->start); 344 WARN_ON(ret); 345 } 346 347 if (buf == root->node) { 348 WARN_ON(parent && parent != buf); 349 350 spin_lock(&root->node_lock); 351 root->node = cow; 352 extent_buffer_get(cow); 353 spin_unlock(&root->node_lock); 354 355 if (buf != root->commit_root) { 356 btrfs_free_extent(trans, root, buf->start, 357 buf->len, buf->start, 358 root->root_key.objectid, 359 btrfs_header_generation(buf), 360 level, 1); 361 } 362 free_extent_buffer(buf); 363 add_root_to_dirty_list(root); 364 } else { 365 btrfs_set_node_blockptr(parent, parent_slot, 366 cow->start); 367 WARN_ON(trans->transid == 0); 368 btrfs_set_node_ptr_generation(parent, parent_slot, 369 trans->transid); 370 btrfs_mark_buffer_dirty(parent); 371 WARN_ON(btrfs_header_generation(parent) != trans->transid); 372 btrfs_free_extent(trans, root, buf->start, buf->len, 373 parent_start, btrfs_header_owner(parent), 374 btrfs_header_generation(parent), level, 1); 375 } 376 if (unlock_orig) 377 btrfs_tree_unlock(buf); 378 free_extent_buffer(buf); 379 btrfs_mark_buffer_dirty(cow); 380 *cow_ret = cow; 381 return 0; 382 } 383 384 /* 385 * cows a single block, see __btrfs_cow_block for the real work. 386 * This version of it has extra checks so that a block isn't cow'd more than 387 * once per transaction, as long as it hasn't been written yet 388 */ 389 noinline int btrfs_cow_block(struct btrfs_trans_handle *trans, 390 struct btrfs_root *root, struct extent_buffer *buf, 391 struct extent_buffer *parent, int parent_slot, 392 struct extent_buffer **cow_ret, u64 prealloc_dest) 393 { 394 u64 search_start; 395 int ret; 396 397 if (trans->transaction != root->fs_info->running_transaction) { 398 printk(KERN_CRIT "trans %llu running %llu\n", 399 (unsigned long long)trans->transid, 400 (unsigned long long) 401 root->fs_info->running_transaction->transid); 402 WARN_ON(1); 403 } 404 if (trans->transid != root->fs_info->generation) { 405 printk(KERN_CRIT "trans %llu running %llu\n", 406 (unsigned long long)trans->transid, 407 (unsigned long long)root->fs_info->generation); 408 WARN_ON(1); 409 } 410 411 if (btrfs_header_generation(buf) == trans->transid && 412 btrfs_header_owner(buf) == root->root_key.objectid && 413 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { 414 *cow_ret = buf; 415 WARN_ON(prealloc_dest); 416 return 0; 417 } 418 419 search_start = buf->start & ~((u64)(1024 * 1024 * 1024) - 1); 420 421 if (parent) 422 btrfs_set_lock_blocking(parent); 423 btrfs_set_lock_blocking(buf); 424 425 ret = __btrfs_cow_block(trans, root, buf, parent, 426 parent_slot, cow_ret, search_start, 0, 427 prealloc_dest); 428 return ret; 429 } 430 431 /* 432 * helper function for defrag to decide if two blocks pointed to by a 433 * node are actually close by 434 */ 435 static int close_blocks(u64 blocknr, u64 other, u32 blocksize) 436 { 437 if (blocknr < other && other - (blocknr + blocksize) < 32768) 438 return 1; 439 if (blocknr > other && blocknr - (other + blocksize) < 32768) 440 return 1; 441 return 0; 442 } 443 444 /* 445 * compare two keys in a memcmp fashion 446 */ 447 static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2) 448 { 449 struct btrfs_key k1; 450 451 btrfs_disk_key_to_cpu(&k1, disk); 452 453 if (k1.objectid > k2->objectid) 454 return 1; 455 if (k1.objectid < k2->objectid) 456 return -1; 457 if (k1.type > k2->type) 458 return 1; 459 if (k1.type < k2->type) 460 return -1; 461 if (k1.offset > k2->offset) 462 return 1; 463 if (k1.offset < k2->offset) 464 return -1; 465 return 0; 466 } 467 468 /* 469 * same as comp_keys only with two btrfs_key's 470 */ 471 static int comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2) 472 { 473 if (k1->objectid > k2->objectid) 474 return 1; 475 if (k1->objectid < k2->objectid) 476 return -1; 477 if (k1->type > k2->type) 478 return 1; 479 if (k1->type < k2->type) 480 return -1; 481 if (k1->offset > k2->offset) 482 return 1; 483 if (k1->offset < k2->offset) 484 return -1; 485 return 0; 486 } 487 488 /* 489 * this is used by the defrag code to go through all the 490 * leaves pointed to by a node and reallocate them so that 491 * disk order is close to key order 492 */ 493 int btrfs_realloc_node(struct btrfs_trans_handle *trans, 494 struct btrfs_root *root, struct extent_buffer *parent, 495 int start_slot, int cache_only, u64 *last_ret, 496 struct btrfs_key *progress) 497 { 498 struct extent_buffer *cur; 499 u64 blocknr; 500 u64 gen; 501 u64 search_start = *last_ret; 502 u64 last_block = 0; 503 u64 other; 504 u32 parent_nritems; 505 int end_slot; 506 int i; 507 int err = 0; 508 int parent_level; 509 int uptodate; 510 u32 blocksize; 511 int progress_passed = 0; 512 struct btrfs_disk_key disk_key; 513 514 parent_level = btrfs_header_level(parent); 515 if (cache_only && parent_level != 1) 516 return 0; 517 518 if (trans->transaction != root->fs_info->running_transaction) 519 WARN_ON(1); 520 if (trans->transid != root->fs_info->generation) 521 WARN_ON(1); 522 523 parent_nritems = btrfs_header_nritems(parent); 524 blocksize = btrfs_level_size(root, parent_level - 1); 525 end_slot = parent_nritems; 526 527 if (parent_nritems == 1) 528 return 0; 529 530 btrfs_set_lock_blocking(parent); 531 532 for (i = start_slot; i < end_slot; i++) { 533 int close = 1; 534 535 if (!parent->map_token) { 536 map_extent_buffer(parent, 537 btrfs_node_key_ptr_offset(i), 538 sizeof(struct btrfs_key_ptr), 539 &parent->map_token, &parent->kaddr, 540 &parent->map_start, &parent->map_len, 541 KM_USER1); 542 } 543 btrfs_node_key(parent, &disk_key, i); 544 if (!progress_passed && comp_keys(&disk_key, progress) < 0) 545 continue; 546 547 progress_passed = 1; 548 blocknr = btrfs_node_blockptr(parent, i); 549 gen = btrfs_node_ptr_generation(parent, i); 550 if (last_block == 0) 551 last_block = blocknr; 552 553 if (i > 0) { 554 other = btrfs_node_blockptr(parent, i - 1); 555 close = close_blocks(blocknr, other, blocksize); 556 } 557 if (!close && i < end_slot - 2) { 558 other = btrfs_node_blockptr(parent, i + 1); 559 close = close_blocks(blocknr, other, blocksize); 560 } 561 if (close) { 562 last_block = blocknr; 563 continue; 564 } 565 if (parent->map_token) { 566 unmap_extent_buffer(parent, parent->map_token, 567 KM_USER1); 568 parent->map_token = NULL; 569 } 570 571 cur = btrfs_find_tree_block(root, blocknr, blocksize); 572 if (cur) 573 uptodate = btrfs_buffer_uptodate(cur, gen); 574 else 575 uptodate = 0; 576 if (!cur || !uptodate) { 577 if (cache_only) { 578 free_extent_buffer(cur); 579 continue; 580 } 581 if (!cur) { 582 cur = read_tree_block(root, blocknr, 583 blocksize, gen); 584 } else if (!uptodate) { 585 btrfs_read_buffer(cur, gen); 586 } 587 } 588 if (search_start == 0) 589 search_start = last_block; 590 591 btrfs_tree_lock(cur); 592 btrfs_set_lock_blocking(cur); 593 err = __btrfs_cow_block(trans, root, cur, parent, i, 594 &cur, search_start, 595 min(16 * blocksize, 596 (end_slot - i) * blocksize), 0); 597 if (err) { 598 btrfs_tree_unlock(cur); 599 free_extent_buffer(cur); 600 break; 601 } 602 search_start = cur->start; 603 last_block = cur->start; 604 *last_ret = search_start; 605 btrfs_tree_unlock(cur); 606 free_extent_buffer(cur); 607 } 608 if (parent->map_token) { 609 unmap_extent_buffer(parent, parent->map_token, 610 KM_USER1); 611 parent->map_token = NULL; 612 } 613 return err; 614 } 615 616 /* 617 * The leaf data grows from end-to-front in the node. 618 * this returns the address of the start of the last item, 619 * which is the stop of the leaf data stack 620 */ 621 static inline unsigned int leaf_data_end(struct btrfs_root *root, 622 struct extent_buffer *leaf) 623 { 624 u32 nr = btrfs_header_nritems(leaf); 625 if (nr == 0) 626 return BTRFS_LEAF_DATA_SIZE(root); 627 return btrfs_item_offset_nr(leaf, nr - 1); 628 } 629 630 /* 631 * extra debugging checks to make sure all the items in a key are 632 * well formed and in the proper order 633 */ 634 static int check_node(struct btrfs_root *root, struct btrfs_path *path, 635 int level) 636 { 637 struct extent_buffer *parent = NULL; 638 struct extent_buffer *node = path->nodes[level]; 639 struct btrfs_disk_key parent_key; 640 struct btrfs_disk_key node_key; 641 int parent_slot; 642 int slot; 643 struct btrfs_key cpukey; 644 u32 nritems = btrfs_header_nritems(node); 645 646 if (path->nodes[level + 1]) 647 parent = path->nodes[level + 1]; 648 649 slot = path->slots[level]; 650 BUG_ON(nritems == 0); 651 if (parent) { 652 parent_slot = path->slots[level + 1]; 653 btrfs_node_key(parent, &parent_key, parent_slot); 654 btrfs_node_key(node, &node_key, 0); 655 BUG_ON(memcmp(&parent_key, &node_key, 656 sizeof(struct btrfs_disk_key))); 657 BUG_ON(btrfs_node_blockptr(parent, parent_slot) != 658 btrfs_header_bytenr(node)); 659 } 660 BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root)); 661 if (slot != 0) { 662 btrfs_node_key_to_cpu(node, &cpukey, slot - 1); 663 btrfs_node_key(node, &node_key, slot); 664 BUG_ON(comp_keys(&node_key, &cpukey) <= 0); 665 } 666 if (slot < nritems - 1) { 667 btrfs_node_key_to_cpu(node, &cpukey, slot + 1); 668 btrfs_node_key(node, &node_key, slot); 669 BUG_ON(comp_keys(&node_key, &cpukey) >= 0); 670 } 671 return 0; 672 } 673 674 /* 675 * extra checking to make sure all the items in a leaf are 676 * well formed and in the proper order 677 */ 678 static int check_leaf(struct btrfs_root *root, struct btrfs_path *path, 679 int level) 680 { 681 struct extent_buffer *leaf = path->nodes[level]; 682 struct extent_buffer *parent = NULL; 683 int parent_slot; 684 struct btrfs_key cpukey; 685 struct btrfs_disk_key parent_key; 686 struct btrfs_disk_key leaf_key; 687 int slot = path->slots[0]; 688 689 u32 nritems = btrfs_header_nritems(leaf); 690 691 if (path->nodes[level + 1]) 692 parent = path->nodes[level + 1]; 693 694 if (nritems == 0) 695 return 0; 696 697 if (parent) { 698 parent_slot = path->slots[level + 1]; 699 btrfs_node_key(parent, &parent_key, parent_slot); 700 btrfs_item_key(leaf, &leaf_key, 0); 701 702 BUG_ON(memcmp(&parent_key, &leaf_key, 703 sizeof(struct btrfs_disk_key))); 704 BUG_ON(btrfs_node_blockptr(parent, parent_slot) != 705 btrfs_header_bytenr(leaf)); 706 } 707 if (slot != 0 && slot < nritems - 1) { 708 btrfs_item_key(leaf, &leaf_key, slot); 709 btrfs_item_key_to_cpu(leaf, &cpukey, slot - 1); 710 if (comp_keys(&leaf_key, &cpukey) <= 0) { 711 btrfs_print_leaf(root, leaf); 712 printk(KERN_CRIT "slot %d offset bad key\n", slot); 713 BUG_ON(1); 714 } 715 if (btrfs_item_offset_nr(leaf, slot - 1) != 716 btrfs_item_end_nr(leaf, slot)) { 717 btrfs_print_leaf(root, leaf); 718 printk(KERN_CRIT "slot %d offset bad\n", slot); 719 BUG_ON(1); 720 } 721 } 722 if (slot < nritems - 1) { 723 btrfs_item_key(leaf, &leaf_key, slot); 724 btrfs_item_key_to_cpu(leaf, &cpukey, slot + 1); 725 BUG_ON(comp_keys(&leaf_key, &cpukey) >= 0); 726 if (btrfs_item_offset_nr(leaf, slot) != 727 btrfs_item_end_nr(leaf, slot + 1)) { 728 btrfs_print_leaf(root, leaf); 729 printk(KERN_CRIT "slot %d offset bad\n", slot); 730 BUG_ON(1); 731 } 732 } 733 BUG_ON(btrfs_item_offset_nr(leaf, 0) + 734 btrfs_item_size_nr(leaf, 0) != BTRFS_LEAF_DATA_SIZE(root)); 735 return 0; 736 } 737 738 static noinline int check_block(struct btrfs_root *root, 739 struct btrfs_path *path, int level) 740 { 741 return 0; 742 if (level == 0) 743 return check_leaf(root, path, level); 744 return check_node(root, path, level); 745 } 746 747 /* 748 * search for key in the extent_buffer. The items start at offset p, 749 * and they are item_size apart. There are 'max' items in p. 750 * 751 * the slot in the array is returned via slot, and it points to 752 * the place where you would insert key if it is not found in 753 * the array. 754 * 755 * slot may point to max if the key is bigger than all of the keys 756 */ 757 static noinline int generic_bin_search(struct extent_buffer *eb, 758 unsigned long p, 759 int item_size, struct btrfs_key *key, 760 int max, int *slot) 761 { 762 int low = 0; 763 int high = max; 764 int mid; 765 int ret; 766 struct btrfs_disk_key *tmp = NULL; 767 struct btrfs_disk_key unaligned; 768 unsigned long offset; 769 char *map_token = NULL; 770 char *kaddr = NULL; 771 unsigned long map_start = 0; 772 unsigned long map_len = 0; 773 int err; 774 775 while (low < high) { 776 mid = (low + high) / 2; 777 offset = p + mid * item_size; 778 779 if (!map_token || offset < map_start || 780 (offset + sizeof(struct btrfs_disk_key)) > 781 map_start + map_len) { 782 if (map_token) { 783 unmap_extent_buffer(eb, map_token, KM_USER0); 784 map_token = NULL; 785 } 786 787 err = map_private_extent_buffer(eb, offset, 788 sizeof(struct btrfs_disk_key), 789 &map_token, &kaddr, 790 &map_start, &map_len, KM_USER0); 791 792 if (!err) { 793 tmp = (struct btrfs_disk_key *)(kaddr + offset - 794 map_start); 795 } else { 796 read_extent_buffer(eb, &unaligned, 797 offset, sizeof(unaligned)); 798 tmp = &unaligned; 799 } 800 801 } else { 802 tmp = (struct btrfs_disk_key *)(kaddr + offset - 803 map_start); 804 } 805 ret = comp_keys(tmp, key); 806 807 if (ret < 0) 808 low = mid + 1; 809 else if (ret > 0) 810 high = mid; 811 else { 812 *slot = mid; 813 if (map_token) 814 unmap_extent_buffer(eb, map_token, KM_USER0); 815 return 0; 816 } 817 } 818 *slot = low; 819 if (map_token) 820 unmap_extent_buffer(eb, map_token, KM_USER0); 821 return 1; 822 } 823 824 /* 825 * simple bin_search frontend that does the right thing for 826 * leaves vs nodes 827 */ 828 static int bin_search(struct extent_buffer *eb, struct btrfs_key *key, 829 int level, int *slot) 830 { 831 if (level == 0) { 832 return generic_bin_search(eb, 833 offsetof(struct btrfs_leaf, items), 834 sizeof(struct btrfs_item), 835 key, btrfs_header_nritems(eb), 836 slot); 837 } else { 838 return generic_bin_search(eb, 839 offsetof(struct btrfs_node, ptrs), 840 sizeof(struct btrfs_key_ptr), 841 key, btrfs_header_nritems(eb), 842 slot); 843 } 844 return -1; 845 } 846 847 /* given a node and slot number, this reads the blocks it points to. The 848 * extent buffer is returned with a reference taken (but unlocked). 849 * NULL is returned on error. 850 */ 851 static noinline struct extent_buffer *read_node_slot(struct btrfs_root *root, 852 struct extent_buffer *parent, int slot) 853 { 854 int level = btrfs_header_level(parent); 855 if (slot < 0) 856 return NULL; 857 if (slot >= btrfs_header_nritems(parent)) 858 return NULL; 859 860 BUG_ON(level == 0); 861 862 return read_tree_block(root, btrfs_node_blockptr(parent, slot), 863 btrfs_level_size(root, level - 1), 864 btrfs_node_ptr_generation(parent, slot)); 865 } 866 867 /* 868 * node level balancing, used to make sure nodes are in proper order for 869 * item deletion. We balance from the top down, so we have to make sure 870 * that a deletion won't leave an node completely empty later on. 871 */ 872 static noinline int balance_level(struct btrfs_trans_handle *trans, 873 struct btrfs_root *root, 874 struct btrfs_path *path, int level) 875 { 876 struct extent_buffer *right = NULL; 877 struct extent_buffer *mid; 878 struct extent_buffer *left = NULL; 879 struct extent_buffer *parent = NULL; 880 int ret = 0; 881 int wret; 882 int pslot; 883 int orig_slot = path->slots[level]; 884 int err_on_enospc = 0; 885 u64 orig_ptr; 886 887 if (level == 0) 888 return 0; 889 890 mid = path->nodes[level]; 891 892 WARN_ON(!path->locks[level]); 893 WARN_ON(btrfs_header_generation(mid) != trans->transid); 894 895 orig_ptr = btrfs_node_blockptr(mid, orig_slot); 896 897 if (level < BTRFS_MAX_LEVEL - 1) 898 parent = path->nodes[level + 1]; 899 pslot = path->slots[level + 1]; 900 901 /* 902 * deal with the case where there is only one pointer in the root 903 * by promoting the node below to a root 904 */ 905 if (!parent) { 906 struct extent_buffer *child; 907 908 if (btrfs_header_nritems(mid) != 1) 909 return 0; 910 911 /* promote the child to a root */ 912 child = read_node_slot(root, mid, 0); 913 BUG_ON(!child); 914 btrfs_tree_lock(child); 915 btrfs_set_lock_blocking(child); 916 ret = btrfs_cow_block(trans, root, child, mid, 0, &child, 0); 917 BUG_ON(ret); 918 919 spin_lock(&root->node_lock); 920 root->node = child; 921 spin_unlock(&root->node_lock); 922 923 ret = btrfs_update_extent_ref(trans, root, child->start, 924 mid->start, child->start, 925 root->root_key.objectid, 926 trans->transid, level - 1); 927 BUG_ON(ret); 928 929 add_root_to_dirty_list(root); 930 btrfs_tree_unlock(child); 931 932 path->locks[level] = 0; 933 path->nodes[level] = NULL; 934 clean_tree_block(trans, root, mid); 935 btrfs_tree_unlock(mid); 936 /* once for the path */ 937 free_extent_buffer(mid); 938 ret = btrfs_free_extent(trans, root, mid->start, mid->len, 939 mid->start, root->root_key.objectid, 940 btrfs_header_generation(mid), 941 level, 1); 942 /* once for the root ptr */ 943 free_extent_buffer(mid); 944 return ret; 945 } 946 if (btrfs_header_nritems(mid) > 947 BTRFS_NODEPTRS_PER_BLOCK(root) / 4) 948 return 0; 949 950 if (btrfs_header_nritems(mid) < 2) 951 err_on_enospc = 1; 952 953 left = read_node_slot(root, parent, pslot - 1); 954 if (left) { 955 btrfs_tree_lock(left); 956 btrfs_set_lock_blocking(left); 957 wret = btrfs_cow_block(trans, root, left, 958 parent, pslot - 1, &left, 0); 959 if (wret) { 960 ret = wret; 961 goto enospc; 962 } 963 } 964 right = read_node_slot(root, parent, pslot + 1); 965 if (right) { 966 btrfs_tree_lock(right); 967 btrfs_set_lock_blocking(right); 968 wret = btrfs_cow_block(trans, root, right, 969 parent, pslot + 1, &right, 0); 970 if (wret) { 971 ret = wret; 972 goto enospc; 973 } 974 } 975 976 /* first, try to make some room in the middle buffer */ 977 if (left) { 978 orig_slot += btrfs_header_nritems(left); 979 wret = push_node_left(trans, root, left, mid, 1); 980 if (wret < 0) 981 ret = wret; 982 if (btrfs_header_nritems(mid) < 2) 983 err_on_enospc = 1; 984 } 985 986 /* 987 * then try to empty the right most buffer into the middle 988 */ 989 if (right) { 990 wret = push_node_left(trans, root, mid, right, 1); 991 if (wret < 0 && wret != -ENOSPC) 992 ret = wret; 993 if (btrfs_header_nritems(right) == 0) { 994 u64 bytenr = right->start; 995 u64 generation = btrfs_header_generation(parent); 996 u32 blocksize = right->len; 997 998 clean_tree_block(trans, root, right); 999 btrfs_tree_unlock(right); 1000 free_extent_buffer(right); 1001 right = NULL; 1002 wret = del_ptr(trans, root, path, level + 1, pslot + 1003 1); 1004 if (wret) 1005 ret = wret; 1006 wret = btrfs_free_extent(trans, root, bytenr, 1007 blocksize, parent->start, 1008 btrfs_header_owner(parent), 1009 generation, level, 1); 1010 if (wret) 1011 ret = wret; 1012 } else { 1013 struct btrfs_disk_key right_key; 1014 btrfs_node_key(right, &right_key, 0); 1015 btrfs_set_node_key(parent, &right_key, pslot + 1); 1016 btrfs_mark_buffer_dirty(parent); 1017 } 1018 } 1019 if (btrfs_header_nritems(mid) == 1) { 1020 /* 1021 * we're not allowed to leave a node with one item in the 1022 * tree during a delete. A deletion from lower in the tree 1023 * could try to delete the only pointer in this node. 1024 * So, pull some keys from the left. 1025 * There has to be a left pointer at this point because 1026 * otherwise we would have pulled some pointers from the 1027 * right 1028 */ 1029 BUG_ON(!left); 1030 wret = balance_node_right(trans, root, mid, left); 1031 if (wret < 0) { 1032 ret = wret; 1033 goto enospc; 1034 } 1035 if (wret == 1) { 1036 wret = push_node_left(trans, root, left, mid, 1); 1037 if (wret < 0) 1038 ret = wret; 1039 } 1040 BUG_ON(wret == 1); 1041 } 1042 if (btrfs_header_nritems(mid) == 0) { 1043 /* we've managed to empty the middle node, drop it */ 1044 u64 root_gen = btrfs_header_generation(parent); 1045 u64 bytenr = mid->start; 1046 u32 blocksize = mid->len; 1047 1048 clean_tree_block(trans, root, mid); 1049 btrfs_tree_unlock(mid); 1050 free_extent_buffer(mid); 1051 mid = NULL; 1052 wret = del_ptr(trans, root, path, level + 1, pslot); 1053 if (wret) 1054 ret = wret; 1055 wret = btrfs_free_extent(trans, root, bytenr, blocksize, 1056 parent->start, 1057 btrfs_header_owner(parent), 1058 root_gen, level, 1); 1059 if (wret) 1060 ret = wret; 1061 } else { 1062 /* update the parent key to reflect our changes */ 1063 struct btrfs_disk_key mid_key; 1064 btrfs_node_key(mid, &mid_key, 0); 1065 btrfs_set_node_key(parent, &mid_key, pslot); 1066 btrfs_mark_buffer_dirty(parent); 1067 } 1068 1069 /* update the path */ 1070 if (left) { 1071 if (btrfs_header_nritems(left) > orig_slot) { 1072 extent_buffer_get(left); 1073 /* left was locked after cow */ 1074 path->nodes[level] = left; 1075 path->slots[level + 1] -= 1; 1076 path->slots[level] = orig_slot; 1077 if (mid) { 1078 btrfs_tree_unlock(mid); 1079 free_extent_buffer(mid); 1080 } 1081 } else { 1082 orig_slot -= btrfs_header_nritems(left); 1083 path->slots[level] = orig_slot; 1084 } 1085 } 1086 /* double check we haven't messed things up */ 1087 check_block(root, path, level); 1088 if (orig_ptr != 1089 btrfs_node_blockptr(path->nodes[level], path->slots[level])) 1090 BUG(); 1091 enospc: 1092 if (right) { 1093 btrfs_tree_unlock(right); 1094 free_extent_buffer(right); 1095 } 1096 if (left) { 1097 if (path->nodes[level] != left) 1098 btrfs_tree_unlock(left); 1099 free_extent_buffer(left); 1100 } 1101 return ret; 1102 } 1103 1104 /* Node balancing for insertion. Here we only split or push nodes around 1105 * when they are completely full. This is also done top down, so we 1106 * have to be pessimistic. 1107 */ 1108 static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans, 1109 struct btrfs_root *root, 1110 struct btrfs_path *path, int level) 1111 { 1112 struct extent_buffer *right = NULL; 1113 struct extent_buffer *mid; 1114 struct extent_buffer *left = NULL; 1115 struct extent_buffer *parent = NULL; 1116 int ret = 0; 1117 int wret; 1118 int pslot; 1119 int orig_slot = path->slots[level]; 1120 u64 orig_ptr; 1121 1122 if (level == 0) 1123 return 1; 1124 1125 mid = path->nodes[level]; 1126 WARN_ON(btrfs_header_generation(mid) != trans->transid); 1127 orig_ptr = btrfs_node_blockptr(mid, orig_slot); 1128 1129 if (level < BTRFS_MAX_LEVEL - 1) 1130 parent = path->nodes[level + 1]; 1131 pslot = path->slots[level + 1]; 1132 1133 if (!parent) 1134 return 1; 1135 1136 left = read_node_slot(root, parent, pslot - 1); 1137 1138 /* first, try to make some room in the middle buffer */ 1139 if (left) { 1140 u32 left_nr; 1141 1142 btrfs_tree_lock(left); 1143 btrfs_set_lock_blocking(left); 1144 1145 left_nr = btrfs_header_nritems(left); 1146 if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) { 1147 wret = 1; 1148 } else { 1149 ret = btrfs_cow_block(trans, root, left, parent, 1150 pslot - 1, &left, 0); 1151 if (ret) 1152 wret = 1; 1153 else { 1154 wret = push_node_left(trans, root, 1155 left, mid, 0); 1156 } 1157 } 1158 if (wret < 0) 1159 ret = wret; 1160 if (wret == 0) { 1161 struct btrfs_disk_key disk_key; 1162 orig_slot += left_nr; 1163 btrfs_node_key(mid, &disk_key, 0); 1164 btrfs_set_node_key(parent, &disk_key, pslot); 1165 btrfs_mark_buffer_dirty(parent); 1166 if (btrfs_header_nritems(left) > orig_slot) { 1167 path->nodes[level] = left; 1168 path->slots[level + 1] -= 1; 1169 path->slots[level] = orig_slot; 1170 btrfs_tree_unlock(mid); 1171 free_extent_buffer(mid); 1172 } else { 1173 orig_slot -= 1174 btrfs_header_nritems(left); 1175 path->slots[level] = orig_slot; 1176 btrfs_tree_unlock(left); 1177 free_extent_buffer(left); 1178 } 1179 return 0; 1180 } 1181 btrfs_tree_unlock(left); 1182 free_extent_buffer(left); 1183 } 1184 right = read_node_slot(root, parent, pslot + 1); 1185 1186 /* 1187 * then try to empty the right most buffer into the middle 1188 */ 1189 if (right) { 1190 u32 right_nr; 1191 1192 btrfs_tree_lock(right); 1193 btrfs_set_lock_blocking(right); 1194 1195 right_nr = btrfs_header_nritems(right); 1196 if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) { 1197 wret = 1; 1198 } else { 1199 ret = btrfs_cow_block(trans, root, right, 1200 parent, pslot + 1, 1201 &right, 0); 1202 if (ret) 1203 wret = 1; 1204 else { 1205 wret = balance_node_right(trans, root, 1206 right, mid); 1207 } 1208 } 1209 if (wret < 0) 1210 ret = wret; 1211 if (wret == 0) { 1212 struct btrfs_disk_key disk_key; 1213 1214 btrfs_node_key(right, &disk_key, 0); 1215 btrfs_set_node_key(parent, &disk_key, pslot + 1); 1216 btrfs_mark_buffer_dirty(parent); 1217 1218 if (btrfs_header_nritems(mid) <= orig_slot) { 1219 path->nodes[level] = right; 1220 path->slots[level + 1] += 1; 1221 path->slots[level] = orig_slot - 1222 btrfs_header_nritems(mid); 1223 btrfs_tree_unlock(mid); 1224 free_extent_buffer(mid); 1225 } else { 1226 btrfs_tree_unlock(right); 1227 free_extent_buffer(right); 1228 } 1229 return 0; 1230 } 1231 btrfs_tree_unlock(right); 1232 free_extent_buffer(right); 1233 } 1234 return 1; 1235 } 1236 1237 /* 1238 * readahead one full node of leaves, finding things that are close 1239 * to the block in 'slot', and triggering ra on them. 1240 */ 1241 static noinline void reada_for_search(struct btrfs_root *root, 1242 struct btrfs_path *path, 1243 int level, int slot, u64 objectid) 1244 { 1245 struct extent_buffer *node; 1246 struct btrfs_disk_key disk_key; 1247 u32 nritems; 1248 u64 search; 1249 u64 target; 1250 u64 nread = 0; 1251 int direction = path->reada; 1252 struct extent_buffer *eb; 1253 u32 nr; 1254 u32 blocksize; 1255 u32 nscan = 0; 1256 1257 if (level != 1) 1258 return; 1259 1260 if (!path->nodes[level]) 1261 return; 1262 1263 node = path->nodes[level]; 1264 1265 search = btrfs_node_blockptr(node, slot); 1266 blocksize = btrfs_level_size(root, level - 1); 1267 eb = btrfs_find_tree_block(root, search, blocksize); 1268 if (eb) { 1269 free_extent_buffer(eb); 1270 return; 1271 } 1272 1273 target = search; 1274 1275 nritems = btrfs_header_nritems(node); 1276 nr = slot; 1277 while (1) { 1278 if (direction < 0) { 1279 if (nr == 0) 1280 break; 1281 nr--; 1282 } else if (direction > 0) { 1283 nr++; 1284 if (nr >= nritems) 1285 break; 1286 } 1287 if (path->reada < 0 && objectid) { 1288 btrfs_node_key(node, &disk_key, nr); 1289 if (btrfs_disk_key_objectid(&disk_key) != objectid) 1290 break; 1291 } 1292 search = btrfs_node_blockptr(node, nr); 1293 if ((search <= target && target - search <= 65536) || 1294 (search > target && search - target <= 65536)) { 1295 readahead_tree_block(root, search, blocksize, 1296 btrfs_node_ptr_generation(node, nr)); 1297 nread += blocksize; 1298 } 1299 nscan++; 1300 if ((nread > 65536 || nscan > 32)) 1301 break; 1302 } 1303 } 1304 1305 /* 1306 * returns -EAGAIN if it had to drop the path, or zero if everything was in 1307 * cache 1308 */ 1309 static noinline int reada_for_balance(struct btrfs_root *root, 1310 struct btrfs_path *path, int level) 1311 { 1312 int slot; 1313 int nritems; 1314 struct extent_buffer *parent; 1315 struct extent_buffer *eb; 1316 u64 gen; 1317 u64 block1 = 0; 1318 u64 block2 = 0; 1319 int ret = 0; 1320 int blocksize; 1321 1322 parent = path->nodes[level - 1]; 1323 if (!parent) 1324 return 0; 1325 1326 nritems = btrfs_header_nritems(parent); 1327 slot = path->slots[level]; 1328 blocksize = btrfs_level_size(root, level); 1329 1330 if (slot > 0) { 1331 block1 = btrfs_node_blockptr(parent, slot - 1); 1332 gen = btrfs_node_ptr_generation(parent, slot - 1); 1333 eb = btrfs_find_tree_block(root, block1, blocksize); 1334 if (eb && btrfs_buffer_uptodate(eb, gen)) 1335 block1 = 0; 1336 free_extent_buffer(eb); 1337 } 1338 if (slot < nritems) { 1339 block2 = btrfs_node_blockptr(parent, slot + 1); 1340 gen = btrfs_node_ptr_generation(parent, slot + 1); 1341 eb = btrfs_find_tree_block(root, block2, blocksize); 1342 if (eb && btrfs_buffer_uptodate(eb, gen)) 1343 block2 = 0; 1344 free_extent_buffer(eb); 1345 } 1346 if (block1 || block2) { 1347 ret = -EAGAIN; 1348 btrfs_release_path(root, path); 1349 if (block1) 1350 readahead_tree_block(root, block1, blocksize, 0); 1351 if (block2) 1352 readahead_tree_block(root, block2, blocksize, 0); 1353 1354 if (block1) { 1355 eb = read_tree_block(root, block1, blocksize, 0); 1356 free_extent_buffer(eb); 1357 } 1358 if (block1) { 1359 eb = read_tree_block(root, block2, blocksize, 0); 1360 free_extent_buffer(eb); 1361 } 1362 } 1363 return ret; 1364 } 1365 1366 1367 /* 1368 * when we walk down the tree, it is usually safe to unlock the higher layers 1369 * in the tree. The exceptions are when our path goes through slot 0, because 1370 * operations on the tree might require changing key pointers higher up in the 1371 * tree. 1372 * 1373 * callers might also have set path->keep_locks, which tells this code to keep 1374 * the lock if the path points to the last slot in the block. This is part of 1375 * walking through the tree, and selecting the next slot in the higher block. 1376 * 1377 * lowest_unlock sets the lowest level in the tree we're allowed to unlock. so 1378 * if lowest_unlock is 1, level 0 won't be unlocked 1379 */ 1380 static noinline void unlock_up(struct btrfs_path *path, int level, 1381 int lowest_unlock) 1382 { 1383 int i; 1384 int skip_level = level; 1385 int no_skips = 0; 1386 struct extent_buffer *t; 1387 1388 for (i = level; i < BTRFS_MAX_LEVEL; i++) { 1389 if (!path->nodes[i]) 1390 break; 1391 if (!path->locks[i]) 1392 break; 1393 if (!no_skips && path->slots[i] == 0) { 1394 skip_level = i + 1; 1395 continue; 1396 } 1397 if (!no_skips && path->keep_locks) { 1398 u32 nritems; 1399 t = path->nodes[i]; 1400 nritems = btrfs_header_nritems(t); 1401 if (nritems < 1 || path->slots[i] >= nritems - 1) { 1402 skip_level = i + 1; 1403 continue; 1404 } 1405 } 1406 if (skip_level < i && i >= lowest_unlock) 1407 no_skips = 1; 1408 1409 t = path->nodes[i]; 1410 if (i >= lowest_unlock && i > skip_level && path->locks[i]) { 1411 btrfs_tree_unlock(t); 1412 path->locks[i] = 0; 1413 } 1414 } 1415 } 1416 1417 /* 1418 * This releases any locks held in the path starting at level and 1419 * going all the way up to the root. 1420 * 1421 * btrfs_search_slot will keep the lock held on higher nodes in a few 1422 * corner cases, such as COW of the block at slot zero in the node. This 1423 * ignores those rules, and it should only be called when there are no 1424 * more updates to be done higher up in the tree. 1425 */ 1426 noinline void btrfs_unlock_up_safe(struct btrfs_path *path, int level) 1427 { 1428 int i; 1429 1430 if (path->keep_locks || path->lowest_level) 1431 return; 1432 1433 for (i = level; i < BTRFS_MAX_LEVEL; i++) { 1434 if (!path->nodes[i]) 1435 continue; 1436 if (!path->locks[i]) 1437 continue; 1438 btrfs_tree_unlock(path->nodes[i]); 1439 path->locks[i] = 0; 1440 } 1441 } 1442 1443 /* 1444 * look for key in the tree. path is filled in with nodes along the way 1445 * if key is found, we return zero and you can find the item in the leaf 1446 * level of the path (level 0) 1447 * 1448 * If the key isn't found, the path points to the slot where it should 1449 * be inserted, and 1 is returned. If there are other errors during the 1450 * search a negative error number is returned. 1451 * 1452 * if ins_len > 0, nodes and leaves will be split as we walk down the 1453 * tree. if ins_len < 0, nodes will be merged as we walk down the tree (if 1454 * possible) 1455 */ 1456 int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root 1457 *root, struct btrfs_key *key, struct btrfs_path *p, int 1458 ins_len, int cow) 1459 { 1460 struct extent_buffer *b; 1461 struct extent_buffer *tmp; 1462 int slot; 1463 int ret; 1464 int level; 1465 int should_reada = p->reada; 1466 int lowest_unlock = 1; 1467 int blocksize; 1468 u8 lowest_level = 0; 1469 u64 blocknr; 1470 u64 gen; 1471 struct btrfs_key prealloc_block; 1472 1473 lowest_level = p->lowest_level; 1474 WARN_ON(lowest_level && ins_len > 0); 1475 WARN_ON(p->nodes[0] != NULL); 1476 1477 if (ins_len < 0) 1478 lowest_unlock = 2; 1479 1480 prealloc_block.objectid = 0; 1481 1482 again: 1483 if (p->skip_locking) 1484 b = btrfs_root_node(root); 1485 else 1486 b = btrfs_lock_root_node(root); 1487 1488 while (b) { 1489 level = btrfs_header_level(b); 1490 1491 /* 1492 * setup the path here so we can release it under lock 1493 * contention with the cow code 1494 */ 1495 p->nodes[level] = b; 1496 if (!p->skip_locking) 1497 p->locks[level] = 1; 1498 1499 if (cow) { 1500 int wret; 1501 1502 /* is a cow on this block not required */ 1503 if (btrfs_header_generation(b) == trans->transid && 1504 btrfs_header_owner(b) == root->root_key.objectid && 1505 !btrfs_header_flag(b, BTRFS_HEADER_FLAG_WRITTEN)) { 1506 goto cow_done; 1507 } 1508 1509 /* ok, we have to cow, is our old prealloc the right 1510 * size? 1511 */ 1512 if (prealloc_block.objectid && 1513 prealloc_block.offset != b->len) { 1514 btrfs_release_path(root, p); 1515 btrfs_free_reserved_extent(root, 1516 prealloc_block.objectid, 1517 prealloc_block.offset); 1518 prealloc_block.objectid = 0; 1519 goto again; 1520 } 1521 1522 /* 1523 * for higher level blocks, try not to allocate blocks 1524 * with the block and the parent locks held. 1525 */ 1526 if (level > 0 && !prealloc_block.objectid) { 1527 u32 size = b->len; 1528 u64 hint = b->start; 1529 1530 btrfs_release_path(root, p); 1531 ret = btrfs_reserve_extent(trans, root, 1532 size, size, 0, 1533 hint, (u64)-1, 1534 &prealloc_block, 0); 1535 BUG_ON(ret); 1536 goto again; 1537 } 1538 1539 btrfs_set_path_blocking(p); 1540 1541 wret = btrfs_cow_block(trans, root, b, 1542 p->nodes[level + 1], 1543 p->slots[level + 1], 1544 &b, prealloc_block.objectid); 1545 prealloc_block.objectid = 0; 1546 if (wret) { 1547 free_extent_buffer(b); 1548 ret = wret; 1549 goto done; 1550 } 1551 } 1552 cow_done: 1553 BUG_ON(!cow && ins_len); 1554 if (level != btrfs_header_level(b)) 1555 WARN_ON(1); 1556 level = btrfs_header_level(b); 1557 1558 p->nodes[level] = b; 1559 if (!p->skip_locking) 1560 p->locks[level] = 1; 1561 1562 btrfs_clear_path_blocking(p); 1563 1564 /* 1565 * we have a lock on b and as long as we aren't changing 1566 * the tree, there is no way to for the items in b to change. 1567 * It is safe to drop the lock on our parent before we 1568 * go through the expensive btree search on b. 1569 * 1570 * If cow is true, then we might be changing slot zero, 1571 * which may require changing the parent. So, we can't 1572 * drop the lock until after we know which slot we're 1573 * operating on. 1574 */ 1575 if (!cow) 1576 btrfs_unlock_up_safe(p, level + 1); 1577 1578 ret = check_block(root, p, level); 1579 if (ret) { 1580 ret = -1; 1581 goto done; 1582 } 1583 1584 ret = bin_search(b, key, level, &slot); 1585 1586 if (level != 0) { 1587 if (ret && slot > 0) 1588 slot -= 1; 1589 p->slots[level] = slot; 1590 if ((p->search_for_split || ins_len > 0) && 1591 btrfs_header_nritems(b) >= 1592 BTRFS_NODEPTRS_PER_BLOCK(root) - 3) { 1593 int sret; 1594 1595 sret = reada_for_balance(root, p, level); 1596 if (sret) 1597 goto again; 1598 1599 btrfs_set_path_blocking(p); 1600 sret = split_node(trans, root, p, level); 1601 btrfs_clear_path_blocking(p); 1602 1603 BUG_ON(sret > 0); 1604 if (sret) { 1605 ret = sret; 1606 goto done; 1607 } 1608 b = p->nodes[level]; 1609 slot = p->slots[level]; 1610 } else if (ins_len < 0 && 1611 btrfs_header_nritems(b) < 1612 BTRFS_NODEPTRS_PER_BLOCK(root) / 4) { 1613 int sret; 1614 1615 sret = reada_for_balance(root, p, level); 1616 if (sret) 1617 goto again; 1618 1619 btrfs_set_path_blocking(p); 1620 sret = balance_level(trans, root, p, level); 1621 btrfs_clear_path_blocking(p); 1622 1623 if (sret) { 1624 ret = sret; 1625 goto done; 1626 } 1627 b = p->nodes[level]; 1628 if (!b) { 1629 btrfs_release_path(NULL, p); 1630 goto again; 1631 } 1632 slot = p->slots[level]; 1633 BUG_ON(btrfs_header_nritems(b) == 1); 1634 } 1635 unlock_up(p, level, lowest_unlock); 1636 1637 /* this is only true while dropping a snapshot */ 1638 if (level == lowest_level) { 1639 ret = 0; 1640 goto done; 1641 } 1642 1643 blocknr = btrfs_node_blockptr(b, slot); 1644 gen = btrfs_node_ptr_generation(b, slot); 1645 blocksize = btrfs_level_size(root, level - 1); 1646 1647 tmp = btrfs_find_tree_block(root, blocknr, blocksize); 1648 if (tmp && btrfs_buffer_uptodate(tmp, gen)) { 1649 b = tmp; 1650 } else { 1651 /* 1652 * reduce lock contention at high levels 1653 * of the btree by dropping locks before 1654 * we read. 1655 */ 1656 if (level > 0) { 1657 btrfs_release_path(NULL, p); 1658 if (tmp) 1659 free_extent_buffer(tmp); 1660 if (should_reada) 1661 reada_for_search(root, p, 1662 level, slot, 1663 key->objectid); 1664 1665 tmp = read_tree_block(root, blocknr, 1666 blocksize, gen); 1667 if (tmp) 1668 free_extent_buffer(tmp); 1669 goto again; 1670 } else { 1671 btrfs_set_path_blocking(p); 1672 if (tmp) 1673 free_extent_buffer(tmp); 1674 if (should_reada) 1675 reada_for_search(root, p, 1676 level, slot, 1677 key->objectid); 1678 b = read_node_slot(root, b, slot); 1679 } 1680 } 1681 if (!p->skip_locking) { 1682 int lret; 1683 1684 btrfs_clear_path_blocking(p); 1685 lret = btrfs_try_spin_lock(b); 1686 1687 if (!lret) { 1688 btrfs_set_path_blocking(p); 1689 btrfs_tree_lock(b); 1690 btrfs_clear_path_blocking(p); 1691 } 1692 } 1693 } else { 1694 p->slots[level] = slot; 1695 if (ins_len > 0 && 1696 btrfs_leaf_free_space(root, b) < ins_len) { 1697 int sret; 1698 1699 btrfs_set_path_blocking(p); 1700 sret = split_leaf(trans, root, key, 1701 p, ins_len, ret == 0); 1702 btrfs_clear_path_blocking(p); 1703 1704 BUG_ON(sret > 0); 1705 if (sret) { 1706 ret = sret; 1707 goto done; 1708 } 1709 } 1710 if (!p->search_for_split) 1711 unlock_up(p, level, lowest_unlock); 1712 goto done; 1713 } 1714 } 1715 ret = 1; 1716 done: 1717 /* 1718 * we don't really know what they plan on doing with the path 1719 * from here on, so for now just mark it as blocking 1720 */ 1721 btrfs_set_path_blocking(p); 1722 if (prealloc_block.objectid) { 1723 btrfs_free_reserved_extent(root, 1724 prealloc_block.objectid, 1725 prealloc_block.offset); 1726 } 1727 return ret; 1728 } 1729 1730 int btrfs_merge_path(struct btrfs_trans_handle *trans, 1731 struct btrfs_root *root, 1732 struct btrfs_key *node_keys, 1733 u64 *nodes, int lowest_level) 1734 { 1735 struct extent_buffer *eb; 1736 struct extent_buffer *parent; 1737 struct btrfs_key key; 1738 u64 bytenr; 1739 u64 generation; 1740 u32 blocksize; 1741 int level; 1742 int slot; 1743 int key_match; 1744 int ret; 1745 1746 eb = btrfs_lock_root_node(root); 1747 ret = btrfs_cow_block(trans, root, eb, NULL, 0, &eb, 0); 1748 BUG_ON(ret); 1749 1750 btrfs_set_lock_blocking(eb); 1751 1752 parent = eb; 1753 while (1) { 1754 level = btrfs_header_level(parent); 1755 if (level == 0 || level <= lowest_level) 1756 break; 1757 1758 ret = bin_search(parent, &node_keys[lowest_level], level, 1759 &slot); 1760 if (ret && slot > 0) 1761 slot--; 1762 1763 bytenr = btrfs_node_blockptr(parent, slot); 1764 if (nodes[level - 1] == bytenr) 1765 break; 1766 1767 blocksize = btrfs_level_size(root, level - 1); 1768 generation = btrfs_node_ptr_generation(parent, slot); 1769 btrfs_node_key_to_cpu(eb, &key, slot); 1770 key_match = !memcmp(&key, &node_keys[level - 1], sizeof(key)); 1771 1772 if (generation == trans->transid) { 1773 eb = read_tree_block(root, bytenr, blocksize, 1774 generation); 1775 btrfs_tree_lock(eb); 1776 btrfs_set_lock_blocking(eb); 1777 } 1778 1779 /* 1780 * if node keys match and node pointer hasn't been modified 1781 * in the running transaction, we can merge the path. for 1782 * blocks owened by reloc trees, the node pointer check is 1783 * skipped, this is because these blocks are fully controlled 1784 * by the space balance code, no one else can modify them. 1785 */ 1786 if (!nodes[level - 1] || !key_match || 1787 (generation == trans->transid && 1788 btrfs_header_owner(eb) != BTRFS_TREE_RELOC_OBJECTID)) { 1789 if (level == 1 || level == lowest_level + 1) { 1790 if (generation == trans->transid) { 1791 btrfs_tree_unlock(eb); 1792 free_extent_buffer(eb); 1793 } 1794 break; 1795 } 1796 1797 if (generation != trans->transid) { 1798 eb = read_tree_block(root, bytenr, blocksize, 1799 generation); 1800 btrfs_tree_lock(eb); 1801 btrfs_set_lock_blocking(eb); 1802 } 1803 1804 ret = btrfs_cow_block(trans, root, eb, parent, slot, 1805 &eb, 0); 1806 BUG_ON(ret); 1807 1808 if (root->root_key.objectid == 1809 BTRFS_TREE_RELOC_OBJECTID) { 1810 if (!nodes[level - 1]) { 1811 nodes[level - 1] = eb->start; 1812 memcpy(&node_keys[level - 1], &key, 1813 sizeof(node_keys[0])); 1814 } else { 1815 WARN_ON(1); 1816 } 1817 } 1818 1819 btrfs_tree_unlock(parent); 1820 free_extent_buffer(parent); 1821 parent = eb; 1822 continue; 1823 } 1824 1825 btrfs_set_node_blockptr(parent, slot, nodes[level - 1]); 1826 btrfs_set_node_ptr_generation(parent, slot, trans->transid); 1827 btrfs_mark_buffer_dirty(parent); 1828 1829 ret = btrfs_inc_extent_ref(trans, root, 1830 nodes[level - 1], 1831 blocksize, parent->start, 1832 btrfs_header_owner(parent), 1833 btrfs_header_generation(parent), 1834 level - 1); 1835 BUG_ON(ret); 1836 1837 /* 1838 * If the block was created in the running transaction, 1839 * it's possible this is the last reference to it, so we 1840 * should drop the subtree. 1841 */ 1842 if (generation == trans->transid) { 1843 ret = btrfs_drop_subtree(trans, root, eb, parent); 1844 BUG_ON(ret); 1845 btrfs_tree_unlock(eb); 1846 free_extent_buffer(eb); 1847 } else { 1848 ret = btrfs_free_extent(trans, root, bytenr, 1849 blocksize, parent->start, 1850 btrfs_header_owner(parent), 1851 btrfs_header_generation(parent), 1852 level - 1, 1); 1853 BUG_ON(ret); 1854 } 1855 break; 1856 } 1857 btrfs_tree_unlock(parent); 1858 free_extent_buffer(parent); 1859 return 0; 1860 } 1861 1862 /* 1863 * adjust the pointers going up the tree, starting at level 1864 * making sure the right key of each node is points to 'key'. 1865 * This is used after shifting pointers to the left, so it stops 1866 * fixing up pointers when a given leaf/node is not in slot 0 of the 1867 * higher levels 1868 * 1869 * If this fails to write a tree block, it returns -1, but continues 1870 * fixing up the blocks in ram so the tree is consistent. 1871 */ 1872 static int fixup_low_keys(struct btrfs_trans_handle *trans, 1873 struct btrfs_root *root, struct btrfs_path *path, 1874 struct btrfs_disk_key *key, int level) 1875 { 1876 int i; 1877 int ret = 0; 1878 struct extent_buffer *t; 1879 1880 for (i = level; i < BTRFS_MAX_LEVEL; i++) { 1881 int tslot = path->slots[i]; 1882 if (!path->nodes[i]) 1883 break; 1884 t = path->nodes[i]; 1885 btrfs_set_node_key(t, key, tslot); 1886 btrfs_mark_buffer_dirty(path->nodes[i]); 1887 if (tslot != 0) 1888 break; 1889 } 1890 return ret; 1891 } 1892 1893 /* 1894 * update item key. 1895 * 1896 * This function isn't completely safe. It's the caller's responsibility 1897 * that the new key won't break the order 1898 */ 1899 int btrfs_set_item_key_safe(struct btrfs_trans_handle *trans, 1900 struct btrfs_root *root, struct btrfs_path *path, 1901 struct btrfs_key *new_key) 1902 { 1903 struct btrfs_disk_key disk_key; 1904 struct extent_buffer *eb; 1905 int slot; 1906 1907 eb = path->nodes[0]; 1908 slot = path->slots[0]; 1909 if (slot > 0) { 1910 btrfs_item_key(eb, &disk_key, slot - 1); 1911 if (comp_keys(&disk_key, new_key) >= 0) 1912 return -1; 1913 } 1914 if (slot < btrfs_header_nritems(eb) - 1) { 1915 btrfs_item_key(eb, &disk_key, slot + 1); 1916 if (comp_keys(&disk_key, new_key) <= 0) 1917 return -1; 1918 } 1919 1920 btrfs_cpu_key_to_disk(&disk_key, new_key); 1921 btrfs_set_item_key(eb, &disk_key, slot); 1922 btrfs_mark_buffer_dirty(eb); 1923 if (slot == 0) 1924 fixup_low_keys(trans, root, path, &disk_key, 1); 1925 return 0; 1926 } 1927 1928 /* 1929 * try to push data from one node into the next node left in the 1930 * tree. 1931 * 1932 * returns 0 if some ptrs were pushed left, < 0 if there was some horrible 1933 * error, and > 0 if there was no room in the left hand block. 1934 */ 1935 static int push_node_left(struct btrfs_trans_handle *trans, 1936 struct btrfs_root *root, struct extent_buffer *dst, 1937 struct extent_buffer *src, int empty) 1938 { 1939 int push_items = 0; 1940 int src_nritems; 1941 int dst_nritems; 1942 int ret = 0; 1943 1944 src_nritems = btrfs_header_nritems(src); 1945 dst_nritems = btrfs_header_nritems(dst); 1946 push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems; 1947 WARN_ON(btrfs_header_generation(src) != trans->transid); 1948 WARN_ON(btrfs_header_generation(dst) != trans->transid); 1949 1950 if (!empty && src_nritems <= 8) 1951 return 1; 1952 1953 if (push_items <= 0) 1954 return 1; 1955 1956 if (empty) { 1957 push_items = min(src_nritems, push_items); 1958 if (push_items < src_nritems) { 1959 /* leave at least 8 pointers in the node if 1960 * we aren't going to empty it 1961 */ 1962 if (src_nritems - push_items < 8) { 1963 if (push_items <= 8) 1964 return 1; 1965 push_items -= 8; 1966 } 1967 } 1968 } else 1969 push_items = min(src_nritems - 8, push_items); 1970 1971 copy_extent_buffer(dst, src, 1972 btrfs_node_key_ptr_offset(dst_nritems), 1973 btrfs_node_key_ptr_offset(0), 1974 push_items * sizeof(struct btrfs_key_ptr)); 1975 1976 if (push_items < src_nritems) { 1977 memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0), 1978 btrfs_node_key_ptr_offset(push_items), 1979 (src_nritems - push_items) * 1980 sizeof(struct btrfs_key_ptr)); 1981 } 1982 btrfs_set_header_nritems(src, src_nritems - push_items); 1983 btrfs_set_header_nritems(dst, dst_nritems + push_items); 1984 btrfs_mark_buffer_dirty(src); 1985 btrfs_mark_buffer_dirty(dst); 1986 1987 ret = btrfs_update_ref(trans, root, src, dst, dst_nritems, push_items); 1988 BUG_ON(ret); 1989 1990 return ret; 1991 } 1992 1993 /* 1994 * try to push data from one node into the next node right in the 1995 * tree. 1996 * 1997 * returns 0 if some ptrs were pushed, < 0 if there was some horrible 1998 * error, and > 0 if there was no room in the right hand block. 1999 * 2000 * this will only push up to 1/2 the contents of the left node over 2001 */ 2002 static int balance_node_right(struct btrfs_trans_handle *trans, 2003 struct btrfs_root *root, 2004 struct extent_buffer *dst, 2005 struct extent_buffer *src) 2006 { 2007 int push_items = 0; 2008 int max_push; 2009 int src_nritems; 2010 int dst_nritems; 2011 int ret = 0; 2012 2013 WARN_ON(btrfs_header_generation(src) != trans->transid); 2014 WARN_ON(btrfs_header_generation(dst) != trans->transid); 2015 2016 src_nritems = btrfs_header_nritems(src); 2017 dst_nritems = btrfs_header_nritems(dst); 2018 push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems; 2019 if (push_items <= 0) 2020 return 1; 2021 2022 if (src_nritems < 4) 2023 return 1; 2024 2025 max_push = src_nritems / 2 + 1; 2026 /* don't try to empty the node */ 2027 if (max_push >= src_nritems) 2028 return 1; 2029 2030 if (max_push < push_items) 2031 push_items = max_push; 2032 2033 memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items), 2034 btrfs_node_key_ptr_offset(0), 2035 (dst_nritems) * 2036 sizeof(struct btrfs_key_ptr)); 2037 2038 copy_extent_buffer(dst, src, 2039 btrfs_node_key_ptr_offset(0), 2040 btrfs_node_key_ptr_offset(src_nritems - push_items), 2041 push_items * sizeof(struct btrfs_key_ptr)); 2042 2043 btrfs_set_header_nritems(src, src_nritems - push_items); 2044 btrfs_set_header_nritems(dst, dst_nritems + push_items); 2045 2046 btrfs_mark_buffer_dirty(src); 2047 btrfs_mark_buffer_dirty(dst); 2048 2049 ret = btrfs_update_ref(trans, root, src, dst, 0, push_items); 2050 BUG_ON(ret); 2051 2052 return ret; 2053 } 2054 2055 /* 2056 * helper function to insert a new root level in the tree. 2057 * A new node is allocated, and a single item is inserted to 2058 * point to the existing root 2059 * 2060 * returns zero on success or < 0 on failure. 2061 */ 2062 static noinline int insert_new_root(struct btrfs_trans_handle *trans, 2063 struct btrfs_root *root, 2064 struct btrfs_path *path, int level) 2065 { 2066 u64 lower_gen; 2067 struct extent_buffer *lower; 2068 struct extent_buffer *c; 2069 struct extent_buffer *old; 2070 struct btrfs_disk_key lower_key; 2071 int ret; 2072 2073 BUG_ON(path->nodes[level]); 2074 BUG_ON(path->nodes[level-1] != root->node); 2075 2076 lower = path->nodes[level-1]; 2077 if (level == 1) 2078 btrfs_item_key(lower, &lower_key, 0); 2079 else 2080 btrfs_node_key(lower, &lower_key, 0); 2081 2082 c = btrfs_alloc_free_block(trans, root, root->nodesize, 0, 2083 root->root_key.objectid, trans->transid, 2084 level, root->node->start, 0); 2085 if (IS_ERR(c)) 2086 return PTR_ERR(c); 2087 2088 memset_extent_buffer(c, 0, 0, root->nodesize); 2089 btrfs_set_header_nritems(c, 1); 2090 btrfs_set_header_level(c, level); 2091 btrfs_set_header_bytenr(c, c->start); 2092 btrfs_set_header_generation(c, trans->transid); 2093 btrfs_set_header_owner(c, root->root_key.objectid); 2094 2095 write_extent_buffer(c, root->fs_info->fsid, 2096 (unsigned long)btrfs_header_fsid(c), 2097 BTRFS_FSID_SIZE); 2098 2099 write_extent_buffer(c, root->fs_info->chunk_tree_uuid, 2100 (unsigned long)btrfs_header_chunk_tree_uuid(c), 2101 BTRFS_UUID_SIZE); 2102 2103 btrfs_set_node_key(c, &lower_key, 0); 2104 btrfs_set_node_blockptr(c, 0, lower->start); 2105 lower_gen = btrfs_header_generation(lower); 2106 WARN_ON(lower_gen != trans->transid); 2107 2108 btrfs_set_node_ptr_generation(c, 0, lower_gen); 2109 2110 btrfs_mark_buffer_dirty(c); 2111 2112 spin_lock(&root->node_lock); 2113 old = root->node; 2114 root->node = c; 2115 spin_unlock(&root->node_lock); 2116 2117 ret = btrfs_update_extent_ref(trans, root, lower->start, 2118 lower->start, c->start, 2119 root->root_key.objectid, 2120 trans->transid, level - 1); 2121 BUG_ON(ret); 2122 2123 /* the super has an extra ref to root->node */ 2124 free_extent_buffer(old); 2125 2126 add_root_to_dirty_list(root); 2127 extent_buffer_get(c); 2128 path->nodes[level] = c; 2129 path->locks[level] = 1; 2130 path->slots[level] = 0; 2131 return 0; 2132 } 2133 2134 /* 2135 * worker function to insert a single pointer in a node. 2136 * the node should have enough room for the pointer already 2137 * 2138 * slot and level indicate where you want the key to go, and 2139 * blocknr is the block the key points to. 2140 * 2141 * returns zero on success and < 0 on any error 2142 */ 2143 static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root 2144 *root, struct btrfs_path *path, struct btrfs_disk_key 2145 *key, u64 bytenr, int slot, int level) 2146 { 2147 struct extent_buffer *lower; 2148 int nritems; 2149 2150 BUG_ON(!path->nodes[level]); 2151 lower = path->nodes[level]; 2152 nritems = btrfs_header_nritems(lower); 2153 if (slot > nritems) 2154 BUG(); 2155 if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root)) 2156 BUG(); 2157 if (slot != nritems) { 2158 memmove_extent_buffer(lower, 2159 btrfs_node_key_ptr_offset(slot + 1), 2160 btrfs_node_key_ptr_offset(slot), 2161 (nritems - slot) * sizeof(struct btrfs_key_ptr)); 2162 } 2163 btrfs_set_node_key(lower, key, slot); 2164 btrfs_set_node_blockptr(lower, slot, bytenr); 2165 WARN_ON(trans->transid == 0); 2166 btrfs_set_node_ptr_generation(lower, slot, trans->transid); 2167 btrfs_set_header_nritems(lower, nritems + 1); 2168 btrfs_mark_buffer_dirty(lower); 2169 return 0; 2170 } 2171 2172 /* 2173 * split the node at the specified level in path in two. 2174 * The path is corrected to point to the appropriate node after the split 2175 * 2176 * Before splitting this tries to make some room in the node by pushing 2177 * left and right, if either one works, it returns right away. 2178 * 2179 * returns 0 on success and < 0 on failure 2180 */ 2181 static noinline int split_node(struct btrfs_trans_handle *trans, 2182 struct btrfs_root *root, 2183 struct btrfs_path *path, int level) 2184 { 2185 struct extent_buffer *c; 2186 struct extent_buffer *split; 2187 struct btrfs_disk_key disk_key; 2188 int mid; 2189 int ret; 2190 int wret; 2191 u32 c_nritems; 2192 2193 c = path->nodes[level]; 2194 WARN_ON(btrfs_header_generation(c) != trans->transid); 2195 if (c == root->node) { 2196 /* trying to split the root, lets make a new one */ 2197 ret = insert_new_root(trans, root, path, level + 1); 2198 if (ret) 2199 return ret; 2200 } else { 2201 ret = push_nodes_for_insert(trans, root, path, level); 2202 c = path->nodes[level]; 2203 if (!ret && btrfs_header_nritems(c) < 2204 BTRFS_NODEPTRS_PER_BLOCK(root) - 3) 2205 return 0; 2206 if (ret < 0) 2207 return ret; 2208 } 2209 2210 c_nritems = btrfs_header_nritems(c); 2211 2212 split = btrfs_alloc_free_block(trans, root, root->nodesize, 2213 path->nodes[level + 1]->start, 2214 root->root_key.objectid, 2215 trans->transid, level, c->start, 0); 2216 if (IS_ERR(split)) 2217 return PTR_ERR(split); 2218 2219 btrfs_set_header_flags(split, btrfs_header_flags(c)); 2220 btrfs_set_header_level(split, btrfs_header_level(c)); 2221 btrfs_set_header_bytenr(split, split->start); 2222 btrfs_set_header_generation(split, trans->transid); 2223 btrfs_set_header_owner(split, root->root_key.objectid); 2224 btrfs_set_header_flags(split, 0); 2225 write_extent_buffer(split, root->fs_info->fsid, 2226 (unsigned long)btrfs_header_fsid(split), 2227 BTRFS_FSID_SIZE); 2228 write_extent_buffer(split, root->fs_info->chunk_tree_uuid, 2229 (unsigned long)btrfs_header_chunk_tree_uuid(split), 2230 BTRFS_UUID_SIZE); 2231 2232 mid = (c_nritems + 1) / 2; 2233 2234 copy_extent_buffer(split, c, 2235 btrfs_node_key_ptr_offset(0), 2236 btrfs_node_key_ptr_offset(mid), 2237 (c_nritems - mid) * sizeof(struct btrfs_key_ptr)); 2238 btrfs_set_header_nritems(split, c_nritems - mid); 2239 btrfs_set_header_nritems(c, mid); 2240 ret = 0; 2241 2242 btrfs_mark_buffer_dirty(c); 2243 btrfs_mark_buffer_dirty(split); 2244 2245 btrfs_node_key(split, &disk_key, 0); 2246 wret = insert_ptr(trans, root, path, &disk_key, split->start, 2247 path->slots[level + 1] + 1, 2248 level + 1); 2249 if (wret) 2250 ret = wret; 2251 2252 ret = btrfs_update_ref(trans, root, c, split, 0, c_nritems - mid); 2253 BUG_ON(ret); 2254 2255 if (path->slots[level] >= mid) { 2256 path->slots[level] -= mid; 2257 btrfs_tree_unlock(c); 2258 free_extent_buffer(c); 2259 path->nodes[level] = split; 2260 path->slots[level + 1] += 1; 2261 } else { 2262 btrfs_tree_unlock(split); 2263 free_extent_buffer(split); 2264 } 2265 return ret; 2266 } 2267 2268 /* 2269 * how many bytes are required to store the items in a leaf. start 2270 * and nr indicate which items in the leaf to check. This totals up the 2271 * space used both by the item structs and the item data 2272 */ 2273 static int leaf_space_used(struct extent_buffer *l, int start, int nr) 2274 { 2275 int data_len; 2276 int nritems = btrfs_header_nritems(l); 2277 int end = min(nritems, start + nr) - 1; 2278 2279 if (!nr) 2280 return 0; 2281 data_len = btrfs_item_end_nr(l, start); 2282 data_len = data_len - btrfs_item_offset_nr(l, end); 2283 data_len += sizeof(struct btrfs_item) * nr; 2284 WARN_ON(data_len < 0); 2285 return data_len; 2286 } 2287 2288 /* 2289 * The space between the end of the leaf items and 2290 * the start of the leaf data. IOW, how much room 2291 * the leaf has left for both items and data 2292 */ 2293 noinline int btrfs_leaf_free_space(struct btrfs_root *root, 2294 struct extent_buffer *leaf) 2295 { 2296 int nritems = btrfs_header_nritems(leaf); 2297 int ret; 2298 ret = BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems); 2299 if (ret < 0) { 2300 printk(KERN_CRIT "leaf free space ret %d, leaf data size %lu, " 2301 "used %d nritems %d\n", 2302 ret, (unsigned long) BTRFS_LEAF_DATA_SIZE(root), 2303 leaf_space_used(leaf, 0, nritems), nritems); 2304 } 2305 return ret; 2306 } 2307 2308 /* 2309 * push some data in the path leaf to the right, trying to free up at 2310 * least data_size bytes. returns zero if the push worked, nonzero otherwise 2311 * 2312 * returns 1 if the push failed because the other node didn't have enough 2313 * room, 0 if everything worked out and < 0 if there were major errors. 2314 */ 2315 static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root 2316 *root, struct btrfs_path *path, int data_size, 2317 int empty) 2318 { 2319 struct extent_buffer *left = path->nodes[0]; 2320 struct extent_buffer *right; 2321 struct extent_buffer *upper; 2322 struct btrfs_disk_key disk_key; 2323 int slot; 2324 u32 i; 2325 int free_space; 2326 int push_space = 0; 2327 int push_items = 0; 2328 struct btrfs_item *item; 2329 u32 left_nritems; 2330 u32 nr; 2331 u32 right_nritems; 2332 u32 data_end; 2333 u32 this_item_size; 2334 int ret; 2335 2336 slot = path->slots[1]; 2337 if (!path->nodes[1]) 2338 return 1; 2339 2340 upper = path->nodes[1]; 2341 if (slot >= btrfs_header_nritems(upper) - 1) 2342 return 1; 2343 2344 WARN_ON(!btrfs_tree_locked(path->nodes[1])); 2345 2346 right = read_node_slot(root, upper, slot + 1); 2347 btrfs_tree_lock(right); 2348 btrfs_set_lock_blocking(right); 2349 2350 free_space = btrfs_leaf_free_space(root, right); 2351 if (free_space < data_size) 2352 goto out_unlock; 2353 2354 /* cow and double check */ 2355 ret = btrfs_cow_block(trans, root, right, upper, 2356 slot + 1, &right, 0); 2357 if (ret) 2358 goto out_unlock; 2359 2360 free_space = btrfs_leaf_free_space(root, right); 2361 if (free_space < data_size) 2362 goto out_unlock; 2363 2364 left_nritems = btrfs_header_nritems(left); 2365 if (left_nritems == 0) 2366 goto out_unlock; 2367 2368 if (empty) 2369 nr = 0; 2370 else 2371 nr = 1; 2372 2373 if (path->slots[0] >= left_nritems) 2374 push_space += data_size; 2375 2376 i = left_nritems - 1; 2377 while (i >= nr) { 2378 item = btrfs_item_nr(left, i); 2379 2380 if (!empty && push_items > 0) { 2381 if (path->slots[0] > i) 2382 break; 2383 if (path->slots[0] == i) { 2384 int space = btrfs_leaf_free_space(root, left); 2385 if (space + push_space * 2 > free_space) 2386 break; 2387 } 2388 } 2389 2390 if (path->slots[0] == i) 2391 push_space += data_size; 2392 2393 if (!left->map_token) { 2394 map_extent_buffer(left, (unsigned long)item, 2395 sizeof(struct btrfs_item), 2396 &left->map_token, &left->kaddr, 2397 &left->map_start, &left->map_len, 2398 KM_USER1); 2399 } 2400 2401 this_item_size = btrfs_item_size(left, item); 2402 if (this_item_size + sizeof(*item) + push_space > free_space) 2403 break; 2404 2405 push_items++; 2406 push_space += this_item_size + sizeof(*item); 2407 if (i == 0) 2408 break; 2409 i--; 2410 } 2411 if (left->map_token) { 2412 unmap_extent_buffer(left, left->map_token, KM_USER1); 2413 left->map_token = NULL; 2414 } 2415 2416 if (push_items == 0) 2417 goto out_unlock; 2418 2419 if (!empty && push_items == left_nritems) 2420 WARN_ON(1); 2421 2422 /* push left to right */ 2423 right_nritems = btrfs_header_nritems(right); 2424 2425 push_space = btrfs_item_end_nr(left, left_nritems - push_items); 2426 push_space -= leaf_data_end(root, left); 2427 2428 /* make room in the right data area */ 2429 data_end = leaf_data_end(root, right); 2430 memmove_extent_buffer(right, 2431 btrfs_leaf_data(right) + data_end - push_space, 2432 btrfs_leaf_data(right) + data_end, 2433 BTRFS_LEAF_DATA_SIZE(root) - data_end); 2434 2435 /* copy from the left data area */ 2436 copy_extent_buffer(right, left, btrfs_leaf_data(right) + 2437 BTRFS_LEAF_DATA_SIZE(root) - push_space, 2438 btrfs_leaf_data(left) + leaf_data_end(root, left), 2439 push_space); 2440 2441 memmove_extent_buffer(right, btrfs_item_nr_offset(push_items), 2442 btrfs_item_nr_offset(0), 2443 right_nritems * sizeof(struct btrfs_item)); 2444 2445 /* copy the items from left to right */ 2446 copy_extent_buffer(right, left, btrfs_item_nr_offset(0), 2447 btrfs_item_nr_offset(left_nritems - push_items), 2448 push_items * sizeof(struct btrfs_item)); 2449 2450 /* update the item pointers */ 2451 right_nritems += push_items; 2452 btrfs_set_header_nritems(right, right_nritems); 2453 push_space = BTRFS_LEAF_DATA_SIZE(root); 2454 for (i = 0; i < right_nritems; i++) { 2455 item = btrfs_item_nr(right, i); 2456 if (!right->map_token) { 2457 map_extent_buffer(right, (unsigned long)item, 2458 sizeof(struct btrfs_item), 2459 &right->map_token, &right->kaddr, 2460 &right->map_start, &right->map_len, 2461 KM_USER1); 2462 } 2463 push_space -= btrfs_item_size(right, item); 2464 btrfs_set_item_offset(right, item, push_space); 2465 } 2466 2467 if (right->map_token) { 2468 unmap_extent_buffer(right, right->map_token, KM_USER1); 2469 right->map_token = NULL; 2470 } 2471 left_nritems -= push_items; 2472 btrfs_set_header_nritems(left, left_nritems); 2473 2474 if (left_nritems) 2475 btrfs_mark_buffer_dirty(left); 2476 btrfs_mark_buffer_dirty(right); 2477 2478 ret = btrfs_update_ref(trans, root, left, right, 0, push_items); 2479 BUG_ON(ret); 2480 2481 btrfs_item_key(right, &disk_key, 0); 2482 btrfs_set_node_key(upper, &disk_key, slot + 1); 2483 btrfs_mark_buffer_dirty(upper); 2484 2485 /* then fixup the leaf pointer in the path */ 2486 if (path->slots[0] >= left_nritems) { 2487 path->slots[0] -= left_nritems; 2488 if (btrfs_header_nritems(path->nodes[0]) == 0) 2489 clean_tree_block(trans, root, path->nodes[0]); 2490 btrfs_tree_unlock(path->nodes[0]); 2491 free_extent_buffer(path->nodes[0]); 2492 path->nodes[0] = right; 2493 path->slots[1] += 1; 2494 } else { 2495 btrfs_tree_unlock(right); 2496 free_extent_buffer(right); 2497 } 2498 return 0; 2499 2500 out_unlock: 2501 btrfs_tree_unlock(right); 2502 free_extent_buffer(right); 2503 return 1; 2504 } 2505 2506 /* 2507 * push some data in the path leaf to the left, trying to free up at 2508 * least data_size bytes. returns zero if the push worked, nonzero otherwise 2509 */ 2510 static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root 2511 *root, struct btrfs_path *path, int data_size, 2512 int empty) 2513 { 2514 struct btrfs_disk_key disk_key; 2515 struct extent_buffer *right = path->nodes[0]; 2516 struct extent_buffer *left; 2517 int slot; 2518 int i; 2519 int free_space; 2520 int push_space = 0; 2521 int push_items = 0; 2522 struct btrfs_item *item; 2523 u32 old_left_nritems; 2524 u32 right_nritems; 2525 u32 nr; 2526 int ret = 0; 2527 int wret; 2528 u32 this_item_size; 2529 u32 old_left_item_size; 2530 2531 slot = path->slots[1]; 2532 if (slot == 0) 2533 return 1; 2534 if (!path->nodes[1]) 2535 return 1; 2536 2537 right_nritems = btrfs_header_nritems(right); 2538 if (right_nritems == 0) 2539 return 1; 2540 2541 WARN_ON(!btrfs_tree_locked(path->nodes[1])); 2542 2543 left = read_node_slot(root, path->nodes[1], slot - 1); 2544 btrfs_tree_lock(left); 2545 btrfs_set_lock_blocking(left); 2546 2547 free_space = btrfs_leaf_free_space(root, left); 2548 if (free_space < data_size) { 2549 ret = 1; 2550 goto out; 2551 } 2552 2553 /* cow and double check */ 2554 ret = btrfs_cow_block(trans, root, left, 2555 path->nodes[1], slot - 1, &left, 0); 2556 if (ret) { 2557 /* we hit -ENOSPC, but it isn't fatal here */ 2558 ret = 1; 2559 goto out; 2560 } 2561 2562 free_space = btrfs_leaf_free_space(root, left); 2563 if (free_space < data_size) { 2564 ret = 1; 2565 goto out; 2566 } 2567 2568 if (empty) 2569 nr = right_nritems; 2570 else 2571 nr = right_nritems - 1; 2572 2573 for (i = 0; i < nr; i++) { 2574 item = btrfs_item_nr(right, i); 2575 if (!right->map_token) { 2576 map_extent_buffer(right, (unsigned long)item, 2577 sizeof(struct btrfs_item), 2578 &right->map_token, &right->kaddr, 2579 &right->map_start, &right->map_len, 2580 KM_USER1); 2581 } 2582 2583 if (!empty && push_items > 0) { 2584 if (path->slots[0] < i) 2585 break; 2586 if (path->slots[0] == i) { 2587 int space = btrfs_leaf_free_space(root, right); 2588 if (space + push_space * 2 > free_space) 2589 break; 2590 } 2591 } 2592 2593 if (path->slots[0] == i) 2594 push_space += data_size; 2595 2596 this_item_size = btrfs_item_size(right, item); 2597 if (this_item_size + sizeof(*item) + push_space > free_space) 2598 break; 2599 2600 push_items++; 2601 push_space += this_item_size + sizeof(*item); 2602 } 2603 2604 if (right->map_token) { 2605 unmap_extent_buffer(right, right->map_token, KM_USER1); 2606 right->map_token = NULL; 2607 } 2608 2609 if (push_items == 0) { 2610 ret = 1; 2611 goto out; 2612 } 2613 if (!empty && push_items == btrfs_header_nritems(right)) 2614 WARN_ON(1); 2615 2616 /* push data from right to left */ 2617 copy_extent_buffer(left, right, 2618 btrfs_item_nr_offset(btrfs_header_nritems(left)), 2619 btrfs_item_nr_offset(0), 2620 push_items * sizeof(struct btrfs_item)); 2621 2622 push_space = BTRFS_LEAF_DATA_SIZE(root) - 2623 btrfs_item_offset_nr(right, push_items - 1); 2624 2625 copy_extent_buffer(left, right, btrfs_leaf_data(left) + 2626 leaf_data_end(root, left) - push_space, 2627 btrfs_leaf_data(right) + 2628 btrfs_item_offset_nr(right, push_items - 1), 2629 push_space); 2630 old_left_nritems = btrfs_header_nritems(left); 2631 BUG_ON(old_left_nritems <= 0); 2632 2633 old_left_item_size = btrfs_item_offset_nr(left, old_left_nritems - 1); 2634 for (i = old_left_nritems; i < old_left_nritems + push_items; i++) { 2635 u32 ioff; 2636 2637 item = btrfs_item_nr(left, i); 2638 if (!left->map_token) { 2639 map_extent_buffer(left, (unsigned long)item, 2640 sizeof(struct btrfs_item), 2641 &left->map_token, &left->kaddr, 2642 &left->map_start, &left->map_len, 2643 KM_USER1); 2644 } 2645 2646 ioff = btrfs_item_offset(left, item); 2647 btrfs_set_item_offset(left, item, 2648 ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size)); 2649 } 2650 btrfs_set_header_nritems(left, old_left_nritems + push_items); 2651 if (left->map_token) { 2652 unmap_extent_buffer(left, left->map_token, KM_USER1); 2653 left->map_token = NULL; 2654 } 2655 2656 /* fixup right node */ 2657 if (push_items > right_nritems) { 2658 printk(KERN_CRIT "push items %d nr %u\n", push_items, 2659 right_nritems); 2660 WARN_ON(1); 2661 } 2662 2663 if (push_items < right_nritems) { 2664 push_space = btrfs_item_offset_nr(right, push_items - 1) - 2665 leaf_data_end(root, right); 2666 memmove_extent_buffer(right, btrfs_leaf_data(right) + 2667 BTRFS_LEAF_DATA_SIZE(root) - push_space, 2668 btrfs_leaf_data(right) + 2669 leaf_data_end(root, right), push_space); 2670 2671 memmove_extent_buffer(right, btrfs_item_nr_offset(0), 2672 btrfs_item_nr_offset(push_items), 2673 (btrfs_header_nritems(right) - push_items) * 2674 sizeof(struct btrfs_item)); 2675 } 2676 right_nritems -= push_items; 2677 btrfs_set_header_nritems(right, right_nritems); 2678 push_space = BTRFS_LEAF_DATA_SIZE(root); 2679 for (i = 0; i < right_nritems; i++) { 2680 item = btrfs_item_nr(right, i); 2681 2682 if (!right->map_token) { 2683 map_extent_buffer(right, (unsigned long)item, 2684 sizeof(struct btrfs_item), 2685 &right->map_token, &right->kaddr, 2686 &right->map_start, &right->map_len, 2687 KM_USER1); 2688 } 2689 2690 push_space = push_space - btrfs_item_size(right, item); 2691 btrfs_set_item_offset(right, item, push_space); 2692 } 2693 if (right->map_token) { 2694 unmap_extent_buffer(right, right->map_token, KM_USER1); 2695 right->map_token = NULL; 2696 } 2697 2698 btrfs_mark_buffer_dirty(left); 2699 if (right_nritems) 2700 btrfs_mark_buffer_dirty(right); 2701 2702 ret = btrfs_update_ref(trans, root, right, left, 2703 old_left_nritems, push_items); 2704 BUG_ON(ret); 2705 2706 btrfs_item_key(right, &disk_key, 0); 2707 wret = fixup_low_keys(trans, root, path, &disk_key, 1); 2708 if (wret) 2709 ret = wret; 2710 2711 /* then fixup the leaf pointer in the path */ 2712 if (path->slots[0] < push_items) { 2713 path->slots[0] += old_left_nritems; 2714 if (btrfs_header_nritems(path->nodes[0]) == 0) 2715 clean_tree_block(trans, root, path->nodes[0]); 2716 btrfs_tree_unlock(path->nodes[0]); 2717 free_extent_buffer(path->nodes[0]); 2718 path->nodes[0] = left; 2719 path->slots[1] -= 1; 2720 } else { 2721 btrfs_tree_unlock(left); 2722 free_extent_buffer(left); 2723 path->slots[0] -= push_items; 2724 } 2725 BUG_ON(path->slots[0] < 0); 2726 return ret; 2727 out: 2728 btrfs_tree_unlock(left); 2729 free_extent_buffer(left); 2730 return ret; 2731 } 2732 2733 /* 2734 * split the path's leaf in two, making sure there is at least data_size 2735 * available for the resulting leaf level of the path. 2736 * 2737 * returns 0 if all went well and < 0 on failure. 2738 */ 2739 static noinline int split_leaf(struct btrfs_trans_handle *trans, 2740 struct btrfs_root *root, 2741 struct btrfs_key *ins_key, 2742 struct btrfs_path *path, int data_size, 2743 int extend) 2744 { 2745 struct extent_buffer *l; 2746 u32 nritems; 2747 int mid; 2748 int slot; 2749 struct extent_buffer *right; 2750 int data_copy_size; 2751 int rt_data_off; 2752 int i; 2753 int ret = 0; 2754 int wret; 2755 int double_split; 2756 int num_doubles = 0; 2757 struct btrfs_disk_key disk_key; 2758 2759 /* first try to make some room by pushing left and right */ 2760 if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY) { 2761 wret = push_leaf_right(trans, root, path, data_size, 0); 2762 if (wret < 0) 2763 return wret; 2764 if (wret) { 2765 wret = push_leaf_left(trans, root, path, data_size, 0); 2766 if (wret < 0) 2767 return wret; 2768 } 2769 l = path->nodes[0]; 2770 2771 /* did the pushes work? */ 2772 if (btrfs_leaf_free_space(root, l) >= data_size) 2773 return 0; 2774 } 2775 2776 if (!path->nodes[1]) { 2777 ret = insert_new_root(trans, root, path, 1); 2778 if (ret) 2779 return ret; 2780 } 2781 again: 2782 double_split = 0; 2783 l = path->nodes[0]; 2784 slot = path->slots[0]; 2785 nritems = btrfs_header_nritems(l); 2786 mid = (nritems + 1) / 2; 2787 2788 right = btrfs_alloc_free_block(trans, root, root->leafsize, 2789 path->nodes[1]->start, 2790 root->root_key.objectid, 2791 trans->transid, 0, l->start, 0); 2792 if (IS_ERR(right)) { 2793 BUG_ON(1); 2794 return PTR_ERR(right); 2795 } 2796 2797 memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header)); 2798 btrfs_set_header_bytenr(right, right->start); 2799 btrfs_set_header_generation(right, trans->transid); 2800 btrfs_set_header_owner(right, root->root_key.objectid); 2801 btrfs_set_header_level(right, 0); 2802 write_extent_buffer(right, root->fs_info->fsid, 2803 (unsigned long)btrfs_header_fsid(right), 2804 BTRFS_FSID_SIZE); 2805 2806 write_extent_buffer(right, root->fs_info->chunk_tree_uuid, 2807 (unsigned long)btrfs_header_chunk_tree_uuid(right), 2808 BTRFS_UUID_SIZE); 2809 if (mid <= slot) { 2810 if (nritems == 1 || 2811 leaf_space_used(l, mid, nritems - mid) + data_size > 2812 BTRFS_LEAF_DATA_SIZE(root)) { 2813 if (slot >= nritems) { 2814 btrfs_cpu_key_to_disk(&disk_key, ins_key); 2815 btrfs_set_header_nritems(right, 0); 2816 wret = insert_ptr(trans, root, path, 2817 &disk_key, right->start, 2818 path->slots[1] + 1, 1); 2819 if (wret) 2820 ret = wret; 2821 2822 btrfs_tree_unlock(path->nodes[0]); 2823 free_extent_buffer(path->nodes[0]); 2824 path->nodes[0] = right; 2825 path->slots[0] = 0; 2826 path->slots[1] += 1; 2827 btrfs_mark_buffer_dirty(right); 2828 return ret; 2829 } 2830 mid = slot; 2831 if (mid != nritems && 2832 leaf_space_used(l, mid, nritems - mid) + 2833 data_size > BTRFS_LEAF_DATA_SIZE(root)) { 2834 double_split = 1; 2835 } 2836 } 2837 } else { 2838 if (leaf_space_used(l, 0, mid) + data_size > 2839 BTRFS_LEAF_DATA_SIZE(root)) { 2840 if (!extend && data_size && slot == 0) { 2841 btrfs_cpu_key_to_disk(&disk_key, ins_key); 2842 btrfs_set_header_nritems(right, 0); 2843 wret = insert_ptr(trans, root, path, 2844 &disk_key, 2845 right->start, 2846 path->slots[1], 1); 2847 if (wret) 2848 ret = wret; 2849 btrfs_tree_unlock(path->nodes[0]); 2850 free_extent_buffer(path->nodes[0]); 2851 path->nodes[0] = right; 2852 path->slots[0] = 0; 2853 if (path->slots[1] == 0) { 2854 wret = fixup_low_keys(trans, root, 2855 path, &disk_key, 1); 2856 if (wret) 2857 ret = wret; 2858 } 2859 btrfs_mark_buffer_dirty(right); 2860 return ret; 2861 } else if ((extend || !data_size) && slot == 0) { 2862 mid = 1; 2863 } else { 2864 mid = slot; 2865 if (mid != nritems && 2866 leaf_space_used(l, mid, nritems - mid) + 2867 data_size > BTRFS_LEAF_DATA_SIZE(root)) { 2868 double_split = 1; 2869 } 2870 } 2871 } 2872 } 2873 nritems = nritems - mid; 2874 btrfs_set_header_nritems(right, nritems); 2875 data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l); 2876 2877 copy_extent_buffer(right, l, btrfs_item_nr_offset(0), 2878 btrfs_item_nr_offset(mid), 2879 nritems * sizeof(struct btrfs_item)); 2880 2881 copy_extent_buffer(right, l, 2882 btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) - 2883 data_copy_size, btrfs_leaf_data(l) + 2884 leaf_data_end(root, l), data_copy_size); 2885 2886 rt_data_off = BTRFS_LEAF_DATA_SIZE(root) - 2887 btrfs_item_end_nr(l, mid); 2888 2889 for (i = 0; i < nritems; i++) { 2890 struct btrfs_item *item = btrfs_item_nr(right, i); 2891 u32 ioff; 2892 2893 if (!right->map_token) { 2894 map_extent_buffer(right, (unsigned long)item, 2895 sizeof(struct btrfs_item), 2896 &right->map_token, &right->kaddr, 2897 &right->map_start, &right->map_len, 2898 KM_USER1); 2899 } 2900 2901 ioff = btrfs_item_offset(right, item); 2902 btrfs_set_item_offset(right, item, ioff + rt_data_off); 2903 } 2904 2905 if (right->map_token) { 2906 unmap_extent_buffer(right, right->map_token, KM_USER1); 2907 right->map_token = NULL; 2908 } 2909 2910 btrfs_set_header_nritems(l, mid); 2911 ret = 0; 2912 btrfs_item_key(right, &disk_key, 0); 2913 wret = insert_ptr(trans, root, path, &disk_key, right->start, 2914 path->slots[1] + 1, 1); 2915 if (wret) 2916 ret = wret; 2917 2918 btrfs_mark_buffer_dirty(right); 2919 btrfs_mark_buffer_dirty(l); 2920 BUG_ON(path->slots[0] != slot); 2921 2922 ret = btrfs_update_ref(trans, root, l, right, 0, nritems); 2923 BUG_ON(ret); 2924 2925 if (mid <= slot) { 2926 btrfs_tree_unlock(path->nodes[0]); 2927 free_extent_buffer(path->nodes[0]); 2928 path->nodes[0] = right; 2929 path->slots[0] -= mid; 2930 path->slots[1] += 1; 2931 } else { 2932 btrfs_tree_unlock(right); 2933 free_extent_buffer(right); 2934 } 2935 2936 BUG_ON(path->slots[0] < 0); 2937 2938 if (double_split) { 2939 BUG_ON(num_doubles != 0); 2940 num_doubles++; 2941 goto again; 2942 } 2943 return ret; 2944 } 2945 2946 /* 2947 * This function splits a single item into two items, 2948 * giving 'new_key' to the new item and splitting the 2949 * old one at split_offset (from the start of the item). 2950 * 2951 * The path may be released by this operation. After 2952 * the split, the path is pointing to the old item. The 2953 * new item is going to be in the same node as the old one. 2954 * 2955 * Note, the item being split must be smaller enough to live alone on 2956 * a tree block with room for one extra struct btrfs_item 2957 * 2958 * This allows us to split the item in place, keeping a lock on the 2959 * leaf the entire time. 2960 */ 2961 int btrfs_split_item(struct btrfs_trans_handle *trans, 2962 struct btrfs_root *root, 2963 struct btrfs_path *path, 2964 struct btrfs_key *new_key, 2965 unsigned long split_offset) 2966 { 2967 u32 item_size; 2968 struct extent_buffer *leaf; 2969 struct btrfs_key orig_key; 2970 struct btrfs_item *item; 2971 struct btrfs_item *new_item; 2972 int ret = 0; 2973 int slot; 2974 u32 nritems; 2975 u32 orig_offset; 2976 struct btrfs_disk_key disk_key; 2977 char *buf; 2978 2979 leaf = path->nodes[0]; 2980 btrfs_item_key_to_cpu(leaf, &orig_key, path->slots[0]); 2981 if (btrfs_leaf_free_space(root, leaf) >= sizeof(struct btrfs_item)) 2982 goto split; 2983 2984 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 2985 btrfs_release_path(root, path); 2986 2987 path->search_for_split = 1; 2988 path->keep_locks = 1; 2989 2990 ret = btrfs_search_slot(trans, root, &orig_key, path, 0, 1); 2991 path->search_for_split = 0; 2992 2993 /* if our item isn't there or got smaller, return now */ 2994 if (ret != 0 || item_size != btrfs_item_size_nr(path->nodes[0], 2995 path->slots[0])) { 2996 path->keep_locks = 0; 2997 return -EAGAIN; 2998 } 2999 3000 ret = split_leaf(trans, root, &orig_key, path, 3001 sizeof(struct btrfs_item), 1); 3002 path->keep_locks = 0; 3003 BUG_ON(ret); 3004 3005 /* 3006 * make sure any changes to the path from split_leaf leave it 3007 * in a blocking state 3008 */ 3009 btrfs_set_path_blocking(path); 3010 3011 leaf = path->nodes[0]; 3012 BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item)); 3013 3014 split: 3015 item = btrfs_item_nr(leaf, path->slots[0]); 3016 orig_offset = btrfs_item_offset(leaf, item); 3017 item_size = btrfs_item_size(leaf, item); 3018 3019 3020 buf = kmalloc(item_size, GFP_NOFS); 3021 read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf, 3022 path->slots[0]), item_size); 3023 slot = path->slots[0] + 1; 3024 leaf = path->nodes[0]; 3025 3026 nritems = btrfs_header_nritems(leaf); 3027 3028 if (slot != nritems) { 3029 /* shift the items */ 3030 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1), 3031 btrfs_item_nr_offset(slot), 3032 (nritems - slot) * sizeof(struct btrfs_item)); 3033 3034 } 3035 3036 btrfs_cpu_key_to_disk(&disk_key, new_key); 3037 btrfs_set_item_key(leaf, &disk_key, slot); 3038 3039 new_item = btrfs_item_nr(leaf, slot); 3040 3041 btrfs_set_item_offset(leaf, new_item, orig_offset); 3042 btrfs_set_item_size(leaf, new_item, item_size - split_offset); 3043 3044 btrfs_set_item_offset(leaf, item, 3045 orig_offset + item_size - split_offset); 3046 btrfs_set_item_size(leaf, item, split_offset); 3047 3048 btrfs_set_header_nritems(leaf, nritems + 1); 3049 3050 /* write the data for the start of the original item */ 3051 write_extent_buffer(leaf, buf, 3052 btrfs_item_ptr_offset(leaf, path->slots[0]), 3053 split_offset); 3054 3055 /* write the data for the new item */ 3056 write_extent_buffer(leaf, buf + split_offset, 3057 btrfs_item_ptr_offset(leaf, slot), 3058 item_size - split_offset); 3059 btrfs_mark_buffer_dirty(leaf); 3060 3061 ret = 0; 3062 if (btrfs_leaf_free_space(root, leaf) < 0) { 3063 btrfs_print_leaf(root, leaf); 3064 BUG(); 3065 } 3066 kfree(buf); 3067 return ret; 3068 } 3069 3070 /* 3071 * make the item pointed to by the path smaller. new_size indicates 3072 * how small to make it, and from_end tells us if we just chop bytes 3073 * off the end of the item or if we shift the item to chop bytes off 3074 * the front. 3075 */ 3076 int btrfs_truncate_item(struct btrfs_trans_handle *trans, 3077 struct btrfs_root *root, 3078 struct btrfs_path *path, 3079 u32 new_size, int from_end) 3080 { 3081 int ret = 0; 3082 int slot; 3083 int slot_orig; 3084 struct extent_buffer *leaf; 3085 struct btrfs_item *item; 3086 u32 nritems; 3087 unsigned int data_end; 3088 unsigned int old_data_start; 3089 unsigned int old_size; 3090 unsigned int size_diff; 3091 int i; 3092 3093 slot_orig = path->slots[0]; 3094 leaf = path->nodes[0]; 3095 slot = path->slots[0]; 3096 3097 old_size = btrfs_item_size_nr(leaf, slot); 3098 if (old_size == new_size) 3099 return 0; 3100 3101 nritems = btrfs_header_nritems(leaf); 3102 data_end = leaf_data_end(root, leaf); 3103 3104 old_data_start = btrfs_item_offset_nr(leaf, slot); 3105 3106 size_diff = old_size - new_size; 3107 3108 BUG_ON(slot < 0); 3109 BUG_ON(slot >= nritems); 3110 3111 /* 3112 * item0..itemN ... dataN.offset..dataN.size .. data0.size 3113 */ 3114 /* first correct the data pointers */ 3115 for (i = slot; i < nritems; i++) { 3116 u32 ioff; 3117 item = btrfs_item_nr(leaf, i); 3118 3119 if (!leaf->map_token) { 3120 map_extent_buffer(leaf, (unsigned long)item, 3121 sizeof(struct btrfs_item), 3122 &leaf->map_token, &leaf->kaddr, 3123 &leaf->map_start, &leaf->map_len, 3124 KM_USER1); 3125 } 3126 3127 ioff = btrfs_item_offset(leaf, item); 3128 btrfs_set_item_offset(leaf, item, ioff + size_diff); 3129 } 3130 3131 if (leaf->map_token) { 3132 unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); 3133 leaf->map_token = NULL; 3134 } 3135 3136 /* shift the data */ 3137 if (from_end) { 3138 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + 3139 data_end + size_diff, btrfs_leaf_data(leaf) + 3140 data_end, old_data_start + new_size - data_end); 3141 } else { 3142 struct btrfs_disk_key disk_key; 3143 u64 offset; 3144 3145 btrfs_item_key(leaf, &disk_key, slot); 3146 3147 if (btrfs_disk_key_type(&disk_key) == BTRFS_EXTENT_DATA_KEY) { 3148 unsigned long ptr; 3149 struct btrfs_file_extent_item *fi; 3150 3151 fi = btrfs_item_ptr(leaf, slot, 3152 struct btrfs_file_extent_item); 3153 fi = (struct btrfs_file_extent_item *)( 3154 (unsigned long)fi - size_diff); 3155 3156 if (btrfs_file_extent_type(leaf, fi) == 3157 BTRFS_FILE_EXTENT_INLINE) { 3158 ptr = btrfs_item_ptr_offset(leaf, slot); 3159 memmove_extent_buffer(leaf, ptr, 3160 (unsigned long)fi, 3161 offsetof(struct btrfs_file_extent_item, 3162 disk_bytenr)); 3163 } 3164 } 3165 3166 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + 3167 data_end + size_diff, btrfs_leaf_data(leaf) + 3168 data_end, old_data_start - data_end); 3169 3170 offset = btrfs_disk_key_offset(&disk_key); 3171 btrfs_set_disk_key_offset(&disk_key, offset + size_diff); 3172 btrfs_set_item_key(leaf, &disk_key, slot); 3173 if (slot == 0) 3174 fixup_low_keys(trans, root, path, &disk_key, 1); 3175 } 3176 3177 item = btrfs_item_nr(leaf, slot); 3178 btrfs_set_item_size(leaf, item, new_size); 3179 btrfs_mark_buffer_dirty(leaf); 3180 3181 ret = 0; 3182 if (btrfs_leaf_free_space(root, leaf) < 0) { 3183 btrfs_print_leaf(root, leaf); 3184 BUG(); 3185 } 3186 return ret; 3187 } 3188 3189 /* 3190 * make the item pointed to by the path bigger, data_size is the new size. 3191 */ 3192 int btrfs_extend_item(struct btrfs_trans_handle *trans, 3193 struct btrfs_root *root, struct btrfs_path *path, 3194 u32 data_size) 3195 { 3196 int ret = 0; 3197 int slot; 3198 int slot_orig; 3199 struct extent_buffer *leaf; 3200 struct btrfs_item *item; 3201 u32 nritems; 3202 unsigned int data_end; 3203 unsigned int old_data; 3204 unsigned int old_size; 3205 int i; 3206 3207 slot_orig = path->slots[0]; 3208 leaf = path->nodes[0]; 3209 3210 nritems = btrfs_header_nritems(leaf); 3211 data_end = leaf_data_end(root, leaf); 3212 3213 if (btrfs_leaf_free_space(root, leaf) < data_size) { 3214 btrfs_print_leaf(root, leaf); 3215 BUG(); 3216 } 3217 slot = path->slots[0]; 3218 old_data = btrfs_item_end_nr(leaf, slot); 3219 3220 BUG_ON(slot < 0); 3221 if (slot >= nritems) { 3222 btrfs_print_leaf(root, leaf); 3223 printk(KERN_CRIT "slot %d too large, nritems %d\n", 3224 slot, nritems); 3225 BUG_ON(1); 3226 } 3227 3228 /* 3229 * item0..itemN ... dataN.offset..dataN.size .. data0.size 3230 */ 3231 /* first correct the data pointers */ 3232 for (i = slot; i < nritems; i++) { 3233 u32 ioff; 3234 item = btrfs_item_nr(leaf, i); 3235 3236 if (!leaf->map_token) { 3237 map_extent_buffer(leaf, (unsigned long)item, 3238 sizeof(struct btrfs_item), 3239 &leaf->map_token, &leaf->kaddr, 3240 &leaf->map_start, &leaf->map_len, 3241 KM_USER1); 3242 } 3243 ioff = btrfs_item_offset(leaf, item); 3244 btrfs_set_item_offset(leaf, item, ioff - data_size); 3245 } 3246 3247 if (leaf->map_token) { 3248 unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); 3249 leaf->map_token = NULL; 3250 } 3251 3252 /* shift the data */ 3253 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + 3254 data_end - data_size, btrfs_leaf_data(leaf) + 3255 data_end, old_data - data_end); 3256 3257 data_end = old_data; 3258 old_size = btrfs_item_size_nr(leaf, slot); 3259 item = btrfs_item_nr(leaf, slot); 3260 btrfs_set_item_size(leaf, item, old_size + data_size); 3261 btrfs_mark_buffer_dirty(leaf); 3262 3263 ret = 0; 3264 if (btrfs_leaf_free_space(root, leaf) < 0) { 3265 btrfs_print_leaf(root, leaf); 3266 BUG(); 3267 } 3268 return ret; 3269 } 3270 3271 /* 3272 * Given a key and some data, insert items into the tree. 3273 * This does all the path init required, making room in the tree if needed. 3274 * Returns the number of keys that were inserted. 3275 */ 3276 int btrfs_insert_some_items(struct btrfs_trans_handle *trans, 3277 struct btrfs_root *root, 3278 struct btrfs_path *path, 3279 struct btrfs_key *cpu_key, u32 *data_size, 3280 int nr) 3281 { 3282 struct extent_buffer *leaf; 3283 struct btrfs_item *item; 3284 int ret = 0; 3285 int slot; 3286 int i; 3287 u32 nritems; 3288 u32 total_data = 0; 3289 u32 total_size = 0; 3290 unsigned int data_end; 3291 struct btrfs_disk_key disk_key; 3292 struct btrfs_key found_key; 3293 3294 for (i = 0; i < nr; i++) { 3295 if (total_size + data_size[i] + sizeof(struct btrfs_item) > 3296 BTRFS_LEAF_DATA_SIZE(root)) { 3297 break; 3298 nr = i; 3299 } 3300 total_data += data_size[i]; 3301 total_size += data_size[i] + sizeof(struct btrfs_item); 3302 } 3303 BUG_ON(nr == 0); 3304 3305 ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1); 3306 if (ret == 0) 3307 return -EEXIST; 3308 if (ret < 0) 3309 goto out; 3310 3311 leaf = path->nodes[0]; 3312 3313 nritems = btrfs_header_nritems(leaf); 3314 data_end = leaf_data_end(root, leaf); 3315 3316 if (btrfs_leaf_free_space(root, leaf) < total_size) { 3317 for (i = nr; i >= 0; i--) { 3318 total_data -= data_size[i]; 3319 total_size -= data_size[i] + sizeof(struct btrfs_item); 3320 if (total_size < btrfs_leaf_free_space(root, leaf)) 3321 break; 3322 } 3323 nr = i; 3324 } 3325 3326 slot = path->slots[0]; 3327 BUG_ON(slot < 0); 3328 3329 if (slot != nritems) { 3330 unsigned int old_data = btrfs_item_end_nr(leaf, slot); 3331 3332 item = btrfs_item_nr(leaf, slot); 3333 btrfs_item_key_to_cpu(leaf, &found_key, slot); 3334 3335 /* figure out how many keys we can insert in here */ 3336 total_data = data_size[0]; 3337 for (i = 1; i < nr; i++) { 3338 if (comp_cpu_keys(&found_key, cpu_key + i) <= 0) 3339 break; 3340 total_data += data_size[i]; 3341 } 3342 nr = i; 3343 3344 if (old_data < data_end) { 3345 btrfs_print_leaf(root, leaf); 3346 printk(KERN_CRIT "slot %d old_data %d data_end %d\n", 3347 slot, old_data, data_end); 3348 BUG_ON(1); 3349 } 3350 /* 3351 * item0..itemN ... dataN.offset..dataN.size .. data0.size 3352 */ 3353 /* first correct the data pointers */ 3354 WARN_ON(leaf->map_token); 3355 for (i = slot; i < nritems; i++) { 3356 u32 ioff; 3357 3358 item = btrfs_item_nr(leaf, i); 3359 if (!leaf->map_token) { 3360 map_extent_buffer(leaf, (unsigned long)item, 3361 sizeof(struct btrfs_item), 3362 &leaf->map_token, &leaf->kaddr, 3363 &leaf->map_start, &leaf->map_len, 3364 KM_USER1); 3365 } 3366 3367 ioff = btrfs_item_offset(leaf, item); 3368 btrfs_set_item_offset(leaf, item, ioff - total_data); 3369 } 3370 if (leaf->map_token) { 3371 unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); 3372 leaf->map_token = NULL; 3373 } 3374 3375 /* shift the items */ 3376 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr), 3377 btrfs_item_nr_offset(slot), 3378 (nritems - slot) * sizeof(struct btrfs_item)); 3379 3380 /* shift the data */ 3381 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + 3382 data_end - total_data, btrfs_leaf_data(leaf) + 3383 data_end, old_data - data_end); 3384 data_end = old_data; 3385 } else { 3386 /* 3387 * this sucks but it has to be done, if we are inserting at 3388 * the end of the leaf only insert 1 of the items, since we 3389 * have no way of knowing whats on the next leaf and we'd have 3390 * to drop our current locks to figure it out 3391 */ 3392 nr = 1; 3393 } 3394 3395 /* setup the item for the new data */ 3396 for (i = 0; i < nr; i++) { 3397 btrfs_cpu_key_to_disk(&disk_key, cpu_key + i); 3398 btrfs_set_item_key(leaf, &disk_key, slot + i); 3399 item = btrfs_item_nr(leaf, slot + i); 3400 btrfs_set_item_offset(leaf, item, data_end - data_size[i]); 3401 data_end -= data_size[i]; 3402 btrfs_set_item_size(leaf, item, data_size[i]); 3403 } 3404 btrfs_set_header_nritems(leaf, nritems + nr); 3405 btrfs_mark_buffer_dirty(leaf); 3406 3407 ret = 0; 3408 if (slot == 0) { 3409 btrfs_cpu_key_to_disk(&disk_key, cpu_key); 3410 ret = fixup_low_keys(trans, root, path, &disk_key, 1); 3411 } 3412 3413 if (btrfs_leaf_free_space(root, leaf) < 0) { 3414 btrfs_print_leaf(root, leaf); 3415 BUG(); 3416 } 3417 out: 3418 if (!ret) 3419 ret = nr; 3420 return ret; 3421 } 3422 3423 /* 3424 * Given a key and some data, insert items into the tree. 3425 * This does all the path init required, making room in the tree if needed. 3426 */ 3427 int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, 3428 struct btrfs_root *root, 3429 struct btrfs_path *path, 3430 struct btrfs_key *cpu_key, u32 *data_size, 3431 int nr) 3432 { 3433 struct extent_buffer *leaf; 3434 struct btrfs_item *item; 3435 int ret = 0; 3436 int slot; 3437 int slot_orig; 3438 int i; 3439 u32 nritems; 3440 u32 total_size = 0; 3441 u32 total_data = 0; 3442 unsigned int data_end; 3443 struct btrfs_disk_key disk_key; 3444 3445 for (i = 0; i < nr; i++) 3446 total_data += data_size[i]; 3447 3448 total_size = total_data + (nr * sizeof(struct btrfs_item)); 3449 ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1); 3450 if (ret == 0) 3451 return -EEXIST; 3452 if (ret < 0) 3453 goto out; 3454 3455 slot_orig = path->slots[0]; 3456 leaf = path->nodes[0]; 3457 3458 nritems = btrfs_header_nritems(leaf); 3459 data_end = leaf_data_end(root, leaf); 3460 3461 if (btrfs_leaf_free_space(root, leaf) < total_size) { 3462 btrfs_print_leaf(root, leaf); 3463 printk(KERN_CRIT "not enough freespace need %u have %d\n", 3464 total_size, btrfs_leaf_free_space(root, leaf)); 3465 BUG(); 3466 } 3467 3468 slot = path->slots[0]; 3469 BUG_ON(slot < 0); 3470 3471 if (slot != nritems) { 3472 unsigned int old_data = btrfs_item_end_nr(leaf, slot); 3473 3474 if (old_data < data_end) { 3475 btrfs_print_leaf(root, leaf); 3476 printk(KERN_CRIT "slot %d old_data %d data_end %d\n", 3477 slot, old_data, data_end); 3478 BUG_ON(1); 3479 } 3480 /* 3481 * item0..itemN ... dataN.offset..dataN.size .. data0.size 3482 */ 3483 /* first correct the data pointers */ 3484 WARN_ON(leaf->map_token); 3485 for (i = slot; i < nritems; i++) { 3486 u32 ioff; 3487 3488 item = btrfs_item_nr(leaf, i); 3489 if (!leaf->map_token) { 3490 map_extent_buffer(leaf, (unsigned long)item, 3491 sizeof(struct btrfs_item), 3492 &leaf->map_token, &leaf->kaddr, 3493 &leaf->map_start, &leaf->map_len, 3494 KM_USER1); 3495 } 3496 3497 ioff = btrfs_item_offset(leaf, item); 3498 btrfs_set_item_offset(leaf, item, ioff - total_data); 3499 } 3500 if (leaf->map_token) { 3501 unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); 3502 leaf->map_token = NULL; 3503 } 3504 3505 /* shift the items */ 3506 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr), 3507 btrfs_item_nr_offset(slot), 3508 (nritems - slot) * sizeof(struct btrfs_item)); 3509 3510 /* shift the data */ 3511 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + 3512 data_end - total_data, btrfs_leaf_data(leaf) + 3513 data_end, old_data - data_end); 3514 data_end = old_data; 3515 } 3516 3517 /* setup the item for the new data */ 3518 for (i = 0; i < nr; i++) { 3519 btrfs_cpu_key_to_disk(&disk_key, cpu_key + i); 3520 btrfs_set_item_key(leaf, &disk_key, slot + i); 3521 item = btrfs_item_nr(leaf, slot + i); 3522 btrfs_set_item_offset(leaf, item, data_end - data_size[i]); 3523 data_end -= data_size[i]; 3524 btrfs_set_item_size(leaf, item, data_size[i]); 3525 } 3526 btrfs_set_header_nritems(leaf, nritems + nr); 3527 btrfs_mark_buffer_dirty(leaf); 3528 3529 ret = 0; 3530 if (slot == 0) { 3531 btrfs_cpu_key_to_disk(&disk_key, cpu_key); 3532 ret = fixup_low_keys(trans, root, path, &disk_key, 1); 3533 } 3534 3535 if (btrfs_leaf_free_space(root, leaf) < 0) { 3536 btrfs_print_leaf(root, leaf); 3537 BUG(); 3538 } 3539 out: 3540 btrfs_unlock_up_safe(path, 1); 3541 return ret; 3542 } 3543 3544 /* 3545 * Given a key and some data, insert an item into the tree. 3546 * This does all the path init required, making room in the tree if needed. 3547 */ 3548 int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root 3549 *root, struct btrfs_key *cpu_key, void *data, u32 3550 data_size) 3551 { 3552 int ret = 0; 3553 struct btrfs_path *path; 3554 struct extent_buffer *leaf; 3555 unsigned long ptr; 3556 3557 path = btrfs_alloc_path(); 3558 BUG_ON(!path); 3559 ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size); 3560 if (!ret) { 3561 leaf = path->nodes[0]; 3562 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); 3563 write_extent_buffer(leaf, data, ptr, data_size); 3564 btrfs_mark_buffer_dirty(leaf); 3565 } 3566 btrfs_free_path(path); 3567 return ret; 3568 } 3569 3570 /* 3571 * delete the pointer from a given node. 3572 * 3573 * the tree should have been previously balanced so the deletion does not 3574 * empty a node. 3575 */ 3576 static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, 3577 struct btrfs_path *path, int level, int slot) 3578 { 3579 struct extent_buffer *parent = path->nodes[level]; 3580 u32 nritems; 3581 int ret = 0; 3582 int wret; 3583 3584 nritems = btrfs_header_nritems(parent); 3585 if (slot != nritems - 1) { 3586 memmove_extent_buffer(parent, 3587 btrfs_node_key_ptr_offset(slot), 3588 btrfs_node_key_ptr_offset(slot + 1), 3589 sizeof(struct btrfs_key_ptr) * 3590 (nritems - slot - 1)); 3591 } 3592 nritems--; 3593 btrfs_set_header_nritems(parent, nritems); 3594 if (nritems == 0 && parent == root->node) { 3595 BUG_ON(btrfs_header_level(root->node) != 1); 3596 /* just turn the root into a leaf and break */ 3597 btrfs_set_header_level(root->node, 0); 3598 } else if (slot == 0) { 3599 struct btrfs_disk_key disk_key; 3600 3601 btrfs_node_key(parent, &disk_key, 0); 3602 wret = fixup_low_keys(trans, root, path, &disk_key, level + 1); 3603 if (wret) 3604 ret = wret; 3605 } 3606 btrfs_mark_buffer_dirty(parent); 3607 return ret; 3608 } 3609 3610 /* 3611 * a helper function to delete the leaf pointed to by path->slots[1] and 3612 * path->nodes[1]. bytenr is the node block pointer, but since the callers 3613 * already know it, it is faster to have them pass it down than to 3614 * read it out of the node again. 3615 * 3616 * This deletes the pointer in path->nodes[1] and frees the leaf 3617 * block extent. zero is returned if it all worked out, < 0 otherwise. 3618 * 3619 * The path must have already been setup for deleting the leaf, including 3620 * all the proper balancing. path->nodes[1] must be locked. 3621 */ 3622 noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans, 3623 struct btrfs_root *root, 3624 struct btrfs_path *path, u64 bytenr) 3625 { 3626 int ret; 3627 u64 root_gen = btrfs_header_generation(path->nodes[1]); 3628 u64 parent_start = path->nodes[1]->start; 3629 u64 parent_owner = btrfs_header_owner(path->nodes[1]); 3630 3631 ret = del_ptr(trans, root, path, 1, path->slots[1]); 3632 if (ret) 3633 return ret; 3634 3635 /* 3636 * btrfs_free_extent is expensive, we want to make sure we 3637 * aren't holding any locks when we call it 3638 */ 3639 btrfs_unlock_up_safe(path, 0); 3640 3641 ret = btrfs_free_extent(trans, root, bytenr, 3642 btrfs_level_size(root, 0), 3643 parent_start, parent_owner, 3644 root_gen, 0, 1); 3645 return ret; 3646 } 3647 /* 3648 * delete the item at the leaf level in path. If that empties 3649 * the leaf, remove it from the tree 3650 */ 3651 int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, 3652 struct btrfs_path *path, int slot, int nr) 3653 { 3654 struct extent_buffer *leaf; 3655 struct btrfs_item *item; 3656 int last_off; 3657 int dsize = 0; 3658 int ret = 0; 3659 int wret; 3660 int i; 3661 u32 nritems; 3662 3663 leaf = path->nodes[0]; 3664 last_off = btrfs_item_offset_nr(leaf, slot + nr - 1); 3665 3666 for (i = 0; i < nr; i++) 3667 dsize += btrfs_item_size_nr(leaf, slot + i); 3668 3669 nritems = btrfs_header_nritems(leaf); 3670 3671 if (slot + nr != nritems) { 3672 int data_end = leaf_data_end(root, leaf); 3673 3674 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + 3675 data_end + dsize, 3676 btrfs_leaf_data(leaf) + data_end, 3677 last_off - data_end); 3678 3679 for (i = slot + nr; i < nritems; i++) { 3680 u32 ioff; 3681 3682 item = btrfs_item_nr(leaf, i); 3683 if (!leaf->map_token) { 3684 map_extent_buffer(leaf, (unsigned long)item, 3685 sizeof(struct btrfs_item), 3686 &leaf->map_token, &leaf->kaddr, 3687 &leaf->map_start, &leaf->map_len, 3688 KM_USER1); 3689 } 3690 ioff = btrfs_item_offset(leaf, item); 3691 btrfs_set_item_offset(leaf, item, ioff + dsize); 3692 } 3693 3694 if (leaf->map_token) { 3695 unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); 3696 leaf->map_token = NULL; 3697 } 3698 3699 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot), 3700 btrfs_item_nr_offset(slot + nr), 3701 sizeof(struct btrfs_item) * 3702 (nritems - slot - nr)); 3703 } 3704 btrfs_set_header_nritems(leaf, nritems - nr); 3705 nritems -= nr; 3706 3707 /* delete the leaf if we've emptied it */ 3708 if (nritems == 0) { 3709 if (leaf == root->node) { 3710 btrfs_set_header_level(leaf, 0); 3711 } else { 3712 ret = btrfs_del_leaf(trans, root, path, leaf->start); 3713 BUG_ON(ret); 3714 } 3715 } else { 3716 int used = leaf_space_used(leaf, 0, nritems); 3717 if (slot == 0) { 3718 struct btrfs_disk_key disk_key; 3719 3720 btrfs_item_key(leaf, &disk_key, 0); 3721 wret = fixup_low_keys(trans, root, path, 3722 &disk_key, 1); 3723 if (wret) 3724 ret = wret; 3725 } 3726 3727 /* delete the leaf if it is mostly empty */ 3728 if (used < BTRFS_LEAF_DATA_SIZE(root) / 4) { 3729 /* push_leaf_left fixes the path. 3730 * make sure the path still points to our leaf 3731 * for possible call to del_ptr below 3732 */ 3733 slot = path->slots[1]; 3734 extent_buffer_get(leaf); 3735 3736 wret = push_leaf_left(trans, root, path, 1, 1); 3737 if (wret < 0 && wret != -ENOSPC) 3738 ret = wret; 3739 3740 if (path->nodes[0] == leaf && 3741 btrfs_header_nritems(leaf)) { 3742 wret = push_leaf_right(trans, root, path, 1, 1); 3743 if (wret < 0 && wret != -ENOSPC) 3744 ret = wret; 3745 } 3746 3747 if (btrfs_header_nritems(leaf) == 0) { 3748 path->slots[1] = slot; 3749 ret = btrfs_del_leaf(trans, root, path, 3750 leaf->start); 3751 BUG_ON(ret); 3752 free_extent_buffer(leaf); 3753 } else { 3754 /* if we're still in the path, make sure 3755 * we're dirty. Otherwise, one of the 3756 * push_leaf functions must have already 3757 * dirtied this buffer 3758 */ 3759 if (path->nodes[0] == leaf) 3760 btrfs_mark_buffer_dirty(leaf); 3761 free_extent_buffer(leaf); 3762 } 3763 } else { 3764 btrfs_mark_buffer_dirty(leaf); 3765 } 3766 } 3767 return ret; 3768 } 3769 3770 /* 3771 * search the tree again to find a leaf with lesser keys 3772 * returns 0 if it found something or 1 if there are no lesser leaves. 3773 * returns < 0 on io errors. 3774 * 3775 * This may release the path, and so you may lose any locks held at the 3776 * time you call it. 3777 */ 3778 int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) 3779 { 3780 struct btrfs_key key; 3781 struct btrfs_disk_key found_key; 3782 int ret; 3783 3784 btrfs_item_key_to_cpu(path->nodes[0], &key, 0); 3785 3786 if (key.offset > 0) 3787 key.offset--; 3788 else if (key.type > 0) 3789 key.type--; 3790 else if (key.objectid > 0) 3791 key.objectid--; 3792 else 3793 return 1; 3794 3795 btrfs_release_path(root, path); 3796 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 3797 if (ret < 0) 3798 return ret; 3799 btrfs_item_key(path->nodes[0], &found_key, 0); 3800 ret = comp_keys(&found_key, &key); 3801 if (ret < 0) 3802 return 0; 3803 return 1; 3804 } 3805 3806 /* 3807 * A helper function to walk down the tree starting at min_key, and looking 3808 * for nodes or leaves that are either in cache or have a minimum 3809 * transaction id. This is used by the btree defrag code, and tree logging 3810 * 3811 * This does not cow, but it does stuff the starting key it finds back 3812 * into min_key, so you can call btrfs_search_slot with cow=1 on the 3813 * key and get a writable path. 3814 * 3815 * This does lock as it descends, and path->keep_locks should be set 3816 * to 1 by the caller. 3817 * 3818 * This honors path->lowest_level to prevent descent past a given level 3819 * of the tree. 3820 * 3821 * min_trans indicates the oldest transaction that you are interested 3822 * in walking through. Any nodes or leaves older than min_trans are 3823 * skipped over (without reading them). 3824 * 3825 * returns zero if something useful was found, < 0 on error and 1 if there 3826 * was nothing in the tree that matched the search criteria. 3827 */ 3828 int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key, 3829 struct btrfs_key *max_key, 3830 struct btrfs_path *path, int cache_only, 3831 u64 min_trans) 3832 { 3833 struct extent_buffer *cur; 3834 struct btrfs_key found_key; 3835 int slot; 3836 int sret; 3837 u32 nritems; 3838 int level; 3839 int ret = 1; 3840 3841 WARN_ON(!path->keep_locks); 3842 again: 3843 cur = btrfs_lock_root_node(root); 3844 level = btrfs_header_level(cur); 3845 WARN_ON(path->nodes[level]); 3846 path->nodes[level] = cur; 3847 path->locks[level] = 1; 3848 3849 if (btrfs_header_generation(cur) < min_trans) { 3850 ret = 1; 3851 goto out; 3852 } 3853 while (1) { 3854 nritems = btrfs_header_nritems(cur); 3855 level = btrfs_header_level(cur); 3856 sret = bin_search(cur, min_key, level, &slot); 3857 3858 /* at the lowest level, we're done, setup the path and exit */ 3859 if (level == path->lowest_level) { 3860 if (slot >= nritems) 3861 goto find_next_key; 3862 ret = 0; 3863 path->slots[level] = slot; 3864 btrfs_item_key_to_cpu(cur, &found_key, slot); 3865 goto out; 3866 } 3867 if (sret && slot > 0) 3868 slot--; 3869 /* 3870 * check this node pointer against the cache_only and 3871 * min_trans parameters. If it isn't in cache or is too 3872 * old, skip to the next one. 3873 */ 3874 while (slot < nritems) { 3875 u64 blockptr; 3876 u64 gen; 3877 struct extent_buffer *tmp; 3878 struct btrfs_disk_key disk_key; 3879 3880 blockptr = btrfs_node_blockptr(cur, slot); 3881 gen = btrfs_node_ptr_generation(cur, slot); 3882 if (gen < min_trans) { 3883 slot++; 3884 continue; 3885 } 3886 if (!cache_only) 3887 break; 3888 3889 if (max_key) { 3890 btrfs_node_key(cur, &disk_key, slot); 3891 if (comp_keys(&disk_key, max_key) >= 0) { 3892 ret = 1; 3893 goto out; 3894 } 3895 } 3896 3897 tmp = btrfs_find_tree_block(root, blockptr, 3898 btrfs_level_size(root, level - 1)); 3899 3900 if (tmp && btrfs_buffer_uptodate(tmp, gen)) { 3901 free_extent_buffer(tmp); 3902 break; 3903 } 3904 if (tmp) 3905 free_extent_buffer(tmp); 3906 slot++; 3907 } 3908 find_next_key: 3909 /* 3910 * we didn't find a candidate key in this node, walk forward 3911 * and find another one 3912 */ 3913 if (slot >= nritems) { 3914 path->slots[level] = slot; 3915 btrfs_set_path_blocking(path); 3916 sret = btrfs_find_next_key(root, path, min_key, level, 3917 cache_only, min_trans); 3918 if (sret == 0) { 3919 btrfs_release_path(root, path); 3920 goto again; 3921 } else { 3922 btrfs_clear_path_blocking(path); 3923 goto out; 3924 } 3925 } 3926 /* save our key for returning back */ 3927 btrfs_node_key_to_cpu(cur, &found_key, slot); 3928 path->slots[level] = slot; 3929 if (level == path->lowest_level) { 3930 ret = 0; 3931 unlock_up(path, level, 1); 3932 goto out; 3933 } 3934 btrfs_set_path_blocking(path); 3935 cur = read_node_slot(root, cur, slot); 3936 3937 btrfs_tree_lock(cur); 3938 3939 path->locks[level - 1] = 1; 3940 path->nodes[level - 1] = cur; 3941 unlock_up(path, level, 1); 3942 btrfs_clear_path_blocking(path); 3943 } 3944 out: 3945 if (ret == 0) 3946 memcpy(min_key, &found_key, sizeof(found_key)); 3947 btrfs_set_path_blocking(path); 3948 return ret; 3949 } 3950 3951 /* 3952 * this is similar to btrfs_next_leaf, but does not try to preserve 3953 * and fixup the path. It looks for and returns the next key in the 3954 * tree based on the current path and the cache_only and min_trans 3955 * parameters. 3956 * 3957 * 0 is returned if another key is found, < 0 if there are any errors 3958 * and 1 is returned if there are no higher keys in the tree 3959 * 3960 * path->keep_locks should be set to 1 on the search made before 3961 * calling this function. 3962 */ 3963 int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path, 3964 struct btrfs_key *key, int lowest_level, 3965 int cache_only, u64 min_trans) 3966 { 3967 int level = lowest_level; 3968 int slot; 3969 struct extent_buffer *c; 3970 3971 WARN_ON(!path->keep_locks); 3972 while (level < BTRFS_MAX_LEVEL) { 3973 if (!path->nodes[level]) 3974 return 1; 3975 3976 slot = path->slots[level] + 1; 3977 c = path->nodes[level]; 3978 next: 3979 if (slot >= btrfs_header_nritems(c)) { 3980 level++; 3981 if (level == BTRFS_MAX_LEVEL) 3982 return 1; 3983 continue; 3984 } 3985 if (level == 0) 3986 btrfs_item_key_to_cpu(c, key, slot); 3987 else { 3988 u64 blockptr = btrfs_node_blockptr(c, slot); 3989 u64 gen = btrfs_node_ptr_generation(c, slot); 3990 3991 if (cache_only) { 3992 struct extent_buffer *cur; 3993 cur = btrfs_find_tree_block(root, blockptr, 3994 btrfs_level_size(root, level - 1)); 3995 if (!cur || !btrfs_buffer_uptodate(cur, gen)) { 3996 slot++; 3997 if (cur) 3998 free_extent_buffer(cur); 3999 goto next; 4000 } 4001 free_extent_buffer(cur); 4002 } 4003 if (gen < min_trans) { 4004 slot++; 4005 goto next; 4006 } 4007 btrfs_node_key_to_cpu(c, key, slot); 4008 } 4009 return 0; 4010 } 4011 return 1; 4012 } 4013 4014 /* 4015 * search the tree again to find a leaf with greater keys 4016 * returns 0 if it found something or 1 if there are no greater leaves. 4017 * returns < 0 on io errors. 4018 */ 4019 int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) 4020 { 4021 int slot; 4022 int level = 1; 4023 struct extent_buffer *c; 4024 struct extent_buffer *next = NULL; 4025 struct btrfs_key key; 4026 u32 nritems; 4027 int ret; 4028 4029 nritems = btrfs_header_nritems(path->nodes[0]); 4030 if (nritems == 0) 4031 return 1; 4032 4033 btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1); 4034 4035 btrfs_release_path(root, path); 4036 path->keep_locks = 1; 4037 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 4038 path->keep_locks = 0; 4039 4040 if (ret < 0) 4041 return ret; 4042 4043 btrfs_set_path_blocking(path); 4044 nritems = btrfs_header_nritems(path->nodes[0]); 4045 /* 4046 * by releasing the path above we dropped all our locks. A balance 4047 * could have added more items next to the key that used to be 4048 * at the very end of the block. So, check again here and 4049 * advance the path if there are now more items available. 4050 */ 4051 if (nritems > 0 && path->slots[0] < nritems - 1) { 4052 path->slots[0]++; 4053 goto done; 4054 } 4055 4056 while (level < BTRFS_MAX_LEVEL) { 4057 if (!path->nodes[level]) 4058 return 1; 4059 4060 slot = path->slots[level] + 1; 4061 c = path->nodes[level]; 4062 if (slot >= btrfs_header_nritems(c)) { 4063 level++; 4064 if (level == BTRFS_MAX_LEVEL) 4065 return 1; 4066 continue; 4067 } 4068 4069 if (next) { 4070 btrfs_tree_unlock(next); 4071 free_extent_buffer(next); 4072 } 4073 4074 /* the path was set to blocking above */ 4075 if (level == 1 && (path->locks[1] || path->skip_locking) && 4076 path->reada) 4077 reada_for_search(root, path, level, slot, 0); 4078 4079 next = read_node_slot(root, c, slot); 4080 if (!path->skip_locking) { 4081 WARN_ON(!btrfs_tree_locked(c)); 4082 btrfs_tree_lock(next); 4083 btrfs_set_lock_blocking(next); 4084 } 4085 break; 4086 } 4087 path->slots[level] = slot; 4088 while (1) { 4089 level--; 4090 c = path->nodes[level]; 4091 if (path->locks[level]) 4092 btrfs_tree_unlock(c); 4093 free_extent_buffer(c); 4094 path->nodes[level] = next; 4095 path->slots[level] = 0; 4096 if (!path->skip_locking) 4097 path->locks[level] = 1; 4098 if (!level) 4099 break; 4100 4101 btrfs_set_path_blocking(path); 4102 if (level == 1 && path->locks[1] && path->reada) 4103 reada_for_search(root, path, level, slot, 0); 4104 next = read_node_slot(root, next, 0); 4105 if (!path->skip_locking) { 4106 WARN_ON(!btrfs_tree_locked(path->nodes[level])); 4107 btrfs_tree_lock(next); 4108 btrfs_set_lock_blocking(next); 4109 } 4110 } 4111 done: 4112 unlock_up(path, 0, 1); 4113 return 0; 4114 } 4115 4116 /* 4117 * this uses btrfs_prev_leaf to walk backwards in the tree, and keeps 4118 * searching until it gets past min_objectid or finds an item of 'type' 4119 * 4120 * returns 0 if something is found, 1 if nothing was found and < 0 on error 4121 */ 4122 int btrfs_previous_item(struct btrfs_root *root, 4123 struct btrfs_path *path, u64 min_objectid, 4124 int type) 4125 { 4126 struct btrfs_key found_key; 4127 struct extent_buffer *leaf; 4128 u32 nritems; 4129 int ret; 4130 4131 while (1) { 4132 if (path->slots[0] == 0) { 4133 btrfs_set_path_blocking(path); 4134 ret = btrfs_prev_leaf(root, path); 4135 if (ret != 0) 4136 return ret; 4137 } else { 4138 path->slots[0]--; 4139 } 4140 leaf = path->nodes[0]; 4141 nritems = btrfs_header_nritems(leaf); 4142 if (nritems == 0) 4143 return 1; 4144 if (path->slots[0] == nritems) 4145 path->slots[0]--; 4146 4147 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 4148 if (found_key.type == type) 4149 return 0; 4150 if (found_key.objectid < min_objectid) 4151 break; 4152 if (found_key.objectid == min_objectid && 4153 found_key.type < type) 4154 break; 4155 } 4156 return 1; 4157 } 4158