1 /* 2 * Copyright (C) 2007,2008 Oracle. All rights reserved. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public 6 * License v2 as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public 14 * License along with this program; if not, write to the 15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 16 * Boston, MA 021110-1307, USA. 17 */ 18 19 #include <linux/sched.h> 20 #include <linux/slab.h> 21 #include "ctree.h" 22 #include "disk-io.h" 23 #include "transaction.h" 24 #include "print-tree.h" 25 #include "locking.h" 26 27 static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root 28 *root, struct btrfs_path *path, int level); 29 static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root 30 *root, struct btrfs_key *ins_key, 31 struct btrfs_path *path, int data_size, int extend); 32 static int push_node_left(struct btrfs_trans_handle *trans, 33 struct btrfs_root *root, struct extent_buffer *dst, 34 struct extent_buffer *src, int empty); 35 static int balance_node_right(struct btrfs_trans_handle *trans, 36 struct btrfs_root *root, 37 struct extent_buffer *dst_buf, 38 struct extent_buffer *src_buf); 39 static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, 40 struct btrfs_path *path, int level, int slot); 41 static int setup_items_for_insert(struct btrfs_trans_handle *trans, 42 struct btrfs_root *root, struct btrfs_path *path, 43 struct btrfs_key *cpu_key, u32 *data_size, 44 u32 total_data, u32 total_size, int nr); 45 46 47 struct btrfs_path *btrfs_alloc_path(void) 48 { 49 struct btrfs_path *path; 50 path = kmem_cache_zalloc(btrfs_path_cachep, GFP_NOFS); 51 if (path) 52 path->reada = 1; 53 return path; 54 } 55 56 /* 57 * set all locked nodes in the path to blocking locks. This should 58 * be done before scheduling 59 */ 60 noinline void btrfs_set_path_blocking(struct btrfs_path *p) 61 { 62 int i; 63 for (i = 0; i < BTRFS_MAX_LEVEL; i++) { 64 if (p->nodes[i] && p->locks[i]) 65 btrfs_set_lock_blocking(p->nodes[i]); 66 } 67 } 68 69 /* 70 * reset all the locked nodes in the patch to spinning locks. 71 * 72 * held is used to keep lockdep happy, when lockdep is enabled 73 * we set held to a blocking lock before we go around and 74 * retake all the spinlocks in the path. You can safely use NULL 75 * for held 76 */ 77 noinline void btrfs_clear_path_blocking(struct btrfs_path *p, 78 struct extent_buffer *held) 79 { 80 int i; 81 82 #ifdef CONFIG_DEBUG_LOCK_ALLOC 83 /* lockdep really cares that we take all of these spinlocks 84 * in the right order. If any of the locks in the path are not 85 * currently blocking, it is going to complain. So, make really 86 * really sure by forcing the path to blocking before we clear 87 * the path blocking. 88 */ 89 if (held) 90 btrfs_set_lock_blocking(held); 91 btrfs_set_path_blocking(p); 92 #endif 93 94 for (i = BTRFS_MAX_LEVEL - 1; i >= 0; i--) { 95 if (p->nodes[i] && p->locks[i]) 96 btrfs_clear_lock_blocking(p->nodes[i]); 97 } 98 99 #ifdef CONFIG_DEBUG_LOCK_ALLOC 100 if (held) 101 btrfs_clear_lock_blocking(held); 102 #endif 103 } 104 105 /* this also releases the path */ 106 void btrfs_free_path(struct btrfs_path *p) 107 { 108 if (!p) 109 return; 110 btrfs_release_path(NULL, p); 111 kmem_cache_free(btrfs_path_cachep, p); 112 } 113 114 /* 115 * path release drops references on the extent buffers in the path 116 * and it drops any locks held by this path 117 * 118 * It is safe to call this on paths that no locks or extent buffers held. 119 */ 120 noinline void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) 121 { 122 int i; 123 124 for (i = 0; i < BTRFS_MAX_LEVEL; i++) { 125 p->slots[i] = 0; 126 if (!p->nodes[i]) 127 continue; 128 if (p->locks[i]) { 129 btrfs_tree_unlock(p->nodes[i]); 130 p->locks[i] = 0; 131 } 132 free_extent_buffer(p->nodes[i]); 133 p->nodes[i] = NULL; 134 } 135 } 136 137 /* 138 * safely gets a reference on the root node of a tree. A lock 139 * is not taken, so a concurrent writer may put a different node 140 * at the root of the tree. See btrfs_lock_root_node for the 141 * looping required. 142 * 143 * The extent buffer returned by this has a reference taken, so 144 * it won't disappear. It may stop being the root of the tree 145 * at any time because there are no locks held. 146 */ 147 struct extent_buffer *btrfs_root_node(struct btrfs_root *root) 148 { 149 struct extent_buffer *eb; 150 151 rcu_read_lock(); 152 eb = rcu_dereference(root->node); 153 extent_buffer_get(eb); 154 rcu_read_unlock(); 155 return eb; 156 } 157 158 /* loop around taking references on and locking the root node of the 159 * tree until you end up with a lock on the root. A locked buffer 160 * is returned, with a reference held. 161 */ 162 struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root) 163 { 164 struct extent_buffer *eb; 165 166 while (1) { 167 eb = btrfs_root_node(root); 168 btrfs_tree_lock(eb); 169 if (eb == root->node) 170 break; 171 btrfs_tree_unlock(eb); 172 free_extent_buffer(eb); 173 } 174 return eb; 175 } 176 177 /* cowonly root (everything not a reference counted cow subvolume), just get 178 * put onto a simple dirty list. transaction.c walks this to make sure they 179 * get properly updated on disk. 180 */ 181 static void add_root_to_dirty_list(struct btrfs_root *root) 182 { 183 if (root->track_dirty && list_empty(&root->dirty_list)) { 184 list_add(&root->dirty_list, 185 &root->fs_info->dirty_cowonly_roots); 186 } 187 } 188 189 /* 190 * used by snapshot creation to make a copy of a root for a tree with 191 * a given objectid. The buffer with the new root node is returned in 192 * cow_ret, and this func returns zero on success or a negative error code. 193 */ 194 int btrfs_copy_root(struct btrfs_trans_handle *trans, 195 struct btrfs_root *root, 196 struct extent_buffer *buf, 197 struct extent_buffer **cow_ret, u64 new_root_objectid) 198 { 199 struct extent_buffer *cow; 200 int ret = 0; 201 int level; 202 struct btrfs_disk_key disk_key; 203 204 WARN_ON(root->ref_cows && trans->transid != 205 root->fs_info->running_transaction->transid); 206 WARN_ON(root->ref_cows && trans->transid != root->last_trans); 207 208 level = btrfs_header_level(buf); 209 if (level == 0) 210 btrfs_item_key(buf, &disk_key, 0); 211 else 212 btrfs_node_key(buf, &disk_key, 0); 213 214 cow = btrfs_alloc_free_block(trans, root, buf->len, 0, 215 new_root_objectid, &disk_key, level, 216 buf->start, 0); 217 if (IS_ERR(cow)) 218 return PTR_ERR(cow); 219 220 copy_extent_buffer(cow, buf, 0, 0, cow->len); 221 btrfs_set_header_bytenr(cow, cow->start); 222 btrfs_set_header_generation(cow, trans->transid); 223 btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV); 224 btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN | 225 BTRFS_HEADER_FLAG_RELOC); 226 if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID) 227 btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC); 228 else 229 btrfs_set_header_owner(cow, new_root_objectid); 230 231 write_extent_buffer(cow, root->fs_info->fsid, 232 (unsigned long)btrfs_header_fsid(cow), 233 BTRFS_FSID_SIZE); 234 235 WARN_ON(btrfs_header_generation(buf) > trans->transid); 236 if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID) 237 ret = btrfs_inc_ref(trans, root, cow, 1); 238 else 239 ret = btrfs_inc_ref(trans, root, cow, 0); 240 241 if (ret) 242 return ret; 243 244 btrfs_mark_buffer_dirty(cow); 245 *cow_ret = cow; 246 return 0; 247 } 248 249 /* 250 * check if the tree block can be shared by multiple trees 251 */ 252 int btrfs_block_can_be_shared(struct btrfs_root *root, 253 struct extent_buffer *buf) 254 { 255 /* 256 * Tree blocks not in refernece counted trees and tree roots 257 * are never shared. If a block was allocated after the last 258 * snapshot and the block was not allocated by tree relocation, 259 * we know the block is not shared. 260 */ 261 if (root->ref_cows && 262 buf != root->node && buf != root->commit_root && 263 (btrfs_header_generation(buf) <= 264 btrfs_root_last_snapshot(&root->root_item) || 265 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) 266 return 1; 267 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 268 if (root->ref_cows && 269 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV) 270 return 1; 271 #endif 272 return 0; 273 } 274 275 static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, 276 struct btrfs_root *root, 277 struct extent_buffer *buf, 278 struct extent_buffer *cow, 279 int *last_ref) 280 { 281 u64 refs; 282 u64 owner; 283 u64 flags; 284 u64 new_flags = 0; 285 int ret; 286 287 /* 288 * Backrefs update rules: 289 * 290 * Always use full backrefs for extent pointers in tree block 291 * allocated by tree relocation. 292 * 293 * If a shared tree block is no longer referenced by its owner 294 * tree (btrfs_header_owner(buf) == root->root_key.objectid), 295 * use full backrefs for extent pointers in tree block. 296 * 297 * If a tree block is been relocating 298 * (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID), 299 * use full backrefs for extent pointers in tree block. 300 * The reason for this is some operations (such as drop tree) 301 * are only allowed for blocks use full backrefs. 302 */ 303 304 if (btrfs_block_can_be_shared(root, buf)) { 305 ret = btrfs_lookup_extent_info(trans, root, buf->start, 306 buf->len, &refs, &flags); 307 BUG_ON(ret); 308 BUG_ON(refs == 0); 309 } else { 310 refs = 1; 311 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID || 312 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV) 313 flags = BTRFS_BLOCK_FLAG_FULL_BACKREF; 314 else 315 flags = 0; 316 } 317 318 owner = btrfs_header_owner(buf); 319 BUG_ON(owner == BTRFS_TREE_RELOC_OBJECTID && 320 !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)); 321 322 if (refs > 1) { 323 if ((owner == root->root_key.objectid || 324 root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) && 325 !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) { 326 ret = btrfs_inc_ref(trans, root, buf, 1); 327 BUG_ON(ret); 328 329 if (root->root_key.objectid == 330 BTRFS_TREE_RELOC_OBJECTID) { 331 ret = btrfs_dec_ref(trans, root, buf, 0); 332 BUG_ON(ret); 333 ret = btrfs_inc_ref(trans, root, cow, 1); 334 BUG_ON(ret); 335 } 336 new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; 337 } else { 338 339 if (root->root_key.objectid == 340 BTRFS_TREE_RELOC_OBJECTID) 341 ret = btrfs_inc_ref(trans, root, cow, 1); 342 else 343 ret = btrfs_inc_ref(trans, root, cow, 0); 344 BUG_ON(ret); 345 } 346 if (new_flags != 0) { 347 ret = btrfs_set_disk_extent_flags(trans, root, 348 buf->start, 349 buf->len, 350 new_flags, 0); 351 BUG_ON(ret); 352 } 353 } else { 354 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) { 355 if (root->root_key.objectid == 356 BTRFS_TREE_RELOC_OBJECTID) 357 ret = btrfs_inc_ref(trans, root, cow, 1); 358 else 359 ret = btrfs_inc_ref(trans, root, cow, 0); 360 BUG_ON(ret); 361 ret = btrfs_dec_ref(trans, root, buf, 1); 362 BUG_ON(ret); 363 } 364 clean_tree_block(trans, root, buf); 365 *last_ref = 1; 366 } 367 return 0; 368 } 369 370 /* 371 * does the dirty work in cow of a single block. The parent block (if 372 * supplied) is updated to point to the new cow copy. The new buffer is marked 373 * dirty and returned locked. If you modify the block it needs to be marked 374 * dirty again. 375 * 376 * search_start -- an allocation hint for the new block 377 * 378 * empty_size -- a hint that you plan on doing more cow. This is the size in 379 * bytes the allocator should try to find free next to the block it returns. 380 * This is just a hint and may be ignored by the allocator. 381 */ 382 static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, 383 struct btrfs_root *root, 384 struct extent_buffer *buf, 385 struct extent_buffer *parent, int parent_slot, 386 struct extent_buffer **cow_ret, 387 u64 search_start, u64 empty_size) 388 { 389 struct btrfs_disk_key disk_key; 390 struct extent_buffer *cow; 391 int level; 392 int last_ref = 0; 393 int unlock_orig = 0; 394 u64 parent_start; 395 396 if (*cow_ret == buf) 397 unlock_orig = 1; 398 399 btrfs_assert_tree_locked(buf); 400 401 WARN_ON(root->ref_cows && trans->transid != 402 root->fs_info->running_transaction->transid); 403 WARN_ON(root->ref_cows && trans->transid != root->last_trans); 404 405 level = btrfs_header_level(buf); 406 407 if (level == 0) 408 btrfs_item_key(buf, &disk_key, 0); 409 else 410 btrfs_node_key(buf, &disk_key, 0); 411 412 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) { 413 if (parent) 414 parent_start = parent->start; 415 else 416 parent_start = 0; 417 } else 418 parent_start = 0; 419 420 cow = btrfs_alloc_free_block(trans, root, buf->len, parent_start, 421 root->root_key.objectid, &disk_key, 422 level, search_start, empty_size); 423 if (IS_ERR(cow)) 424 return PTR_ERR(cow); 425 426 /* cow is set to blocking by btrfs_init_new_buffer */ 427 428 copy_extent_buffer(cow, buf, 0, 0, cow->len); 429 btrfs_set_header_bytenr(cow, cow->start); 430 btrfs_set_header_generation(cow, trans->transid); 431 btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV); 432 btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN | 433 BTRFS_HEADER_FLAG_RELOC); 434 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) 435 btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC); 436 else 437 btrfs_set_header_owner(cow, root->root_key.objectid); 438 439 write_extent_buffer(cow, root->fs_info->fsid, 440 (unsigned long)btrfs_header_fsid(cow), 441 BTRFS_FSID_SIZE); 442 443 update_ref_for_cow(trans, root, buf, cow, &last_ref); 444 445 if (root->ref_cows) 446 btrfs_reloc_cow_block(trans, root, buf, cow); 447 448 if (buf == root->node) { 449 WARN_ON(parent && parent != buf); 450 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID || 451 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV) 452 parent_start = buf->start; 453 else 454 parent_start = 0; 455 456 extent_buffer_get(cow); 457 rcu_assign_pointer(root->node, cow); 458 459 btrfs_free_tree_block(trans, root, buf, parent_start, 460 last_ref); 461 free_extent_buffer(buf); 462 add_root_to_dirty_list(root); 463 } else { 464 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) 465 parent_start = parent->start; 466 else 467 parent_start = 0; 468 469 WARN_ON(trans->transid != btrfs_header_generation(parent)); 470 btrfs_set_node_blockptr(parent, parent_slot, 471 cow->start); 472 btrfs_set_node_ptr_generation(parent, parent_slot, 473 trans->transid); 474 btrfs_mark_buffer_dirty(parent); 475 btrfs_free_tree_block(trans, root, buf, parent_start, 476 last_ref); 477 } 478 if (unlock_orig) 479 btrfs_tree_unlock(buf); 480 free_extent_buffer(buf); 481 btrfs_mark_buffer_dirty(cow); 482 *cow_ret = cow; 483 return 0; 484 } 485 486 static inline int should_cow_block(struct btrfs_trans_handle *trans, 487 struct btrfs_root *root, 488 struct extent_buffer *buf) 489 { 490 if (btrfs_header_generation(buf) == trans->transid && 491 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN) && 492 !(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID && 493 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) 494 return 0; 495 return 1; 496 } 497 498 /* 499 * cows a single block, see __btrfs_cow_block for the real work. 500 * This version of it has extra checks so that a block isn't cow'd more than 501 * once per transaction, as long as it hasn't been written yet 502 */ 503 noinline int btrfs_cow_block(struct btrfs_trans_handle *trans, 504 struct btrfs_root *root, struct extent_buffer *buf, 505 struct extent_buffer *parent, int parent_slot, 506 struct extent_buffer **cow_ret) 507 { 508 u64 search_start; 509 int ret; 510 511 if (trans->transaction != root->fs_info->running_transaction) { 512 printk(KERN_CRIT "trans %llu running %llu\n", 513 (unsigned long long)trans->transid, 514 (unsigned long long) 515 root->fs_info->running_transaction->transid); 516 WARN_ON(1); 517 } 518 if (trans->transid != root->fs_info->generation) { 519 printk(KERN_CRIT "trans %llu running %llu\n", 520 (unsigned long long)trans->transid, 521 (unsigned long long)root->fs_info->generation); 522 WARN_ON(1); 523 } 524 525 if (!should_cow_block(trans, root, buf)) { 526 *cow_ret = buf; 527 return 0; 528 } 529 530 search_start = buf->start & ~((u64)(1024 * 1024 * 1024) - 1); 531 532 if (parent) 533 btrfs_set_lock_blocking(parent); 534 btrfs_set_lock_blocking(buf); 535 536 ret = __btrfs_cow_block(trans, root, buf, parent, 537 parent_slot, cow_ret, search_start, 0); 538 539 trace_btrfs_cow_block(root, buf, *cow_ret); 540 541 return ret; 542 } 543 544 /* 545 * helper function for defrag to decide if two blocks pointed to by a 546 * node are actually close by 547 */ 548 static int close_blocks(u64 blocknr, u64 other, u32 blocksize) 549 { 550 if (blocknr < other && other - (blocknr + blocksize) < 32768) 551 return 1; 552 if (blocknr > other && blocknr - (other + blocksize) < 32768) 553 return 1; 554 return 0; 555 } 556 557 /* 558 * compare two keys in a memcmp fashion 559 */ 560 static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2) 561 { 562 struct btrfs_key k1; 563 564 btrfs_disk_key_to_cpu(&k1, disk); 565 566 return btrfs_comp_cpu_keys(&k1, k2); 567 } 568 569 /* 570 * same as comp_keys only with two btrfs_key's 571 */ 572 int btrfs_comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2) 573 { 574 if (k1->objectid > k2->objectid) 575 return 1; 576 if (k1->objectid < k2->objectid) 577 return -1; 578 if (k1->type > k2->type) 579 return 1; 580 if (k1->type < k2->type) 581 return -1; 582 if (k1->offset > k2->offset) 583 return 1; 584 if (k1->offset < k2->offset) 585 return -1; 586 return 0; 587 } 588 589 /* 590 * this is used by the defrag code to go through all the 591 * leaves pointed to by a node and reallocate them so that 592 * disk order is close to key order 593 */ 594 int btrfs_realloc_node(struct btrfs_trans_handle *trans, 595 struct btrfs_root *root, struct extent_buffer *parent, 596 int start_slot, int cache_only, u64 *last_ret, 597 struct btrfs_key *progress) 598 { 599 struct extent_buffer *cur; 600 u64 blocknr; 601 u64 gen; 602 u64 search_start = *last_ret; 603 u64 last_block = 0; 604 u64 other; 605 u32 parent_nritems; 606 int end_slot; 607 int i; 608 int err = 0; 609 int parent_level; 610 int uptodate; 611 u32 blocksize; 612 int progress_passed = 0; 613 struct btrfs_disk_key disk_key; 614 615 parent_level = btrfs_header_level(parent); 616 if (cache_only && parent_level != 1) 617 return 0; 618 619 if (trans->transaction != root->fs_info->running_transaction) 620 WARN_ON(1); 621 if (trans->transid != root->fs_info->generation) 622 WARN_ON(1); 623 624 parent_nritems = btrfs_header_nritems(parent); 625 blocksize = btrfs_level_size(root, parent_level - 1); 626 end_slot = parent_nritems; 627 628 if (parent_nritems == 1) 629 return 0; 630 631 btrfs_set_lock_blocking(parent); 632 633 for (i = start_slot; i < end_slot; i++) { 634 int close = 1; 635 636 if (!parent->map_token) { 637 map_extent_buffer(parent, 638 btrfs_node_key_ptr_offset(i), 639 sizeof(struct btrfs_key_ptr), 640 &parent->map_token, &parent->kaddr, 641 &parent->map_start, &parent->map_len, 642 KM_USER1); 643 } 644 btrfs_node_key(parent, &disk_key, i); 645 if (!progress_passed && comp_keys(&disk_key, progress) < 0) 646 continue; 647 648 progress_passed = 1; 649 blocknr = btrfs_node_blockptr(parent, i); 650 gen = btrfs_node_ptr_generation(parent, i); 651 if (last_block == 0) 652 last_block = blocknr; 653 654 if (i > 0) { 655 other = btrfs_node_blockptr(parent, i - 1); 656 close = close_blocks(blocknr, other, blocksize); 657 } 658 if (!close && i < end_slot - 2) { 659 other = btrfs_node_blockptr(parent, i + 1); 660 close = close_blocks(blocknr, other, blocksize); 661 } 662 if (close) { 663 last_block = blocknr; 664 continue; 665 } 666 if (parent->map_token) { 667 unmap_extent_buffer(parent, parent->map_token, 668 KM_USER1); 669 parent->map_token = NULL; 670 } 671 672 cur = btrfs_find_tree_block(root, blocknr, blocksize); 673 if (cur) 674 uptodate = btrfs_buffer_uptodate(cur, gen); 675 else 676 uptodate = 0; 677 if (!cur || !uptodate) { 678 if (cache_only) { 679 free_extent_buffer(cur); 680 continue; 681 } 682 if (!cur) { 683 cur = read_tree_block(root, blocknr, 684 blocksize, gen); 685 if (!cur) 686 return -EIO; 687 } else if (!uptodate) { 688 btrfs_read_buffer(cur, gen); 689 } 690 } 691 if (search_start == 0) 692 search_start = last_block; 693 694 btrfs_tree_lock(cur); 695 btrfs_set_lock_blocking(cur); 696 err = __btrfs_cow_block(trans, root, cur, parent, i, 697 &cur, search_start, 698 min(16 * blocksize, 699 (end_slot - i) * blocksize)); 700 if (err) { 701 btrfs_tree_unlock(cur); 702 free_extent_buffer(cur); 703 break; 704 } 705 search_start = cur->start; 706 last_block = cur->start; 707 *last_ret = search_start; 708 btrfs_tree_unlock(cur); 709 free_extent_buffer(cur); 710 } 711 if (parent->map_token) { 712 unmap_extent_buffer(parent, parent->map_token, 713 KM_USER1); 714 parent->map_token = NULL; 715 } 716 return err; 717 } 718 719 /* 720 * The leaf data grows from end-to-front in the node. 721 * this returns the address of the start of the last item, 722 * which is the stop of the leaf data stack 723 */ 724 static inline unsigned int leaf_data_end(struct btrfs_root *root, 725 struct extent_buffer *leaf) 726 { 727 u32 nr = btrfs_header_nritems(leaf); 728 if (nr == 0) 729 return BTRFS_LEAF_DATA_SIZE(root); 730 return btrfs_item_offset_nr(leaf, nr - 1); 731 } 732 733 734 /* 735 * search for key in the extent_buffer. The items start at offset p, 736 * and they are item_size apart. There are 'max' items in p. 737 * 738 * the slot in the array is returned via slot, and it points to 739 * the place where you would insert key if it is not found in 740 * the array. 741 * 742 * slot may point to max if the key is bigger than all of the keys 743 */ 744 static noinline int generic_bin_search(struct extent_buffer *eb, 745 unsigned long p, 746 int item_size, struct btrfs_key *key, 747 int max, int *slot) 748 { 749 int low = 0; 750 int high = max; 751 int mid; 752 int ret; 753 struct btrfs_disk_key *tmp = NULL; 754 struct btrfs_disk_key unaligned; 755 unsigned long offset; 756 char *map_token = NULL; 757 char *kaddr = NULL; 758 unsigned long map_start = 0; 759 unsigned long map_len = 0; 760 int err; 761 762 while (low < high) { 763 mid = (low + high) / 2; 764 offset = p + mid * item_size; 765 766 if (!map_token || offset < map_start || 767 (offset + sizeof(struct btrfs_disk_key)) > 768 map_start + map_len) { 769 if (map_token) { 770 unmap_extent_buffer(eb, map_token, KM_USER0); 771 map_token = NULL; 772 } 773 774 err = map_private_extent_buffer(eb, offset, 775 sizeof(struct btrfs_disk_key), 776 &map_token, &kaddr, 777 &map_start, &map_len, KM_USER0); 778 779 if (!err) { 780 tmp = (struct btrfs_disk_key *)(kaddr + offset - 781 map_start); 782 } else { 783 read_extent_buffer(eb, &unaligned, 784 offset, sizeof(unaligned)); 785 tmp = &unaligned; 786 } 787 788 } else { 789 tmp = (struct btrfs_disk_key *)(kaddr + offset - 790 map_start); 791 } 792 ret = comp_keys(tmp, key); 793 794 if (ret < 0) 795 low = mid + 1; 796 else if (ret > 0) 797 high = mid; 798 else { 799 *slot = mid; 800 if (map_token) 801 unmap_extent_buffer(eb, map_token, KM_USER0); 802 return 0; 803 } 804 } 805 *slot = low; 806 if (map_token) 807 unmap_extent_buffer(eb, map_token, KM_USER0); 808 return 1; 809 } 810 811 /* 812 * simple bin_search frontend that does the right thing for 813 * leaves vs nodes 814 */ 815 static int bin_search(struct extent_buffer *eb, struct btrfs_key *key, 816 int level, int *slot) 817 { 818 if (level == 0) { 819 return generic_bin_search(eb, 820 offsetof(struct btrfs_leaf, items), 821 sizeof(struct btrfs_item), 822 key, btrfs_header_nritems(eb), 823 slot); 824 } else { 825 return generic_bin_search(eb, 826 offsetof(struct btrfs_node, ptrs), 827 sizeof(struct btrfs_key_ptr), 828 key, btrfs_header_nritems(eb), 829 slot); 830 } 831 return -1; 832 } 833 834 int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, 835 int level, int *slot) 836 { 837 return bin_search(eb, key, level, slot); 838 } 839 840 static void root_add_used(struct btrfs_root *root, u32 size) 841 { 842 spin_lock(&root->accounting_lock); 843 btrfs_set_root_used(&root->root_item, 844 btrfs_root_used(&root->root_item) + size); 845 spin_unlock(&root->accounting_lock); 846 } 847 848 static void root_sub_used(struct btrfs_root *root, u32 size) 849 { 850 spin_lock(&root->accounting_lock); 851 btrfs_set_root_used(&root->root_item, 852 btrfs_root_used(&root->root_item) - size); 853 spin_unlock(&root->accounting_lock); 854 } 855 856 /* given a node and slot number, this reads the blocks it points to. The 857 * extent buffer is returned with a reference taken (but unlocked). 858 * NULL is returned on error. 859 */ 860 static noinline struct extent_buffer *read_node_slot(struct btrfs_root *root, 861 struct extent_buffer *parent, int slot) 862 { 863 int level = btrfs_header_level(parent); 864 if (slot < 0) 865 return NULL; 866 if (slot >= btrfs_header_nritems(parent)) 867 return NULL; 868 869 BUG_ON(level == 0); 870 871 return read_tree_block(root, btrfs_node_blockptr(parent, slot), 872 btrfs_level_size(root, level - 1), 873 btrfs_node_ptr_generation(parent, slot)); 874 } 875 876 /* 877 * node level balancing, used to make sure nodes are in proper order for 878 * item deletion. We balance from the top down, so we have to make sure 879 * that a deletion won't leave an node completely empty later on. 880 */ 881 static noinline int balance_level(struct btrfs_trans_handle *trans, 882 struct btrfs_root *root, 883 struct btrfs_path *path, int level) 884 { 885 struct extent_buffer *right = NULL; 886 struct extent_buffer *mid; 887 struct extent_buffer *left = NULL; 888 struct extent_buffer *parent = NULL; 889 int ret = 0; 890 int wret; 891 int pslot; 892 int orig_slot = path->slots[level]; 893 u64 orig_ptr; 894 895 if (level == 0) 896 return 0; 897 898 mid = path->nodes[level]; 899 900 WARN_ON(!path->locks[level]); 901 WARN_ON(btrfs_header_generation(mid) != trans->transid); 902 903 orig_ptr = btrfs_node_blockptr(mid, orig_slot); 904 905 if (level < BTRFS_MAX_LEVEL - 1) 906 parent = path->nodes[level + 1]; 907 pslot = path->slots[level + 1]; 908 909 /* 910 * deal with the case where there is only one pointer in the root 911 * by promoting the node below to a root 912 */ 913 if (!parent) { 914 struct extent_buffer *child; 915 916 if (btrfs_header_nritems(mid) != 1) 917 return 0; 918 919 /* promote the child to a root */ 920 child = read_node_slot(root, mid, 0); 921 BUG_ON(!child); 922 btrfs_tree_lock(child); 923 btrfs_set_lock_blocking(child); 924 ret = btrfs_cow_block(trans, root, child, mid, 0, &child); 925 if (ret) { 926 btrfs_tree_unlock(child); 927 free_extent_buffer(child); 928 goto enospc; 929 } 930 931 rcu_assign_pointer(root->node, child); 932 933 add_root_to_dirty_list(root); 934 btrfs_tree_unlock(child); 935 936 path->locks[level] = 0; 937 path->nodes[level] = NULL; 938 clean_tree_block(trans, root, mid); 939 btrfs_tree_unlock(mid); 940 /* once for the path */ 941 free_extent_buffer(mid); 942 943 root_sub_used(root, mid->len); 944 btrfs_free_tree_block(trans, root, mid, 0, 1); 945 /* once for the root ptr */ 946 free_extent_buffer(mid); 947 return 0; 948 } 949 if (btrfs_header_nritems(mid) > 950 BTRFS_NODEPTRS_PER_BLOCK(root) / 4) 951 return 0; 952 953 btrfs_header_nritems(mid); 954 955 left = read_node_slot(root, parent, pslot - 1); 956 if (left) { 957 btrfs_tree_lock(left); 958 btrfs_set_lock_blocking(left); 959 wret = btrfs_cow_block(trans, root, left, 960 parent, pslot - 1, &left); 961 if (wret) { 962 ret = wret; 963 goto enospc; 964 } 965 } 966 right = read_node_slot(root, parent, pslot + 1); 967 if (right) { 968 btrfs_tree_lock(right); 969 btrfs_set_lock_blocking(right); 970 wret = btrfs_cow_block(trans, root, right, 971 parent, pslot + 1, &right); 972 if (wret) { 973 ret = wret; 974 goto enospc; 975 } 976 } 977 978 /* first, try to make some room in the middle buffer */ 979 if (left) { 980 orig_slot += btrfs_header_nritems(left); 981 wret = push_node_left(trans, root, left, mid, 1); 982 if (wret < 0) 983 ret = wret; 984 btrfs_header_nritems(mid); 985 } 986 987 /* 988 * then try to empty the right most buffer into the middle 989 */ 990 if (right) { 991 wret = push_node_left(trans, root, mid, right, 1); 992 if (wret < 0 && wret != -ENOSPC) 993 ret = wret; 994 if (btrfs_header_nritems(right) == 0) { 995 clean_tree_block(trans, root, right); 996 btrfs_tree_unlock(right); 997 wret = del_ptr(trans, root, path, level + 1, pslot + 998 1); 999 if (wret) 1000 ret = wret; 1001 root_sub_used(root, right->len); 1002 btrfs_free_tree_block(trans, root, right, 0, 1); 1003 free_extent_buffer(right); 1004 right = NULL; 1005 } else { 1006 struct btrfs_disk_key right_key; 1007 btrfs_node_key(right, &right_key, 0); 1008 btrfs_set_node_key(parent, &right_key, pslot + 1); 1009 btrfs_mark_buffer_dirty(parent); 1010 } 1011 } 1012 if (btrfs_header_nritems(mid) == 1) { 1013 /* 1014 * we're not allowed to leave a node with one item in the 1015 * tree during a delete. A deletion from lower in the tree 1016 * could try to delete the only pointer in this node. 1017 * So, pull some keys from the left. 1018 * There has to be a left pointer at this point because 1019 * otherwise we would have pulled some pointers from the 1020 * right 1021 */ 1022 BUG_ON(!left); 1023 wret = balance_node_right(trans, root, mid, left); 1024 if (wret < 0) { 1025 ret = wret; 1026 goto enospc; 1027 } 1028 if (wret == 1) { 1029 wret = push_node_left(trans, root, left, mid, 1); 1030 if (wret < 0) 1031 ret = wret; 1032 } 1033 BUG_ON(wret == 1); 1034 } 1035 if (btrfs_header_nritems(mid) == 0) { 1036 clean_tree_block(trans, root, mid); 1037 btrfs_tree_unlock(mid); 1038 wret = del_ptr(trans, root, path, level + 1, pslot); 1039 if (wret) 1040 ret = wret; 1041 root_sub_used(root, mid->len); 1042 btrfs_free_tree_block(trans, root, mid, 0, 1); 1043 free_extent_buffer(mid); 1044 mid = NULL; 1045 } else { 1046 /* update the parent key to reflect our changes */ 1047 struct btrfs_disk_key mid_key; 1048 btrfs_node_key(mid, &mid_key, 0); 1049 btrfs_set_node_key(parent, &mid_key, pslot); 1050 btrfs_mark_buffer_dirty(parent); 1051 } 1052 1053 /* update the path */ 1054 if (left) { 1055 if (btrfs_header_nritems(left) > orig_slot) { 1056 extent_buffer_get(left); 1057 /* left was locked after cow */ 1058 path->nodes[level] = left; 1059 path->slots[level + 1] -= 1; 1060 path->slots[level] = orig_slot; 1061 if (mid) { 1062 btrfs_tree_unlock(mid); 1063 free_extent_buffer(mid); 1064 } 1065 } else { 1066 orig_slot -= btrfs_header_nritems(left); 1067 path->slots[level] = orig_slot; 1068 } 1069 } 1070 /* double check we haven't messed things up */ 1071 if (orig_ptr != 1072 btrfs_node_blockptr(path->nodes[level], path->slots[level])) 1073 BUG(); 1074 enospc: 1075 if (right) { 1076 btrfs_tree_unlock(right); 1077 free_extent_buffer(right); 1078 } 1079 if (left) { 1080 if (path->nodes[level] != left) 1081 btrfs_tree_unlock(left); 1082 free_extent_buffer(left); 1083 } 1084 return ret; 1085 } 1086 1087 /* Node balancing for insertion. Here we only split or push nodes around 1088 * when they are completely full. This is also done top down, so we 1089 * have to be pessimistic. 1090 */ 1091 static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans, 1092 struct btrfs_root *root, 1093 struct btrfs_path *path, int level) 1094 { 1095 struct extent_buffer *right = NULL; 1096 struct extent_buffer *mid; 1097 struct extent_buffer *left = NULL; 1098 struct extent_buffer *parent = NULL; 1099 int ret = 0; 1100 int wret; 1101 int pslot; 1102 int orig_slot = path->slots[level]; 1103 1104 if (level == 0) 1105 return 1; 1106 1107 mid = path->nodes[level]; 1108 WARN_ON(btrfs_header_generation(mid) != trans->transid); 1109 1110 if (level < BTRFS_MAX_LEVEL - 1) 1111 parent = path->nodes[level + 1]; 1112 pslot = path->slots[level + 1]; 1113 1114 if (!parent) 1115 return 1; 1116 1117 left = read_node_slot(root, parent, pslot - 1); 1118 1119 /* first, try to make some room in the middle buffer */ 1120 if (left) { 1121 u32 left_nr; 1122 1123 btrfs_tree_lock(left); 1124 btrfs_set_lock_blocking(left); 1125 1126 left_nr = btrfs_header_nritems(left); 1127 if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) { 1128 wret = 1; 1129 } else { 1130 ret = btrfs_cow_block(trans, root, left, parent, 1131 pslot - 1, &left); 1132 if (ret) 1133 wret = 1; 1134 else { 1135 wret = push_node_left(trans, root, 1136 left, mid, 0); 1137 } 1138 } 1139 if (wret < 0) 1140 ret = wret; 1141 if (wret == 0) { 1142 struct btrfs_disk_key disk_key; 1143 orig_slot += left_nr; 1144 btrfs_node_key(mid, &disk_key, 0); 1145 btrfs_set_node_key(parent, &disk_key, pslot); 1146 btrfs_mark_buffer_dirty(parent); 1147 if (btrfs_header_nritems(left) > orig_slot) { 1148 path->nodes[level] = left; 1149 path->slots[level + 1] -= 1; 1150 path->slots[level] = orig_slot; 1151 btrfs_tree_unlock(mid); 1152 free_extent_buffer(mid); 1153 } else { 1154 orig_slot -= 1155 btrfs_header_nritems(left); 1156 path->slots[level] = orig_slot; 1157 btrfs_tree_unlock(left); 1158 free_extent_buffer(left); 1159 } 1160 return 0; 1161 } 1162 btrfs_tree_unlock(left); 1163 free_extent_buffer(left); 1164 } 1165 right = read_node_slot(root, parent, pslot + 1); 1166 1167 /* 1168 * then try to empty the right most buffer into the middle 1169 */ 1170 if (right) { 1171 u32 right_nr; 1172 1173 btrfs_tree_lock(right); 1174 btrfs_set_lock_blocking(right); 1175 1176 right_nr = btrfs_header_nritems(right); 1177 if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) { 1178 wret = 1; 1179 } else { 1180 ret = btrfs_cow_block(trans, root, right, 1181 parent, pslot + 1, 1182 &right); 1183 if (ret) 1184 wret = 1; 1185 else { 1186 wret = balance_node_right(trans, root, 1187 right, mid); 1188 } 1189 } 1190 if (wret < 0) 1191 ret = wret; 1192 if (wret == 0) { 1193 struct btrfs_disk_key disk_key; 1194 1195 btrfs_node_key(right, &disk_key, 0); 1196 btrfs_set_node_key(parent, &disk_key, pslot + 1); 1197 btrfs_mark_buffer_dirty(parent); 1198 1199 if (btrfs_header_nritems(mid) <= orig_slot) { 1200 path->nodes[level] = right; 1201 path->slots[level + 1] += 1; 1202 path->slots[level] = orig_slot - 1203 btrfs_header_nritems(mid); 1204 btrfs_tree_unlock(mid); 1205 free_extent_buffer(mid); 1206 } else { 1207 btrfs_tree_unlock(right); 1208 free_extent_buffer(right); 1209 } 1210 return 0; 1211 } 1212 btrfs_tree_unlock(right); 1213 free_extent_buffer(right); 1214 } 1215 return 1; 1216 } 1217 1218 /* 1219 * readahead one full node of leaves, finding things that are close 1220 * to the block in 'slot', and triggering ra on them. 1221 */ 1222 static void reada_for_search(struct btrfs_root *root, 1223 struct btrfs_path *path, 1224 int level, int slot, u64 objectid) 1225 { 1226 struct extent_buffer *node; 1227 struct btrfs_disk_key disk_key; 1228 u32 nritems; 1229 u64 search; 1230 u64 target; 1231 u64 nread = 0; 1232 int direction = path->reada; 1233 struct extent_buffer *eb; 1234 u32 nr; 1235 u32 blocksize; 1236 u32 nscan = 0; 1237 1238 if (level != 1) 1239 return; 1240 1241 if (!path->nodes[level]) 1242 return; 1243 1244 node = path->nodes[level]; 1245 1246 search = btrfs_node_blockptr(node, slot); 1247 blocksize = btrfs_level_size(root, level - 1); 1248 eb = btrfs_find_tree_block(root, search, blocksize); 1249 if (eb) { 1250 free_extent_buffer(eb); 1251 return; 1252 } 1253 1254 target = search; 1255 1256 nritems = btrfs_header_nritems(node); 1257 nr = slot; 1258 while (1) { 1259 if (direction < 0) { 1260 if (nr == 0) 1261 break; 1262 nr--; 1263 } else if (direction > 0) { 1264 nr++; 1265 if (nr >= nritems) 1266 break; 1267 } 1268 if (path->reada < 0 && objectid) { 1269 btrfs_node_key(node, &disk_key, nr); 1270 if (btrfs_disk_key_objectid(&disk_key) != objectid) 1271 break; 1272 } 1273 search = btrfs_node_blockptr(node, nr); 1274 if ((search <= target && target - search <= 65536) || 1275 (search > target && search - target <= 65536)) { 1276 readahead_tree_block(root, search, blocksize, 1277 btrfs_node_ptr_generation(node, nr)); 1278 nread += blocksize; 1279 } 1280 nscan++; 1281 if ((nread > 65536 || nscan > 32)) 1282 break; 1283 } 1284 } 1285 1286 /* 1287 * returns -EAGAIN if it had to drop the path, or zero if everything was in 1288 * cache 1289 */ 1290 static noinline int reada_for_balance(struct btrfs_root *root, 1291 struct btrfs_path *path, int level) 1292 { 1293 int slot; 1294 int nritems; 1295 struct extent_buffer *parent; 1296 struct extent_buffer *eb; 1297 u64 gen; 1298 u64 block1 = 0; 1299 u64 block2 = 0; 1300 int ret = 0; 1301 int blocksize; 1302 1303 parent = path->nodes[level + 1]; 1304 if (!parent) 1305 return 0; 1306 1307 nritems = btrfs_header_nritems(parent); 1308 slot = path->slots[level + 1]; 1309 blocksize = btrfs_level_size(root, level); 1310 1311 if (slot > 0) { 1312 block1 = btrfs_node_blockptr(parent, slot - 1); 1313 gen = btrfs_node_ptr_generation(parent, slot - 1); 1314 eb = btrfs_find_tree_block(root, block1, blocksize); 1315 if (eb && btrfs_buffer_uptodate(eb, gen)) 1316 block1 = 0; 1317 free_extent_buffer(eb); 1318 } 1319 if (slot + 1 < nritems) { 1320 block2 = btrfs_node_blockptr(parent, slot + 1); 1321 gen = btrfs_node_ptr_generation(parent, slot + 1); 1322 eb = btrfs_find_tree_block(root, block2, blocksize); 1323 if (eb && btrfs_buffer_uptodate(eb, gen)) 1324 block2 = 0; 1325 free_extent_buffer(eb); 1326 } 1327 if (block1 || block2) { 1328 ret = -EAGAIN; 1329 1330 /* release the whole path */ 1331 btrfs_release_path(root, path); 1332 1333 /* read the blocks */ 1334 if (block1) 1335 readahead_tree_block(root, block1, blocksize, 0); 1336 if (block2) 1337 readahead_tree_block(root, block2, blocksize, 0); 1338 1339 if (block1) { 1340 eb = read_tree_block(root, block1, blocksize, 0); 1341 free_extent_buffer(eb); 1342 } 1343 if (block2) { 1344 eb = read_tree_block(root, block2, blocksize, 0); 1345 free_extent_buffer(eb); 1346 } 1347 } 1348 return ret; 1349 } 1350 1351 1352 /* 1353 * when we walk down the tree, it is usually safe to unlock the higher layers 1354 * in the tree. The exceptions are when our path goes through slot 0, because 1355 * operations on the tree might require changing key pointers higher up in the 1356 * tree. 1357 * 1358 * callers might also have set path->keep_locks, which tells this code to keep 1359 * the lock if the path points to the last slot in the block. This is part of 1360 * walking through the tree, and selecting the next slot in the higher block. 1361 * 1362 * lowest_unlock sets the lowest level in the tree we're allowed to unlock. so 1363 * if lowest_unlock is 1, level 0 won't be unlocked 1364 */ 1365 static noinline void unlock_up(struct btrfs_path *path, int level, 1366 int lowest_unlock) 1367 { 1368 int i; 1369 int skip_level = level; 1370 int no_skips = 0; 1371 struct extent_buffer *t; 1372 1373 for (i = level; i < BTRFS_MAX_LEVEL; i++) { 1374 if (!path->nodes[i]) 1375 break; 1376 if (!path->locks[i]) 1377 break; 1378 if (!no_skips && path->slots[i] == 0) { 1379 skip_level = i + 1; 1380 continue; 1381 } 1382 if (!no_skips && path->keep_locks) { 1383 u32 nritems; 1384 t = path->nodes[i]; 1385 nritems = btrfs_header_nritems(t); 1386 if (nritems < 1 || path->slots[i] >= nritems - 1) { 1387 skip_level = i + 1; 1388 continue; 1389 } 1390 } 1391 if (skip_level < i && i >= lowest_unlock) 1392 no_skips = 1; 1393 1394 t = path->nodes[i]; 1395 if (i >= lowest_unlock && i > skip_level && path->locks[i]) { 1396 btrfs_tree_unlock(t); 1397 path->locks[i] = 0; 1398 } 1399 } 1400 } 1401 1402 /* 1403 * This releases any locks held in the path starting at level and 1404 * going all the way up to the root. 1405 * 1406 * btrfs_search_slot will keep the lock held on higher nodes in a few 1407 * corner cases, such as COW of the block at slot zero in the node. This 1408 * ignores those rules, and it should only be called when there are no 1409 * more updates to be done higher up in the tree. 1410 */ 1411 noinline void btrfs_unlock_up_safe(struct btrfs_path *path, int level) 1412 { 1413 int i; 1414 1415 if (path->keep_locks) 1416 return; 1417 1418 for (i = level; i < BTRFS_MAX_LEVEL; i++) { 1419 if (!path->nodes[i]) 1420 continue; 1421 if (!path->locks[i]) 1422 continue; 1423 btrfs_tree_unlock(path->nodes[i]); 1424 path->locks[i] = 0; 1425 } 1426 } 1427 1428 /* 1429 * helper function for btrfs_search_slot. The goal is to find a block 1430 * in cache without setting the path to blocking. If we find the block 1431 * we return zero and the path is unchanged. 1432 * 1433 * If we can't find the block, we set the path blocking and do some 1434 * reada. -EAGAIN is returned and the search must be repeated. 1435 */ 1436 static int 1437 read_block_for_search(struct btrfs_trans_handle *trans, 1438 struct btrfs_root *root, struct btrfs_path *p, 1439 struct extent_buffer **eb_ret, int level, int slot, 1440 struct btrfs_key *key) 1441 { 1442 u64 blocknr; 1443 u64 gen; 1444 u32 blocksize; 1445 struct extent_buffer *b = *eb_ret; 1446 struct extent_buffer *tmp; 1447 int ret; 1448 1449 blocknr = btrfs_node_blockptr(b, slot); 1450 gen = btrfs_node_ptr_generation(b, slot); 1451 blocksize = btrfs_level_size(root, level - 1); 1452 1453 tmp = btrfs_find_tree_block(root, blocknr, blocksize); 1454 if (tmp) { 1455 if (btrfs_buffer_uptodate(tmp, 0)) { 1456 if (btrfs_buffer_uptodate(tmp, gen)) { 1457 /* 1458 * we found an up to date block without 1459 * sleeping, return 1460 * right away 1461 */ 1462 *eb_ret = tmp; 1463 return 0; 1464 } 1465 /* the pages were up to date, but we failed 1466 * the generation number check. Do a full 1467 * read for the generation number that is correct. 1468 * We must do this without dropping locks so 1469 * we can trust our generation number 1470 */ 1471 free_extent_buffer(tmp); 1472 tmp = read_tree_block(root, blocknr, blocksize, gen); 1473 if (tmp && btrfs_buffer_uptodate(tmp, gen)) { 1474 *eb_ret = tmp; 1475 return 0; 1476 } 1477 free_extent_buffer(tmp); 1478 btrfs_release_path(NULL, p); 1479 return -EIO; 1480 } 1481 } 1482 1483 /* 1484 * reduce lock contention at high levels 1485 * of the btree by dropping locks before 1486 * we read. Don't release the lock on the current 1487 * level because we need to walk this node to figure 1488 * out which blocks to read. 1489 */ 1490 btrfs_unlock_up_safe(p, level + 1); 1491 btrfs_set_path_blocking(p); 1492 1493 free_extent_buffer(tmp); 1494 if (p->reada) 1495 reada_for_search(root, p, level, slot, key->objectid); 1496 1497 btrfs_release_path(NULL, p); 1498 1499 ret = -EAGAIN; 1500 tmp = read_tree_block(root, blocknr, blocksize, 0); 1501 if (tmp) { 1502 /* 1503 * If the read above didn't mark this buffer up to date, 1504 * it will never end up being up to date. Set ret to EIO now 1505 * and give up so that our caller doesn't loop forever 1506 * on our EAGAINs. 1507 */ 1508 if (!btrfs_buffer_uptodate(tmp, 0)) 1509 ret = -EIO; 1510 free_extent_buffer(tmp); 1511 } 1512 return ret; 1513 } 1514 1515 /* 1516 * helper function for btrfs_search_slot. This does all of the checks 1517 * for node-level blocks and does any balancing required based on 1518 * the ins_len. 1519 * 1520 * If no extra work was required, zero is returned. If we had to 1521 * drop the path, -EAGAIN is returned and btrfs_search_slot must 1522 * start over 1523 */ 1524 static int 1525 setup_nodes_for_search(struct btrfs_trans_handle *trans, 1526 struct btrfs_root *root, struct btrfs_path *p, 1527 struct extent_buffer *b, int level, int ins_len) 1528 { 1529 int ret; 1530 if ((p->search_for_split || ins_len > 0) && btrfs_header_nritems(b) >= 1531 BTRFS_NODEPTRS_PER_BLOCK(root) - 3) { 1532 int sret; 1533 1534 sret = reada_for_balance(root, p, level); 1535 if (sret) 1536 goto again; 1537 1538 btrfs_set_path_blocking(p); 1539 sret = split_node(trans, root, p, level); 1540 btrfs_clear_path_blocking(p, NULL); 1541 1542 BUG_ON(sret > 0); 1543 if (sret) { 1544 ret = sret; 1545 goto done; 1546 } 1547 b = p->nodes[level]; 1548 } else if (ins_len < 0 && btrfs_header_nritems(b) < 1549 BTRFS_NODEPTRS_PER_BLOCK(root) / 2) { 1550 int sret; 1551 1552 sret = reada_for_balance(root, p, level); 1553 if (sret) 1554 goto again; 1555 1556 btrfs_set_path_blocking(p); 1557 sret = balance_level(trans, root, p, level); 1558 btrfs_clear_path_blocking(p, NULL); 1559 1560 if (sret) { 1561 ret = sret; 1562 goto done; 1563 } 1564 b = p->nodes[level]; 1565 if (!b) { 1566 btrfs_release_path(NULL, p); 1567 goto again; 1568 } 1569 BUG_ON(btrfs_header_nritems(b) == 1); 1570 } 1571 return 0; 1572 1573 again: 1574 ret = -EAGAIN; 1575 done: 1576 return ret; 1577 } 1578 1579 /* 1580 * look for key in the tree. path is filled in with nodes along the way 1581 * if key is found, we return zero and you can find the item in the leaf 1582 * level of the path (level 0) 1583 * 1584 * If the key isn't found, the path points to the slot where it should 1585 * be inserted, and 1 is returned. If there are other errors during the 1586 * search a negative error number is returned. 1587 * 1588 * if ins_len > 0, nodes and leaves will be split as we walk down the 1589 * tree. if ins_len < 0, nodes will be merged as we walk down the tree (if 1590 * possible) 1591 */ 1592 int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root 1593 *root, struct btrfs_key *key, struct btrfs_path *p, int 1594 ins_len, int cow) 1595 { 1596 struct extent_buffer *b; 1597 int slot; 1598 int ret; 1599 int err; 1600 int level; 1601 int lowest_unlock = 1; 1602 u8 lowest_level = 0; 1603 1604 lowest_level = p->lowest_level; 1605 WARN_ON(lowest_level && ins_len > 0); 1606 WARN_ON(p->nodes[0] != NULL); 1607 1608 if (ins_len < 0) 1609 lowest_unlock = 2; 1610 1611 again: 1612 if (p->search_commit_root) { 1613 b = root->commit_root; 1614 extent_buffer_get(b); 1615 if (!p->skip_locking) 1616 btrfs_tree_lock(b); 1617 } else { 1618 if (p->skip_locking) 1619 b = btrfs_root_node(root); 1620 else 1621 b = btrfs_lock_root_node(root); 1622 } 1623 1624 while (b) { 1625 level = btrfs_header_level(b); 1626 1627 /* 1628 * setup the path here so we can release it under lock 1629 * contention with the cow code 1630 */ 1631 p->nodes[level] = b; 1632 if (!p->skip_locking) 1633 p->locks[level] = 1; 1634 1635 if (cow) { 1636 /* 1637 * if we don't really need to cow this block 1638 * then we don't want to set the path blocking, 1639 * so we test it here 1640 */ 1641 if (!should_cow_block(trans, root, b)) 1642 goto cow_done; 1643 1644 btrfs_set_path_blocking(p); 1645 1646 err = btrfs_cow_block(trans, root, b, 1647 p->nodes[level + 1], 1648 p->slots[level + 1], &b); 1649 if (err) { 1650 ret = err; 1651 goto done; 1652 } 1653 } 1654 cow_done: 1655 BUG_ON(!cow && ins_len); 1656 if (level != btrfs_header_level(b)) 1657 WARN_ON(1); 1658 level = btrfs_header_level(b); 1659 1660 p->nodes[level] = b; 1661 if (!p->skip_locking) 1662 p->locks[level] = 1; 1663 1664 btrfs_clear_path_blocking(p, NULL); 1665 1666 /* 1667 * we have a lock on b and as long as we aren't changing 1668 * the tree, there is no way to for the items in b to change. 1669 * It is safe to drop the lock on our parent before we 1670 * go through the expensive btree search on b. 1671 * 1672 * If cow is true, then we might be changing slot zero, 1673 * which may require changing the parent. So, we can't 1674 * drop the lock until after we know which slot we're 1675 * operating on. 1676 */ 1677 if (!cow) 1678 btrfs_unlock_up_safe(p, level + 1); 1679 1680 ret = bin_search(b, key, level, &slot); 1681 1682 if (level != 0) { 1683 int dec = 0; 1684 if (ret && slot > 0) { 1685 dec = 1; 1686 slot -= 1; 1687 } 1688 p->slots[level] = slot; 1689 err = setup_nodes_for_search(trans, root, p, b, level, 1690 ins_len); 1691 if (err == -EAGAIN) 1692 goto again; 1693 if (err) { 1694 ret = err; 1695 goto done; 1696 } 1697 b = p->nodes[level]; 1698 slot = p->slots[level]; 1699 1700 unlock_up(p, level, lowest_unlock); 1701 1702 if (level == lowest_level) { 1703 if (dec) 1704 p->slots[level]++; 1705 goto done; 1706 } 1707 1708 err = read_block_for_search(trans, root, p, 1709 &b, level, slot, key); 1710 if (err == -EAGAIN) 1711 goto again; 1712 if (err) { 1713 ret = err; 1714 goto done; 1715 } 1716 1717 if (!p->skip_locking) { 1718 btrfs_clear_path_blocking(p, NULL); 1719 err = btrfs_try_spin_lock(b); 1720 1721 if (!err) { 1722 btrfs_set_path_blocking(p); 1723 btrfs_tree_lock(b); 1724 btrfs_clear_path_blocking(p, b); 1725 } 1726 } 1727 } else { 1728 p->slots[level] = slot; 1729 if (ins_len > 0 && 1730 btrfs_leaf_free_space(root, b) < ins_len) { 1731 btrfs_set_path_blocking(p); 1732 err = split_leaf(trans, root, key, 1733 p, ins_len, ret == 0); 1734 btrfs_clear_path_blocking(p, NULL); 1735 1736 BUG_ON(err > 0); 1737 if (err) { 1738 ret = err; 1739 goto done; 1740 } 1741 } 1742 if (!p->search_for_split) 1743 unlock_up(p, level, lowest_unlock); 1744 goto done; 1745 } 1746 } 1747 ret = 1; 1748 done: 1749 /* 1750 * we don't really know what they plan on doing with the path 1751 * from here on, so for now just mark it as blocking 1752 */ 1753 if (!p->leave_spinning) 1754 btrfs_set_path_blocking(p); 1755 if (ret < 0) 1756 btrfs_release_path(root, p); 1757 return ret; 1758 } 1759 1760 /* 1761 * adjust the pointers going up the tree, starting at level 1762 * making sure the right key of each node is points to 'key'. 1763 * This is used after shifting pointers to the left, so it stops 1764 * fixing up pointers when a given leaf/node is not in slot 0 of the 1765 * higher levels 1766 * 1767 * If this fails to write a tree block, it returns -1, but continues 1768 * fixing up the blocks in ram so the tree is consistent. 1769 */ 1770 static int fixup_low_keys(struct btrfs_trans_handle *trans, 1771 struct btrfs_root *root, struct btrfs_path *path, 1772 struct btrfs_disk_key *key, int level) 1773 { 1774 int i; 1775 int ret = 0; 1776 struct extent_buffer *t; 1777 1778 for (i = level; i < BTRFS_MAX_LEVEL; i++) { 1779 int tslot = path->slots[i]; 1780 if (!path->nodes[i]) 1781 break; 1782 t = path->nodes[i]; 1783 btrfs_set_node_key(t, key, tslot); 1784 btrfs_mark_buffer_dirty(path->nodes[i]); 1785 if (tslot != 0) 1786 break; 1787 } 1788 return ret; 1789 } 1790 1791 /* 1792 * update item key. 1793 * 1794 * This function isn't completely safe. It's the caller's responsibility 1795 * that the new key won't break the order 1796 */ 1797 int btrfs_set_item_key_safe(struct btrfs_trans_handle *trans, 1798 struct btrfs_root *root, struct btrfs_path *path, 1799 struct btrfs_key *new_key) 1800 { 1801 struct btrfs_disk_key disk_key; 1802 struct extent_buffer *eb; 1803 int slot; 1804 1805 eb = path->nodes[0]; 1806 slot = path->slots[0]; 1807 if (slot > 0) { 1808 btrfs_item_key(eb, &disk_key, slot - 1); 1809 if (comp_keys(&disk_key, new_key) >= 0) 1810 return -1; 1811 } 1812 if (slot < btrfs_header_nritems(eb) - 1) { 1813 btrfs_item_key(eb, &disk_key, slot + 1); 1814 if (comp_keys(&disk_key, new_key) <= 0) 1815 return -1; 1816 } 1817 1818 btrfs_cpu_key_to_disk(&disk_key, new_key); 1819 btrfs_set_item_key(eb, &disk_key, slot); 1820 btrfs_mark_buffer_dirty(eb); 1821 if (slot == 0) 1822 fixup_low_keys(trans, root, path, &disk_key, 1); 1823 return 0; 1824 } 1825 1826 /* 1827 * try to push data from one node into the next node left in the 1828 * tree. 1829 * 1830 * returns 0 if some ptrs were pushed left, < 0 if there was some horrible 1831 * error, and > 0 if there was no room in the left hand block. 1832 */ 1833 static int push_node_left(struct btrfs_trans_handle *trans, 1834 struct btrfs_root *root, struct extent_buffer *dst, 1835 struct extent_buffer *src, int empty) 1836 { 1837 int push_items = 0; 1838 int src_nritems; 1839 int dst_nritems; 1840 int ret = 0; 1841 1842 src_nritems = btrfs_header_nritems(src); 1843 dst_nritems = btrfs_header_nritems(dst); 1844 push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems; 1845 WARN_ON(btrfs_header_generation(src) != trans->transid); 1846 WARN_ON(btrfs_header_generation(dst) != trans->transid); 1847 1848 if (!empty && src_nritems <= 8) 1849 return 1; 1850 1851 if (push_items <= 0) 1852 return 1; 1853 1854 if (empty) { 1855 push_items = min(src_nritems, push_items); 1856 if (push_items < src_nritems) { 1857 /* leave at least 8 pointers in the node if 1858 * we aren't going to empty it 1859 */ 1860 if (src_nritems - push_items < 8) { 1861 if (push_items <= 8) 1862 return 1; 1863 push_items -= 8; 1864 } 1865 } 1866 } else 1867 push_items = min(src_nritems - 8, push_items); 1868 1869 copy_extent_buffer(dst, src, 1870 btrfs_node_key_ptr_offset(dst_nritems), 1871 btrfs_node_key_ptr_offset(0), 1872 push_items * sizeof(struct btrfs_key_ptr)); 1873 1874 if (push_items < src_nritems) { 1875 memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0), 1876 btrfs_node_key_ptr_offset(push_items), 1877 (src_nritems - push_items) * 1878 sizeof(struct btrfs_key_ptr)); 1879 } 1880 btrfs_set_header_nritems(src, src_nritems - push_items); 1881 btrfs_set_header_nritems(dst, dst_nritems + push_items); 1882 btrfs_mark_buffer_dirty(src); 1883 btrfs_mark_buffer_dirty(dst); 1884 1885 return ret; 1886 } 1887 1888 /* 1889 * try to push data from one node into the next node right in the 1890 * tree. 1891 * 1892 * returns 0 if some ptrs were pushed, < 0 if there was some horrible 1893 * error, and > 0 if there was no room in the right hand block. 1894 * 1895 * this will only push up to 1/2 the contents of the left node over 1896 */ 1897 static int balance_node_right(struct btrfs_trans_handle *trans, 1898 struct btrfs_root *root, 1899 struct extent_buffer *dst, 1900 struct extent_buffer *src) 1901 { 1902 int push_items = 0; 1903 int max_push; 1904 int src_nritems; 1905 int dst_nritems; 1906 int ret = 0; 1907 1908 WARN_ON(btrfs_header_generation(src) != trans->transid); 1909 WARN_ON(btrfs_header_generation(dst) != trans->transid); 1910 1911 src_nritems = btrfs_header_nritems(src); 1912 dst_nritems = btrfs_header_nritems(dst); 1913 push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems; 1914 if (push_items <= 0) 1915 return 1; 1916 1917 if (src_nritems < 4) 1918 return 1; 1919 1920 max_push = src_nritems / 2 + 1; 1921 /* don't try to empty the node */ 1922 if (max_push >= src_nritems) 1923 return 1; 1924 1925 if (max_push < push_items) 1926 push_items = max_push; 1927 1928 memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items), 1929 btrfs_node_key_ptr_offset(0), 1930 (dst_nritems) * 1931 sizeof(struct btrfs_key_ptr)); 1932 1933 copy_extent_buffer(dst, src, 1934 btrfs_node_key_ptr_offset(0), 1935 btrfs_node_key_ptr_offset(src_nritems - push_items), 1936 push_items * sizeof(struct btrfs_key_ptr)); 1937 1938 btrfs_set_header_nritems(src, src_nritems - push_items); 1939 btrfs_set_header_nritems(dst, dst_nritems + push_items); 1940 1941 btrfs_mark_buffer_dirty(src); 1942 btrfs_mark_buffer_dirty(dst); 1943 1944 return ret; 1945 } 1946 1947 /* 1948 * helper function to insert a new root level in the tree. 1949 * A new node is allocated, and a single item is inserted to 1950 * point to the existing root 1951 * 1952 * returns zero on success or < 0 on failure. 1953 */ 1954 static noinline int insert_new_root(struct btrfs_trans_handle *trans, 1955 struct btrfs_root *root, 1956 struct btrfs_path *path, int level) 1957 { 1958 u64 lower_gen; 1959 struct extent_buffer *lower; 1960 struct extent_buffer *c; 1961 struct extent_buffer *old; 1962 struct btrfs_disk_key lower_key; 1963 1964 BUG_ON(path->nodes[level]); 1965 BUG_ON(path->nodes[level-1] != root->node); 1966 1967 lower = path->nodes[level-1]; 1968 if (level == 1) 1969 btrfs_item_key(lower, &lower_key, 0); 1970 else 1971 btrfs_node_key(lower, &lower_key, 0); 1972 1973 c = btrfs_alloc_free_block(trans, root, root->nodesize, 0, 1974 root->root_key.objectid, &lower_key, 1975 level, root->node->start, 0); 1976 if (IS_ERR(c)) 1977 return PTR_ERR(c); 1978 1979 root_add_used(root, root->nodesize); 1980 1981 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header)); 1982 btrfs_set_header_nritems(c, 1); 1983 btrfs_set_header_level(c, level); 1984 btrfs_set_header_bytenr(c, c->start); 1985 btrfs_set_header_generation(c, trans->transid); 1986 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV); 1987 btrfs_set_header_owner(c, root->root_key.objectid); 1988 1989 write_extent_buffer(c, root->fs_info->fsid, 1990 (unsigned long)btrfs_header_fsid(c), 1991 BTRFS_FSID_SIZE); 1992 1993 write_extent_buffer(c, root->fs_info->chunk_tree_uuid, 1994 (unsigned long)btrfs_header_chunk_tree_uuid(c), 1995 BTRFS_UUID_SIZE); 1996 1997 btrfs_set_node_key(c, &lower_key, 0); 1998 btrfs_set_node_blockptr(c, 0, lower->start); 1999 lower_gen = btrfs_header_generation(lower); 2000 WARN_ON(lower_gen != trans->transid); 2001 2002 btrfs_set_node_ptr_generation(c, 0, lower_gen); 2003 2004 btrfs_mark_buffer_dirty(c); 2005 2006 old = root->node; 2007 rcu_assign_pointer(root->node, c); 2008 2009 /* the super has an extra ref to root->node */ 2010 free_extent_buffer(old); 2011 2012 add_root_to_dirty_list(root); 2013 extent_buffer_get(c); 2014 path->nodes[level] = c; 2015 path->locks[level] = 1; 2016 path->slots[level] = 0; 2017 return 0; 2018 } 2019 2020 /* 2021 * worker function to insert a single pointer in a node. 2022 * the node should have enough room for the pointer already 2023 * 2024 * slot and level indicate where you want the key to go, and 2025 * blocknr is the block the key points to. 2026 * 2027 * returns zero on success and < 0 on any error 2028 */ 2029 static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root 2030 *root, struct btrfs_path *path, struct btrfs_disk_key 2031 *key, u64 bytenr, int slot, int level) 2032 { 2033 struct extent_buffer *lower; 2034 int nritems; 2035 2036 BUG_ON(!path->nodes[level]); 2037 btrfs_assert_tree_locked(path->nodes[level]); 2038 lower = path->nodes[level]; 2039 nritems = btrfs_header_nritems(lower); 2040 BUG_ON(slot > nritems); 2041 if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root)) 2042 BUG(); 2043 if (slot != nritems) { 2044 memmove_extent_buffer(lower, 2045 btrfs_node_key_ptr_offset(slot + 1), 2046 btrfs_node_key_ptr_offset(slot), 2047 (nritems - slot) * sizeof(struct btrfs_key_ptr)); 2048 } 2049 btrfs_set_node_key(lower, key, slot); 2050 btrfs_set_node_blockptr(lower, slot, bytenr); 2051 WARN_ON(trans->transid == 0); 2052 btrfs_set_node_ptr_generation(lower, slot, trans->transid); 2053 btrfs_set_header_nritems(lower, nritems + 1); 2054 btrfs_mark_buffer_dirty(lower); 2055 return 0; 2056 } 2057 2058 /* 2059 * split the node at the specified level in path in two. 2060 * The path is corrected to point to the appropriate node after the split 2061 * 2062 * Before splitting this tries to make some room in the node by pushing 2063 * left and right, if either one works, it returns right away. 2064 * 2065 * returns 0 on success and < 0 on failure 2066 */ 2067 static noinline int split_node(struct btrfs_trans_handle *trans, 2068 struct btrfs_root *root, 2069 struct btrfs_path *path, int level) 2070 { 2071 struct extent_buffer *c; 2072 struct extent_buffer *split; 2073 struct btrfs_disk_key disk_key; 2074 int mid; 2075 int ret; 2076 int wret; 2077 u32 c_nritems; 2078 2079 c = path->nodes[level]; 2080 WARN_ON(btrfs_header_generation(c) != trans->transid); 2081 if (c == root->node) { 2082 /* trying to split the root, lets make a new one */ 2083 ret = insert_new_root(trans, root, path, level + 1); 2084 if (ret) 2085 return ret; 2086 } else { 2087 ret = push_nodes_for_insert(trans, root, path, level); 2088 c = path->nodes[level]; 2089 if (!ret && btrfs_header_nritems(c) < 2090 BTRFS_NODEPTRS_PER_BLOCK(root) - 3) 2091 return 0; 2092 if (ret < 0) 2093 return ret; 2094 } 2095 2096 c_nritems = btrfs_header_nritems(c); 2097 mid = (c_nritems + 1) / 2; 2098 btrfs_node_key(c, &disk_key, mid); 2099 2100 split = btrfs_alloc_free_block(trans, root, root->nodesize, 0, 2101 root->root_key.objectid, 2102 &disk_key, level, c->start, 0); 2103 if (IS_ERR(split)) 2104 return PTR_ERR(split); 2105 2106 root_add_used(root, root->nodesize); 2107 2108 memset_extent_buffer(split, 0, 0, sizeof(struct btrfs_header)); 2109 btrfs_set_header_level(split, btrfs_header_level(c)); 2110 btrfs_set_header_bytenr(split, split->start); 2111 btrfs_set_header_generation(split, trans->transid); 2112 btrfs_set_header_backref_rev(split, BTRFS_MIXED_BACKREF_REV); 2113 btrfs_set_header_owner(split, root->root_key.objectid); 2114 write_extent_buffer(split, root->fs_info->fsid, 2115 (unsigned long)btrfs_header_fsid(split), 2116 BTRFS_FSID_SIZE); 2117 write_extent_buffer(split, root->fs_info->chunk_tree_uuid, 2118 (unsigned long)btrfs_header_chunk_tree_uuid(split), 2119 BTRFS_UUID_SIZE); 2120 2121 2122 copy_extent_buffer(split, c, 2123 btrfs_node_key_ptr_offset(0), 2124 btrfs_node_key_ptr_offset(mid), 2125 (c_nritems - mid) * sizeof(struct btrfs_key_ptr)); 2126 btrfs_set_header_nritems(split, c_nritems - mid); 2127 btrfs_set_header_nritems(c, mid); 2128 ret = 0; 2129 2130 btrfs_mark_buffer_dirty(c); 2131 btrfs_mark_buffer_dirty(split); 2132 2133 wret = insert_ptr(trans, root, path, &disk_key, split->start, 2134 path->slots[level + 1] + 1, 2135 level + 1); 2136 if (wret) 2137 ret = wret; 2138 2139 if (path->slots[level] >= mid) { 2140 path->slots[level] -= mid; 2141 btrfs_tree_unlock(c); 2142 free_extent_buffer(c); 2143 path->nodes[level] = split; 2144 path->slots[level + 1] += 1; 2145 } else { 2146 btrfs_tree_unlock(split); 2147 free_extent_buffer(split); 2148 } 2149 return ret; 2150 } 2151 2152 /* 2153 * how many bytes are required to store the items in a leaf. start 2154 * and nr indicate which items in the leaf to check. This totals up the 2155 * space used both by the item structs and the item data 2156 */ 2157 static int leaf_space_used(struct extent_buffer *l, int start, int nr) 2158 { 2159 int data_len; 2160 int nritems = btrfs_header_nritems(l); 2161 int end = min(nritems, start + nr) - 1; 2162 2163 if (!nr) 2164 return 0; 2165 data_len = btrfs_item_end_nr(l, start); 2166 data_len = data_len - btrfs_item_offset_nr(l, end); 2167 data_len += sizeof(struct btrfs_item) * nr; 2168 WARN_ON(data_len < 0); 2169 return data_len; 2170 } 2171 2172 /* 2173 * The space between the end of the leaf items and 2174 * the start of the leaf data. IOW, how much room 2175 * the leaf has left for both items and data 2176 */ 2177 noinline int btrfs_leaf_free_space(struct btrfs_root *root, 2178 struct extent_buffer *leaf) 2179 { 2180 int nritems = btrfs_header_nritems(leaf); 2181 int ret; 2182 ret = BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems); 2183 if (ret < 0) { 2184 printk(KERN_CRIT "leaf free space ret %d, leaf data size %lu, " 2185 "used %d nritems %d\n", 2186 ret, (unsigned long) BTRFS_LEAF_DATA_SIZE(root), 2187 leaf_space_used(leaf, 0, nritems), nritems); 2188 } 2189 return ret; 2190 } 2191 2192 /* 2193 * min slot controls the lowest index we're willing to push to the 2194 * right. We'll push up to and including min_slot, but no lower 2195 */ 2196 static noinline int __push_leaf_right(struct btrfs_trans_handle *trans, 2197 struct btrfs_root *root, 2198 struct btrfs_path *path, 2199 int data_size, int empty, 2200 struct extent_buffer *right, 2201 int free_space, u32 left_nritems, 2202 u32 min_slot) 2203 { 2204 struct extent_buffer *left = path->nodes[0]; 2205 struct extent_buffer *upper = path->nodes[1]; 2206 struct btrfs_disk_key disk_key; 2207 int slot; 2208 u32 i; 2209 int push_space = 0; 2210 int push_items = 0; 2211 struct btrfs_item *item; 2212 u32 nr; 2213 u32 right_nritems; 2214 u32 data_end; 2215 u32 this_item_size; 2216 2217 if (empty) 2218 nr = 0; 2219 else 2220 nr = max_t(u32, 1, min_slot); 2221 2222 if (path->slots[0] >= left_nritems) 2223 push_space += data_size; 2224 2225 slot = path->slots[1]; 2226 i = left_nritems - 1; 2227 while (i >= nr) { 2228 item = btrfs_item_nr(left, i); 2229 2230 if (!empty && push_items > 0) { 2231 if (path->slots[0] > i) 2232 break; 2233 if (path->slots[0] == i) { 2234 int space = btrfs_leaf_free_space(root, left); 2235 if (space + push_space * 2 > free_space) 2236 break; 2237 } 2238 } 2239 2240 if (path->slots[0] == i) 2241 push_space += data_size; 2242 2243 if (!left->map_token) { 2244 map_extent_buffer(left, (unsigned long)item, 2245 sizeof(struct btrfs_item), 2246 &left->map_token, &left->kaddr, 2247 &left->map_start, &left->map_len, 2248 KM_USER1); 2249 } 2250 2251 this_item_size = btrfs_item_size(left, item); 2252 if (this_item_size + sizeof(*item) + push_space > free_space) 2253 break; 2254 2255 push_items++; 2256 push_space += this_item_size + sizeof(*item); 2257 if (i == 0) 2258 break; 2259 i--; 2260 } 2261 if (left->map_token) { 2262 unmap_extent_buffer(left, left->map_token, KM_USER1); 2263 left->map_token = NULL; 2264 } 2265 2266 if (push_items == 0) 2267 goto out_unlock; 2268 2269 if (!empty && push_items == left_nritems) 2270 WARN_ON(1); 2271 2272 /* push left to right */ 2273 right_nritems = btrfs_header_nritems(right); 2274 2275 push_space = btrfs_item_end_nr(left, left_nritems - push_items); 2276 push_space -= leaf_data_end(root, left); 2277 2278 /* make room in the right data area */ 2279 data_end = leaf_data_end(root, right); 2280 memmove_extent_buffer(right, 2281 btrfs_leaf_data(right) + data_end - push_space, 2282 btrfs_leaf_data(right) + data_end, 2283 BTRFS_LEAF_DATA_SIZE(root) - data_end); 2284 2285 /* copy from the left data area */ 2286 copy_extent_buffer(right, left, btrfs_leaf_data(right) + 2287 BTRFS_LEAF_DATA_SIZE(root) - push_space, 2288 btrfs_leaf_data(left) + leaf_data_end(root, left), 2289 push_space); 2290 2291 memmove_extent_buffer(right, btrfs_item_nr_offset(push_items), 2292 btrfs_item_nr_offset(0), 2293 right_nritems * sizeof(struct btrfs_item)); 2294 2295 /* copy the items from left to right */ 2296 copy_extent_buffer(right, left, btrfs_item_nr_offset(0), 2297 btrfs_item_nr_offset(left_nritems - push_items), 2298 push_items * sizeof(struct btrfs_item)); 2299 2300 /* update the item pointers */ 2301 right_nritems += push_items; 2302 btrfs_set_header_nritems(right, right_nritems); 2303 push_space = BTRFS_LEAF_DATA_SIZE(root); 2304 for (i = 0; i < right_nritems; i++) { 2305 item = btrfs_item_nr(right, i); 2306 if (!right->map_token) { 2307 map_extent_buffer(right, (unsigned long)item, 2308 sizeof(struct btrfs_item), 2309 &right->map_token, &right->kaddr, 2310 &right->map_start, &right->map_len, 2311 KM_USER1); 2312 } 2313 push_space -= btrfs_item_size(right, item); 2314 btrfs_set_item_offset(right, item, push_space); 2315 } 2316 2317 if (right->map_token) { 2318 unmap_extent_buffer(right, right->map_token, KM_USER1); 2319 right->map_token = NULL; 2320 } 2321 left_nritems -= push_items; 2322 btrfs_set_header_nritems(left, left_nritems); 2323 2324 if (left_nritems) 2325 btrfs_mark_buffer_dirty(left); 2326 else 2327 clean_tree_block(trans, root, left); 2328 2329 btrfs_mark_buffer_dirty(right); 2330 2331 btrfs_item_key(right, &disk_key, 0); 2332 btrfs_set_node_key(upper, &disk_key, slot + 1); 2333 btrfs_mark_buffer_dirty(upper); 2334 2335 /* then fixup the leaf pointer in the path */ 2336 if (path->slots[0] >= left_nritems) { 2337 path->slots[0] -= left_nritems; 2338 if (btrfs_header_nritems(path->nodes[0]) == 0) 2339 clean_tree_block(trans, root, path->nodes[0]); 2340 btrfs_tree_unlock(path->nodes[0]); 2341 free_extent_buffer(path->nodes[0]); 2342 path->nodes[0] = right; 2343 path->slots[1] += 1; 2344 } else { 2345 btrfs_tree_unlock(right); 2346 free_extent_buffer(right); 2347 } 2348 return 0; 2349 2350 out_unlock: 2351 btrfs_tree_unlock(right); 2352 free_extent_buffer(right); 2353 return 1; 2354 } 2355 2356 /* 2357 * push some data in the path leaf to the right, trying to free up at 2358 * least data_size bytes. returns zero if the push worked, nonzero otherwise 2359 * 2360 * returns 1 if the push failed because the other node didn't have enough 2361 * room, 0 if everything worked out and < 0 if there were major errors. 2362 * 2363 * this will push starting from min_slot to the end of the leaf. It won't 2364 * push any slot lower than min_slot 2365 */ 2366 static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root 2367 *root, struct btrfs_path *path, 2368 int min_data_size, int data_size, 2369 int empty, u32 min_slot) 2370 { 2371 struct extent_buffer *left = path->nodes[0]; 2372 struct extent_buffer *right; 2373 struct extent_buffer *upper; 2374 int slot; 2375 int free_space; 2376 u32 left_nritems; 2377 int ret; 2378 2379 if (!path->nodes[1]) 2380 return 1; 2381 2382 slot = path->slots[1]; 2383 upper = path->nodes[1]; 2384 if (slot >= btrfs_header_nritems(upper) - 1) 2385 return 1; 2386 2387 btrfs_assert_tree_locked(path->nodes[1]); 2388 2389 right = read_node_slot(root, upper, slot + 1); 2390 if (right == NULL) 2391 return 1; 2392 2393 btrfs_tree_lock(right); 2394 btrfs_set_lock_blocking(right); 2395 2396 free_space = btrfs_leaf_free_space(root, right); 2397 if (free_space < data_size) 2398 goto out_unlock; 2399 2400 /* cow and double check */ 2401 ret = btrfs_cow_block(trans, root, right, upper, 2402 slot + 1, &right); 2403 if (ret) 2404 goto out_unlock; 2405 2406 free_space = btrfs_leaf_free_space(root, right); 2407 if (free_space < data_size) 2408 goto out_unlock; 2409 2410 left_nritems = btrfs_header_nritems(left); 2411 if (left_nritems == 0) 2412 goto out_unlock; 2413 2414 return __push_leaf_right(trans, root, path, min_data_size, empty, 2415 right, free_space, left_nritems, min_slot); 2416 out_unlock: 2417 btrfs_tree_unlock(right); 2418 free_extent_buffer(right); 2419 return 1; 2420 } 2421 2422 /* 2423 * push some data in the path leaf to the left, trying to free up at 2424 * least data_size bytes. returns zero if the push worked, nonzero otherwise 2425 * 2426 * max_slot can put a limit on how far into the leaf we'll push items. The 2427 * item at 'max_slot' won't be touched. Use (u32)-1 to make us do all the 2428 * items 2429 */ 2430 static noinline int __push_leaf_left(struct btrfs_trans_handle *trans, 2431 struct btrfs_root *root, 2432 struct btrfs_path *path, int data_size, 2433 int empty, struct extent_buffer *left, 2434 int free_space, u32 right_nritems, 2435 u32 max_slot) 2436 { 2437 struct btrfs_disk_key disk_key; 2438 struct extent_buffer *right = path->nodes[0]; 2439 int i; 2440 int push_space = 0; 2441 int push_items = 0; 2442 struct btrfs_item *item; 2443 u32 old_left_nritems; 2444 u32 nr; 2445 int ret = 0; 2446 int wret; 2447 u32 this_item_size; 2448 u32 old_left_item_size; 2449 2450 if (empty) 2451 nr = min(right_nritems, max_slot); 2452 else 2453 nr = min(right_nritems - 1, max_slot); 2454 2455 for (i = 0; i < nr; i++) { 2456 item = btrfs_item_nr(right, i); 2457 if (!right->map_token) { 2458 map_extent_buffer(right, (unsigned long)item, 2459 sizeof(struct btrfs_item), 2460 &right->map_token, &right->kaddr, 2461 &right->map_start, &right->map_len, 2462 KM_USER1); 2463 } 2464 2465 if (!empty && push_items > 0) { 2466 if (path->slots[0] < i) 2467 break; 2468 if (path->slots[0] == i) { 2469 int space = btrfs_leaf_free_space(root, right); 2470 if (space + push_space * 2 > free_space) 2471 break; 2472 } 2473 } 2474 2475 if (path->slots[0] == i) 2476 push_space += data_size; 2477 2478 this_item_size = btrfs_item_size(right, item); 2479 if (this_item_size + sizeof(*item) + push_space > free_space) 2480 break; 2481 2482 push_items++; 2483 push_space += this_item_size + sizeof(*item); 2484 } 2485 2486 if (right->map_token) { 2487 unmap_extent_buffer(right, right->map_token, KM_USER1); 2488 right->map_token = NULL; 2489 } 2490 2491 if (push_items == 0) { 2492 ret = 1; 2493 goto out; 2494 } 2495 if (!empty && push_items == btrfs_header_nritems(right)) 2496 WARN_ON(1); 2497 2498 /* push data from right to left */ 2499 copy_extent_buffer(left, right, 2500 btrfs_item_nr_offset(btrfs_header_nritems(left)), 2501 btrfs_item_nr_offset(0), 2502 push_items * sizeof(struct btrfs_item)); 2503 2504 push_space = BTRFS_LEAF_DATA_SIZE(root) - 2505 btrfs_item_offset_nr(right, push_items - 1); 2506 2507 copy_extent_buffer(left, right, btrfs_leaf_data(left) + 2508 leaf_data_end(root, left) - push_space, 2509 btrfs_leaf_data(right) + 2510 btrfs_item_offset_nr(right, push_items - 1), 2511 push_space); 2512 old_left_nritems = btrfs_header_nritems(left); 2513 BUG_ON(old_left_nritems <= 0); 2514 2515 old_left_item_size = btrfs_item_offset_nr(left, old_left_nritems - 1); 2516 for (i = old_left_nritems; i < old_left_nritems + push_items; i++) { 2517 u32 ioff; 2518 2519 item = btrfs_item_nr(left, i); 2520 if (!left->map_token) { 2521 map_extent_buffer(left, (unsigned long)item, 2522 sizeof(struct btrfs_item), 2523 &left->map_token, &left->kaddr, 2524 &left->map_start, &left->map_len, 2525 KM_USER1); 2526 } 2527 2528 ioff = btrfs_item_offset(left, item); 2529 btrfs_set_item_offset(left, item, 2530 ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size)); 2531 } 2532 btrfs_set_header_nritems(left, old_left_nritems + push_items); 2533 if (left->map_token) { 2534 unmap_extent_buffer(left, left->map_token, KM_USER1); 2535 left->map_token = NULL; 2536 } 2537 2538 /* fixup right node */ 2539 if (push_items > right_nritems) { 2540 printk(KERN_CRIT "push items %d nr %u\n", push_items, 2541 right_nritems); 2542 WARN_ON(1); 2543 } 2544 2545 if (push_items < right_nritems) { 2546 push_space = btrfs_item_offset_nr(right, push_items - 1) - 2547 leaf_data_end(root, right); 2548 memmove_extent_buffer(right, btrfs_leaf_data(right) + 2549 BTRFS_LEAF_DATA_SIZE(root) - push_space, 2550 btrfs_leaf_data(right) + 2551 leaf_data_end(root, right), push_space); 2552 2553 memmove_extent_buffer(right, btrfs_item_nr_offset(0), 2554 btrfs_item_nr_offset(push_items), 2555 (btrfs_header_nritems(right) - push_items) * 2556 sizeof(struct btrfs_item)); 2557 } 2558 right_nritems -= push_items; 2559 btrfs_set_header_nritems(right, right_nritems); 2560 push_space = BTRFS_LEAF_DATA_SIZE(root); 2561 for (i = 0; i < right_nritems; i++) { 2562 item = btrfs_item_nr(right, i); 2563 2564 if (!right->map_token) { 2565 map_extent_buffer(right, (unsigned long)item, 2566 sizeof(struct btrfs_item), 2567 &right->map_token, &right->kaddr, 2568 &right->map_start, &right->map_len, 2569 KM_USER1); 2570 } 2571 2572 push_space = push_space - btrfs_item_size(right, item); 2573 btrfs_set_item_offset(right, item, push_space); 2574 } 2575 if (right->map_token) { 2576 unmap_extent_buffer(right, right->map_token, KM_USER1); 2577 right->map_token = NULL; 2578 } 2579 2580 btrfs_mark_buffer_dirty(left); 2581 if (right_nritems) 2582 btrfs_mark_buffer_dirty(right); 2583 else 2584 clean_tree_block(trans, root, right); 2585 2586 btrfs_item_key(right, &disk_key, 0); 2587 wret = fixup_low_keys(trans, root, path, &disk_key, 1); 2588 if (wret) 2589 ret = wret; 2590 2591 /* then fixup the leaf pointer in the path */ 2592 if (path->slots[0] < push_items) { 2593 path->slots[0] += old_left_nritems; 2594 btrfs_tree_unlock(path->nodes[0]); 2595 free_extent_buffer(path->nodes[0]); 2596 path->nodes[0] = left; 2597 path->slots[1] -= 1; 2598 } else { 2599 btrfs_tree_unlock(left); 2600 free_extent_buffer(left); 2601 path->slots[0] -= push_items; 2602 } 2603 BUG_ON(path->slots[0] < 0); 2604 return ret; 2605 out: 2606 btrfs_tree_unlock(left); 2607 free_extent_buffer(left); 2608 return ret; 2609 } 2610 2611 /* 2612 * push some data in the path leaf to the left, trying to free up at 2613 * least data_size bytes. returns zero if the push worked, nonzero otherwise 2614 * 2615 * max_slot can put a limit on how far into the leaf we'll push items. The 2616 * item at 'max_slot' won't be touched. Use (u32)-1 to make us push all the 2617 * items 2618 */ 2619 static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root 2620 *root, struct btrfs_path *path, int min_data_size, 2621 int data_size, int empty, u32 max_slot) 2622 { 2623 struct extent_buffer *right = path->nodes[0]; 2624 struct extent_buffer *left; 2625 int slot; 2626 int free_space; 2627 u32 right_nritems; 2628 int ret = 0; 2629 2630 slot = path->slots[1]; 2631 if (slot == 0) 2632 return 1; 2633 if (!path->nodes[1]) 2634 return 1; 2635 2636 right_nritems = btrfs_header_nritems(right); 2637 if (right_nritems == 0) 2638 return 1; 2639 2640 btrfs_assert_tree_locked(path->nodes[1]); 2641 2642 left = read_node_slot(root, path->nodes[1], slot - 1); 2643 if (left == NULL) 2644 return 1; 2645 2646 btrfs_tree_lock(left); 2647 btrfs_set_lock_blocking(left); 2648 2649 free_space = btrfs_leaf_free_space(root, left); 2650 if (free_space < data_size) { 2651 ret = 1; 2652 goto out; 2653 } 2654 2655 /* cow and double check */ 2656 ret = btrfs_cow_block(trans, root, left, 2657 path->nodes[1], slot - 1, &left); 2658 if (ret) { 2659 /* we hit -ENOSPC, but it isn't fatal here */ 2660 ret = 1; 2661 goto out; 2662 } 2663 2664 free_space = btrfs_leaf_free_space(root, left); 2665 if (free_space < data_size) { 2666 ret = 1; 2667 goto out; 2668 } 2669 2670 return __push_leaf_left(trans, root, path, min_data_size, 2671 empty, left, free_space, right_nritems, 2672 max_slot); 2673 out: 2674 btrfs_tree_unlock(left); 2675 free_extent_buffer(left); 2676 return ret; 2677 } 2678 2679 /* 2680 * split the path's leaf in two, making sure there is at least data_size 2681 * available for the resulting leaf level of the path. 2682 * 2683 * returns 0 if all went well and < 0 on failure. 2684 */ 2685 static noinline int copy_for_split(struct btrfs_trans_handle *trans, 2686 struct btrfs_root *root, 2687 struct btrfs_path *path, 2688 struct extent_buffer *l, 2689 struct extent_buffer *right, 2690 int slot, int mid, int nritems) 2691 { 2692 int data_copy_size; 2693 int rt_data_off; 2694 int i; 2695 int ret = 0; 2696 int wret; 2697 struct btrfs_disk_key disk_key; 2698 2699 nritems = nritems - mid; 2700 btrfs_set_header_nritems(right, nritems); 2701 data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l); 2702 2703 copy_extent_buffer(right, l, btrfs_item_nr_offset(0), 2704 btrfs_item_nr_offset(mid), 2705 nritems * sizeof(struct btrfs_item)); 2706 2707 copy_extent_buffer(right, l, 2708 btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) - 2709 data_copy_size, btrfs_leaf_data(l) + 2710 leaf_data_end(root, l), data_copy_size); 2711 2712 rt_data_off = BTRFS_LEAF_DATA_SIZE(root) - 2713 btrfs_item_end_nr(l, mid); 2714 2715 for (i = 0; i < nritems; i++) { 2716 struct btrfs_item *item = btrfs_item_nr(right, i); 2717 u32 ioff; 2718 2719 if (!right->map_token) { 2720 map_extent_buffer(right, (unsigned long)item, 2721 sizeof(struct btrfs_item), 2722 &right->map_token, &right->kaddr, 2723 &right->map_start, &right->map_len, 2724 KM_USER1); 2725 } 2726 2727 ioff = btrfs_item_offset(right, item); 2728 btrfs_set_item_offset(right, item, ioff + rt_data_off); 2729 } 2730 2731 if (right->map_token) { 2732 unmap_extent_buffer(right, right->map_token, KM_USER1); 2733 right->map_token = NULL; 2734 } 2735 2736 btrfs_set_header_nritems(l, mid); 2737 ret = 0; 2738 btrfs_item_key(right, &disk_key, 0); 2739 wret = insert_ptr(trans, root, path, &disk_key, right->start, 2740 path->slots[1] + 1, 1); 2741 if (wret) 2742 ret = wret; 2743 2744 btrfs_mark_buffer_dirty(right); 2745 btrfs_mark_buffer_dirty(l); 2746 BUG_ON(path->slots[0] != slot); 2747 2748 if (mid <= slot) { 2749 btrfs_tree_unlock(path->nodes[0]); 2750 free_extent_buffer(path->nodes[0]); 2751 path->nodes[0] = right; 2752 path->slots[0] -= mid; 2753 path->slots[1] += 1; 2754 } else { 2755 btrfs_tree_unlock(right); 2756 free_extent_buffer(right); 2757 } 2758 2759 BUG_ON(path->slots[0] < 0); 2760 2761 return ret; 2762 } 2763 2764 /* 2765 * double splits happen when we need to insert a big item in the middle 2766 * of a leaf. A double split can leave us with 3 mostly empty leaves: 2767 * leaf: [ slots 0 - N] [ our target ] [ N + 1 - total in leaf ] 2768 * A B C 2769 * 2770 * We avoid this by trying to push the items on either side of our target 2771 * into the adjacent leaves. If all goes well we can avoid the double split 2772 * completely. 2773 */ 2774 static noinline int push_for_double_split(struct btrfs_trans_handle *trans, 2775 struct btrfs_root *root, 2776 struct btrfs_path *path, 2777 int data_size) 2778 { 2779 int ret; 2780 int progress = 0; 2781 int slot; 2782 u32 nritems; 2783 2784 slot = path->slots[0]; 2785 2786 /* 2787 * try to push all the items after our slot into the 2788 * right leaf 2789 */ 2790 ret = push_leaf_right(trans, root, path, 1, data_size, 0, slot); 2791 if (ret < 0) 2792 return ret; 2793 2794 if (ret == 0) 2795 progress++; 2796 2797 nritems = btrfs_header_nritems(path->nodes[0]); 2798 /* 2799 * our goal is to get our slot at the start or end of a leaf. If 2800 * we've done so we're done 2801 */ 2802 if (path->slots[0] == 0 || path->slots[0] == nritems) 2803 return 0; 2804 2805 if (btrfs_leaf_free_space(root, path->nodes[0]) >= data_size) 2806 return 0; 2807 2808 /* try to push all the items before our slot into the next leaf */ 2809 slot = path->slots[0]; 2810 ret = push_leaf_left(trans, root, path, 1, data_size, 0, slot); 2811 if (ret < 0) 2812 return ret; 2813 2814 if (ret == 0) 2815 progress++; 2816 2817 if (progress) 2818 return 0; 2819 return 1; 2820 } 2821 2822 /* 2823 * split the path's leaf in two, making sure there is at least data_size 2824 * available for the resulting leaf level of the path. 2825 * 2826 * returns 0 if all went well and < 0 on failure. 2827 */ 2828 static noinline int split_leaf(struct btrfs_trans_handle *trans, 2829 struct btrfs_root *root, 2830 struct btrfs_key *ins_key, 2831 struct btrfs_path *path, int data_size, 2832 int extend) 2833 { 2834 struct btrfs_disk_key disk_key; 2835 struct extent_buffer *l; 2836 u32 nritems; 2837 int mid; 2838 int slot; 2839 struct extent_buffer *right; 2840 int ret = 0; 2841 int wret; 2842 int split; 2843 int num_doubles = 0; 2844 int tried_avoid_double = 0; 2845 2846 l = path->nodes[0]; 2847 slot = path->slots[0]; 2848 if (extend && data_size + btrfs_item_size_nr(l, slot) + 2849 sizeof(struct btrfs_item) > BTRFS_LEAF_DATA_SIZE(root)) 2850 return -EOVERFLOW; 2851 2852 /* first try to make some room by pushing left and right */ 2853 if (data_size) { 2854 wret = push_leaf_right(trans, root, path, data_size, 2855 data_size, 0, 0); 2856 if (wret < 0) 2857 return wret; 2858 if (wret) { 2859 wret = push_leaf_left(trans, root, path, data_size, 2860 data_size, 0, (u32)-1); 2861 if (wret < 0) 2862 return wret; 2863 } 2864 l = path->nodes[0]; 2865 2866 /* did the pushes work? */ 2867 if (btrfs_leaf_free_space(root, l) >= data_size) 2868 return 0; 2869 } 2870 2871 if (!path->nodes[1]) { 2872 ret = insert_new_root(trans, root, path, 1); 2873 if (ret) 2874 return ret; 2875 } 2876 again: 2877 split = 1; 2878 l = path->nodes[0]; 2879 slot = path->slots[0]; 2880 nritems = btrfs_header_nritems(l); 2881 mid = (nritems + 1) / 2; 2882 2883 if (mid <= slot) { 2884 if (nritems == 1 || 2885 leaf_space_used(l, mid, nritems - mid) + data_size > 2886 BTRFS_LEAF_DATA_SIZE(root)) { 2887 if (slot >= nritems) { 2888 split = 0; 2889 } else { 2890 mid = slot; 2891 if (mid != nritems && 2892 leaf_space_used(l, mid, nritems - mid) + 2893 data_size > BTRFS_LEAF_DATA_SIZE(root)) { 2894 if (data_size && !tried_avoid_double) 2895 goto push_for_double; 2896 split = 2; 2897 } 2898 } 2899 } 2900 } else { 2901 if (leaf_space_used(l, 0, mid) + data_size > 2902 BTRFS_LEAF_DATA_SIZE(root)) { 2903 if (!extend && data_size && slot == 0) { 2904 split = 0; 2905 } else if ((extend || !data_size) && slot == 0) { 2906 mid = 1; 2907 } else { 2908 mid = slot; 2909 if (mid != nritems && 2910 leaf_space_used(l, mid, nritems - mid) + 2911 data_size > BTRFS_LEAF_DATA_SIZE(root)) { 2912 if (data_size && !tried_avoid_double) 2913 goto push_for_double; 2914 split = 2 ; 2915 } 2916 } 2917 } 2918 } 2919 2920 if (split == 0) 2921 btrfs_cpu_key_to_disk(&disk_key, ins_key); 2922 else 2923 btrfs_item_key(l, &disk_key, mid); 2924 2925 right = btrfs_alloc_free_block(trans, root, root->leafsize, 0, 2926 root->root_key.objectid, 2927 &disk_key, 0, l->start, 0); 2928 if (IS_ERR(right)) 2929 return PTR_ERR(right); 2930 2931 root_add_used(root, root->leafsize); 2932 2933 memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header)); 2934 btrfs_set_header_bytenr(right, right->start); 2935 btrfs_set_header_generation(right, trans->transid); 2936 btrfs_set_header_backref_rev(right, BTRFS_MIXED_BACKREF_REV); 2937 btrfs_set_header_owner(right, root->root_key.objectid); 2938 btrfs_set_header_level(right, 0); 2939 write_extent_buffer(right, root->fs_info->fsid, 2940 (unsigned long)btrfs_header_fsid(right), 2941 BTRFS_FSID_SIZE); 2942 2943 write_extent_buffer(right, root->fs_info->chunk_tree_uuid, 2944 (unsigned long)btrfs_header_chunk_tree_uuid(right), 2945 BTRFS_UUID_SIZE); 2946 2947 if (split == 0) { 2948 if (mid <= slot) { 2949 btrfs_set_header_nritems(right, 0); 2950 wret = insert_ptr(trans, root, path, 2951 &disk_key, right->start, 2952 path->slots[1] + 1, 1); 2953 if (wret) 2954 ret = wret; 2955 2956 btrfs_tree_unlock(path->nodes[0]); 2957 free_extent_buffer(path->nodes[0]); 2958 path->nodes[0] = right; 2959 path->slots[0] = 0; 2960 path->slots[1] += 1; 2961 } else { 2962 btrfs_set_header_nritems(right, 0); 2963 wret = insert_ptr(trans, root, path, 2964 &disk_key, 2965 right->start, 2966 path->slots[1], 1); 2967 if (wret) 2968 ret = wret; 2969 btrfs_tree_unlock(path->nodes[0]); 2970 free_extent_buffer(path->nodes[0]); 2971 path->nodes[0] = right; 2972 path->slots[0] = 0; 2973 if (path->slots[1] == 0) { 2974 wret = fixup_low_keys(trans, root, 2975 path, &disk_key, 1); 2976 if (wret) 2977 ret = wret; 2978 } 2979 } 2980 btrfs_mark_buffer_dirty(right); 2981 return ret; 2982 } 2983 2984 ret = copy_for_split(trans, root, path, l, right, slot, mid, nritems); 2985 BUG_ON(ret); 2986 2987 if (split == 2) { 2988 BUG_ON(num_doubles != 0); 2989 num_doubles++; 2990 goto again; 2991 } 2992 2993 return ret; 2994 2995 push_for_double: 2996 push_for_double_split(trans, root, path, data_size); 2997 tried_avoid_double = 1; 2998 if (btrfs_leaf_free_space(root, path->nodes[0]) >= data_size) 2999 return 0; 3000 goto again; 3001 } 3002 3003 static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans, 3004 struct btrfs_root *root, 3005 struct btrfs_path *path, int ins_len) 3006 { 3007 struct btrfs_key key; 3008 struct extent_buffer *leaf; 3009 struct btrfs_file_extent_item *fi; 3010 u64 extent_len = 0; 3011 u32 item_size; 3012 int ret; 3013 3014 leaf = path->nodes[0]; 3015 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 3016 3017 BUG_ON(key.type != BTRFS_EXTENT_DATA_KEY && 3018 key.type != BTRFS_EXTENT_CSUM_KEY); 3019 3020 if (btrfs_leaf_free_space(root, leaf) >= ins_len) 3021 return 0; 3022 3023 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 3024 if (key.type == BTRFS_EXTENT_DATA_KEY) { 3025 fi = btrfs_item_ptr(leaf, path->slots[0], 3026 struct btrfs_file_extent_item); 3027 extent_len = btrfs_file_extent_num_bytes(leaf, fi); 3028 } 3029 btrfs_release_path(root, path); 3030 3031 path->keep_locks = 1; 3032 path->search_for_split = 1; 3033 ret = btrfs_search_slot(trans, root, &key, path, 0, 1); 3034 path->search_for_split = 0; 3035 if (ret < 0) 3036 goto err; 3037 3038 ret = -EAGAIN; 3039 leaf = path->nodes[0]; 3040 /* if our item isn't there or got smaller, return now */ 3041 if (ret > 0 || item_size != btrfs_item_size_nr(leaf, path->slots[0])) 3042 goto err; 3043 3044 /* the leaf has changed, it now has room. return now */ 3045 if (btrfs_leaf_free_space(root, path->nodes[0]) >= ins_len) 3046 goto err; 3047 3048 if (key.type == BTRFS_EXTENT_DATA_KEY) { 3049 fi = btrfs_item_ptr(leaf, path->slots[0], 3050 struct btrfs_file_extent_item); 3051 if (extent_len != btrfs_file_extent_num_bytes(leaf, fi)) 3052 goto err; 3053 } 3054 3055 btrfs_set_path_blocking(path); 3056 ret = split_leaf(trans, root, &key, path, ins_len, 1); 3057 if (ret) 3058 goto err; 3059 3060 path->keep_locks = 0; 3061 btrfs_unlock_up_safe(path, 1); 3062 return 0; 3063 err: 3064 path->keep_locks = 0; 3065 return ret; 3066 } 3067 3068 static noinline int split_item(struct btrfs_trans_handle *trans, 3069 struct btrfs_root *root, 3070 struct btrfs_path *path, 3071 struct btrfs_key *new_key, 3072 unsigned long split_offset) 3073 { 3074 struct extent_buffer *leaf; 3075 struct btrfs_item *item; 3076 struct btrfs_item *new_item; 3077 int slot; 3078 char *buf; 3079 u32 nritems; 3080 u32 item_size; 3081 u32 orig_offset; 3082 struct btrfs_disk_key disk_key; 3083 3084 leaf = path->nodes[0]; 3085 BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item)); 3086 3087 btrfs_set_path_blocking(path); 3088 3089 item = btrfs_item_nr(leaf, path->slots[0]); 3090 orig_offset = btrfs_item_offset(leaf, item); 3091 item_size = btrfs_item_size(leaf, item); 3092 3093 buf = kmalloc(item_size, GFP_NOFS); 3094 if (!buf) 3095 return -ENOMEM; 3096 3097 read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf, 3098 path->slots[0]), item_size); 3099 3100 slot = path->slots[0] + 1; 3101 nritems = btrfs_header_nritems(leaf); 3102 if (slot != nritems) { 3103 /* shift the items */ 3104 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1), 3105 btrfs_item_nr_offset(slot), 3106 (nritems - slot) * sizeof(struct btrfs_item)); 3107 } 3108 3109 btrfs_cpu_key_to_disk(&disk_key, new_key); 3110 btrfs_set_item_key(leaf, &disk_key, slot); 3111 3112 new_item = btrfs_item_nr(leaf, slot); 3113 3114 btrfs_set_item_offset(leaf, new_item, orig_offset); 3115 btrfs_set_item_size(leaf, new_item, item_size - split_offset); 3116 3117 btrfs_set_item_offset(leaf, item, 3118 orig_offset + item_size - split_offset); 3119 btrfs_set_item_size(leaf, item, split_offset); 3120 3121 btrfs_set_header_nritems(leaf, nritems + 1); 3122 3123 /* write the data for the start of the original item */ 3124 write_extent_buffer(leaf, buf, 3125 btrfs_item_ptr_offset(leaf, path->slots[0]), 3126 split_offset); 3127 3128 /* write the data for the new item */ 3129 write_extent_buffer(leaf, buf + split_offset, 3130 btrfs_item_ptr_offset(leaf, slot), 3131 item_size - split_offset); 3132 btrfs_mark_buffer_dirty(leaf); 3133 3134 BUG_ON(btrfs_leaf_free_space(root, leaf) < 0); 3135 kfree(buf); 3136 return 0; 3137 } 3138 3139 /* 3140 * This function splits a single item into two items, 3141 * giving 'new_key' to the new item and splitting the 3142 * old one at split_offset (from the start of the item). 3143 * 3144 * The path may be released by this operation. After 3145 * the split, the path is pointing to the old item. The 3146 * new item is going to be in the same node as the old one. 3147 * 3148 * Note, the item being split must be smaller enough to live alone on 3149 * a tree block with room for one extra struct btrfs_item 3150 * 3151 * This allows us to split the item in place, keeping a lock on the 3152 * leaf the entire time. 3153 */ 3154 int btrfs_split_item(struct btrfs_trans_handle *trans, 3155 struct btrfs_root *root, 3156 struct btrfs_path *path, 3157 struct btrfs_key *new_key, 3158 unsigned long split_offset) 3159 { 3160 int ret; 3161 ret = setup_leaf_for_split(trans, root, path, 3162 sizeof(struct btrfs_item)); 3163 if (ret) 3164 return ret; 3165 3166 ret = split_item(trans, root, path, new_key, split_offset); 3167 return ret; 3168 } 3169 3170 /* 3171 * This function duplicate a item, giving 'new_key' to the new item. 3172 * It guarantees both items live in the same tree leaf and the new item 3173 * is contiguous with the original item. 3174 * 3175 * This allows us to split file extent in place, keeping a lock on the 3176 * leaf the entire time. 3177 */ 3178 int btrfs_duplicate_item(struct btrfs_trans_handle *trans, 3179 struct btrfs_root *root, 3180 struct btrfs_path *path, 3181 struct btrfs_key *new_key) 3182 { 3183 struct extent_buffer *leaf; 3184 int ret; 3185 u32 item_size; 3186 3187 leaf = path->nodes[0]; 3188 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 3189 ret = setup_leaf_for_split(trans, root, path, 3190 item_size + sizeof(struct btrfs_item)); 3191 if (ret) 3192 return ret; 3193 3194 path->slots[0]++; 3195 ret = setup_items_for_insert(trans, root, path, new_key, &item_size, 3196 item_size, item_size + 3197 sizeof(struct btrfs_item), 1); 3198 BUG_ON(ret); 3199 3200 leaf = path->nodes[0]; 3201 memcpy_extent_buffer(leaf, 3202 btrfs_item_ptr_offset(leaf, path->slots[0]), 3203 btrfs_item_ptr_offset(leaf, path->slots[0] - 1), 3204 item_size); 3205 return 0; 3206 } 3207 3208 /* 3209 * make the item pointed to by the path smaller. new_size indicates 3210 * how small to make it, and from_end tells us if we just chop bytes 3211 * off the end of the item or if we shift the item to chop bytes off 3212 * the front. 3213 */ 3214 int btrfs_truncate_item(struct btrfs_trans_handle *trans, 3215 struct btrfs_root *root, 3216 struct btrfs_path *path, 3217 u32 new_size, int from_end) 3218 { 3219 int ret = 0; 3220 int slot; 3221 struct extent_buffer *leaf; 3222 struct btrfs_item *item; 3223 u32 nritems; 3224 unsigned int data_end; 3225 unsigned int old_data_start; 3226 unsigned int old_size; 3227 unsigned int size_diff; 3228 int i; 3229 3230 leaf = path->nodes[0]; 3231 slot = path->slots[0]; 3232 3233 old_size = btrfs_item_size_nr(leaf, slot); 3234 if (old_size == new_size) 3235 return 0; 3236 3237 nritems = btrfs_header_nritems(leaf); 3238 data_end = leaf_data_end(root, leaf); 3239 3240 old_data_start = btrfs_item_offset_nr(leaf, slot); 3241 3242 size_diff = old_size - new_size; 3243 3244 BUG_ON(slot < 0); 3245 BUG_ON(slot >= nritems); 3246 3247 /* 3248 * item0..itemN ... dataN.offset..dataN.size .. data0.size 3249 */ 3250 /* first correct the data pointers */ 3251 for (i = slot; i < nritems; i++) { 3252 u32 ioff; 3253 item = btrfs_item_nr(leaf, i); 3254 3255 if (!leaf->map_token) { 3256 map_extent_buffer(leaf, (unsigned long)item, 3257 sizeof(struct btrfs_item), 3258 &leaf->map_token, &leaf->kaddr, 3259 &leaf->map_start, &leaf->map_len, 3260 KM_USER1); 3261 } 3262 3263 ioff = btrfs_item_offset(leaf, item); 3264 btrfs_set_item_offset(leaf, item, ioff + size_diff); 3265 } 3266 3267 if (leaf->map_token) { 3268 unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); 3269 leaf->map_token = NULL; 3270 } 3271 3272 /* shift the data */ 3273 if (from_end) { 3274 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + 3275 data_end + size_diff, btrfs_leaf_data(leaf) + 3276 data_end, old_data_start + new_size - data_end); 3277 } else { 3278 struct btrfs_disk_key disk_key; 3279 u64 offset; 3280 3281 btrfs_item_key(leaf, &disk_key, slot); 3282 3283 if (btrfs_disk_key_type(&disk_key) == BTRFS_EXTENT_DATA_KEY) { 3284 unsigned long ptr; 3285 struct btrfs_file_extent_item *fi; 3286 3287 fi = btrfs_item_ptr(leaf, slot, 3288 struct btrfs_file_extent_item); 3289 fi = (struct btrfs_file_extent_item *)( 3290 (unsigned long)fi - size_diff); 3291 3292 if (btrfs_file_extent_type(leaf, fi) == 3293 BTRFS_FILE_EXTENT_INLINE) { 3294 ptr = btrfs_item_ptr_offset(leaf, slot); 3295 memmove_extent_buffer(leaf, ptr, 3296 (unsigned long)fi, 3297 offsetof(struct btrfs_file_extent_item, 3298 disk_bytenr)); 3299 } 3300 } 3301 3302 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + 3303 data_end + size_diff, btrfs_leaf_data(leaf) + 3304 data_end, old_data_start - data_end); 3305 3306 offset = btrfs_disk_key_offset(&disk_key); 3307 btrfs_set_disk_key_offset(&disk_key, offset + size_diff); 3308 btrfs_set_item_key(leaf, &disk_key, slot); 3309 if (slot == 0) 3310 fixup_low_keys(trans, root, path, &disk_key, 1); 3311 } 3312 3313 item = btrfs_item_nr(leaf, slot); 3314 btrfs_set_item_size(leaf, item, new_size); 3315 btrfs_mark_buffer_dirty(leaf); 3316 3317 ret = 0; 3318 if (btrfs_leaf_free_space(root, leaf) < 0) { 3319 btrfs_print_leaf(root, leaf); 3320 BUG(); 3321 } 3322 return ret; 3323 } 3324 3325 /* 3326 * make the item pointed to by the path bigger, data_size is the new size. 3327 */ 3328 int btrfs_extend_item(struct btrfs_trans_handle *trans, 3329 struct btrfs_root *root, struct btrfs_path *path, 3330 u32 data_size) 3331 { 3332 int ret = 0; 3333 int slot; 3334 struct extent_buffer *leaf; 3335 struct btrfs_item *item; 3336 u32 nritems; 3337 unsigned int data_end; 3338 unsigned int old_data; 3339 unsigned int old_size; 3340 int i; 3341 3342 leaf = path->nodes[0]; 3343 3344 nritems = btrfs_header_nritems(leaf); 3345 data_end = leaf_data_end(root, leaf); 3346 3347 if (btrfs_leaf_free_space(root, leaf) < data_size) { 3348 btrfs_print_leaf(root, leaf); 3349 BUG(); 3350 } 3351 slot = path->slots[0]; 3352 old_data = btrfs_item_end_nr(leaf, slot); 3353 3354 BUG_ON(slot < 0); 3355 if (slot >= nritems) { 3356 btrfs_print_leaf(root, leaf); 3357 printk(KERN_CRIT "slot %d too large, nritems %d\n", 3358 slot, nritems); 3359 BUG_ON(1); 3360 } 3361 3362 /* 3363 * item0..itemN ... dataN.offset..dataN.size .. data0.size 3364 */ 3365 /* first correct the data pointers */ 3366 for (i = slot; i < nritems; i++) { 3367 u32 ioff; 3368 item = btrfs_item_nr(leaf, i); 3369 3370 if (!leaf->map_token) { 3371 map_extent_buffer(leaf, (unsigned long)item, 3372 sizeof(struct btrfs_item), 3373 &leaf->map_token, &leaf->kaddr, 3374 &leaf->map_start, &leaf->map_len, 3375 KM_USER1); 3376 } 3377 ioff = btrfs_item_offset(leaf, item); 3378 btrfs_set_item_offset(leaf, item, ioff - data_size); 3379 } 3380 3381 if (leaf->map_token) { 3382 unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); 3383 leaf->map_token = NULL; 3384 } 3385 3386 /* shift the data */ 3387 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + 3388 data_end - data_size, btrfs_leaf_data(leaf) + 3389 data_end, old_data - data_end); 3390 3391 data_end = old_data; 3392 old_size = btrfs_item_size_nr(leaf, slot); 3393 item = btrfs_item_nr(leaf, slot); 3394 btrfs_set_item_size(leaf, item, old_size + data_size); 3395 btrfs_mark_buffer_dirty(leaf); 3396 3397 ret = 0; 3398 if (btrfs_leaf_free_space(root, leaf) < 0) { 3399 btrfs_print_leaf(root, leaf); 3400 BUG(); 3401 } 3402 return ret; 3403 } 3404 3405 /* 3406 * Given a key and some data, insert items into the tree. 3407 * This does all the path init required, making room in the tree if needed. 3408 * Returns the number of keys that were inserted. 3409 */ 3410 int btrfs_insert_some_items(struct btrfs_trans_handle *trans, 3411 struct btrfs_root *root, 3412 struct btrfs_path *path, 3413 struct btrfs_key *cpu_key, u32 *data_size, 3414 int nr) 3415 { 3416 struct extent_buffer *leaf; 3417 struct btrfs_item *item; 3418 int ret = 0; 3419 int slot; 3420 int i; 3421 u32 nritems; 3422 u32 total_data = 0; 3423 u32 total_size = 0; 3424 unsigned int data_end; 3425 struct btrfs_disk_key disk_key; 3426 struct btrfs_key found_key; 3427 3428 for (i = 0; i < nr; i++) { 3429 if (total_size + data_size[i] + sizeof(struct btrfs_item) > 3430 BTRFS_LEAF_DATA_SIZE(root)) { 3431 break; 3432 nr = i; 3433 } 3434 total_data += data_size[i]; 3435 total_size += data_size[i] + sizeof(struct btrfs_item); 3436 } 3437 BUG_ON(nr == 0); 3438 3439 ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1); 3440 if (ret == 0) 3441 return -EEXIST; 3442 if (ret < 0) 3443 goto out; 3444 3445 leaf = path->nodes[0]; 3446 3447 nritems = btrfs_header_nritems(leaf); 3448 data_end = leaf_data_end(root, leaf); 3449 3450 if (btrfs_leaf_free_space(root, leaf) < total_size) { 3451 for (i = nr; i >= 0; i--) { 3452 total_data -= data_size[i]; 3453 total_size -= data_size[i] + sizeof(struct btrfs_item); 3454 if (total_size < btrfs_leaf_free_space(root, leaf)) 3455 break; 3456 } 3457 nr = i; 3458 } 3459 3460 slot = path->slots[0]; 3461 BUG_ON(slot < 0); 3462 3463 if (slot != nritems) { 3464 unsigned int old_data = btrfs_item_end_nr(leaf, slot); 3465 3466 item = btrfs_item_nr(leaf, slot); 3467 btrfs_item_key_to_cpu(leaf, &found_key, slot); 3468 3469 /* figure out how many keys we can insert in here */ 3470 total_data = data_size[0]; 3471 for (i = 1; i < nr; i++) { 3472 if (btrfs_comp_cpu_keys(&found_key, cpu_key + i) <= 0) 3473 break; 3474 total_data += data_size[i]; 3475 } 3476 nr = i; 3477 3478 if (old_data < data_end) { 3479 btrfs_print_leaf(root, leaf); 3480 printk(KERN_CRIT "slot %d old_data %d data_end %d\n", 3481 slot, old_data, data_end); 3482 BUG_ON(1); 3483 } 3484 /* 3485 * item0..itemN ... dataN.offset..dataN.size .. data0.size 3486 */ 3487 /* first correct the data pointers */ 3488 WARN_ON(leaf->map_token); 3489 for (i = slot; i < nritems; i++) { 3490 u32 ioff; 3491 3492 item = btrfs_item_nr(leaf, i); 3493 if (!leaf->map_token) { 3494 map_extent_buffer(leaf, (unsigned long)item, 3495 sizeof(struct btrfs_item), 3496 &leaf->map_token, &leaf->kaddr, 3497 &leaf->map_start, &leaf->map_len, 3498 KM_USER1); 3499 } 3500 3501 ioff = btrfs_item_offset(leaf, item); 3502 btrfs_set_item_offset(leaf, item, ioff - total_data); 3503 } 3504 if (leaf->map_token) { 3505 unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); 3506 leaf->map_token = NULL; 3507 } 3508 3509 /* shift the items */ 3510 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr), 3511 btrfs_item_nr_offset(slot), 3512 (nritems - slot) * sizeof(struct btrfs_item)); 3513 3514 /* shift the data */ 3515 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + 3516 data_end - total_data, btrfs_leaf_data(leaf) + 3517 data_end, old_data - data_end); 3518 data_end = old_data; 3519 } else { 3520 /* 3521 * this sucks but it has to be done, if we are inserting at 3522 * the end of the leaf only insert 1 of the items, since we 3523 * have no way of knowing whats on the next leaf and we'd have 3524 * to drop our current locks to figure it out 3525 */ 3526 nr = 1; 3527 } 3528 3529 /* setup the item for the new data */ 3530 for (i = 0; i < nr; i++) { 3531 btrfs_cpu_key_to_disk(&disk_key, cpu_key + i); 3532 btrfs_set_item_key(leaf, &disk_key, slot + i); 3533 item = btrfs_item_nr(leaf, slot + i); 3534 btrfs_set_item_offset(leaf, item, data_end - data_size[i]); 3535 data_end -= data_size[i]; 3536 btrfs_set_item_size(leaf, item, data_size[i]); 3537 } 3538 btrfs_set_header_nritems(leaf, nritems + nr); 3539 btrfs_mark_buffer_dirty(leaf); 3540 3541 ret = 0; 3542 if (slot == 0) { 3543 btrfs_cpu_key_to_disk(&disk_key, cpu_key); 3544 ret = fixup_low_keys(trans, root, path, &disk_key, 1); 3545 } 3546 3547 if (btrfs_leaf_free_space(root, leaf) < 0) { 3548 btrfs_print_leaf(root, leaf); 3549 BUG(); 3550 } 3551 out: 3552 if (!ret) 3553 ret = nr; 3554 return ret; 3555 } 3556 3557 /* 3558 * this is a helper for btrfs_insert_empty_items, the main goal here is 3559 * to save stack depth by doing the bulk of the work in a function 3560 * that doesn't call btrfs_search_slot 3561 */ 3562 static noinline_for_stack int 3563 setup_items_for_insert(struct btrfs_trans_handle *trans, 3564 struct btrfs_root *root, struct btrfs_path *path, 3565 struct btrfs_key *cpu_key, u32 *data_size, 3566 u32 total_data, u32 total_size, int nr) 3567 { 3568 struct btrfs_item *item; 3569 int i; 3570 u32 nritems; 3571 unsigned int data_end; 3572 struct btrfs_disk_key disk_key; 3573 int ret; 3574 struct extent_buffer *leaf; 3575 int slot; 3576 3577 leaf = path->nodes[0]; 3578 slot = path->slots[0]; 3579 3580 nritems = btrfs_header_nritems(leaf); 3581 data_end = leaf_data_end(root, leaf); 3582 3583 if (btrfs_leaf_free_space(root, leaf) < total_size) { 3584 btrfs_print_leaf(root, leaf); 3585 printk(KERN_CRIT "not enough freespace need %u have %d\n", 3586 total_size, btrfs_leaf_free_space(root, leaf)); 3587 BUG(); 3588 } 3589 3590 if (slot != nritems) { 3591 unsigned int old_data = btrfs_item_end_nr(leaf, slot); 3592 3593 if (old_data < data_end) { 3594 btrfs_print_leaf(root, leaf); 3595 printk(KERN_CRIT "slot %d old_data %d data_end %d\n", 3596 slot, old_data, data_end); 3597 BUG_ON(1); 3598 } 3599 /* 3600 * item0..itemN ... dataN.offset..dataN.size .. data0.size 3601 */ 3602 /* first correct the data pointers */ 3603 WARN_ON(leaf->map_token); 3604 for (i = slot; i < nritems; i++) { 3605 u32 ioff; 3606 3607 item = btrfs_item_nr(leaf, i); 3608 if (!leaf->map_token) { 3609 map_extent_buffer(leaf, (unsigned long)item, 3610 sizeof(struct btrfs_item), 3611 &leaf->map_token, &leaf->kaddr, 3612 &leaf->map_start, &leaf->map_len, 3613 KM_USER1); 3614 } 3615 3616 ioff = btrfs_item_offset(leaf, item); 3617 btrfs_set_item_offset(leaf, item, ioff - total_data); 3618 } 3619 if (leaf->map_token) { 3620 unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); 3621 leaf->map_token = NULL; 3622 } 3623 3624 /* shift the items */ 3625 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr), 3626 btrfs_item_nr_offset(slot), 3627 (nritems - slot) * sizeof(struct btrfs_item)); 3628 3629 /* shift the data */ 3630 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + 3631 data_end - total_data, btrfs_leaf_data(leaf) + 3632 data_end, old_data - data_end); 3633 data_end = old_data; 3634 } 3635 3636 /* setup the item for the new data */ 3637 for (i = 0; i < nr; i++) { 3638 btrfs_cpu_key_to_disk(&disk_key, cpu_key + i); 3639 btrfs_set_item_key(leaf, &disk_key, slot + i); 3640 item = btrfs_item_nr(leaf, slot + i); 3641 btrfs_set_item_offset(leaf, item, data_end - data_size[i]); 3642 data_end -= data_size[i]; 3643 btrfs_set_item_size(leaf, item, data_size[i]); 3644 } 3645 3646 btrfs_set_header_nritems(leaf, nritems + nr); 3647 3648 ret = 0; 3649 if (slot == 0) { 3650 struct btrfs_disk_key disk_key; 3651 btrfs_cpu_key_to_disk(&disk_key, cpu_key); 3652 ret = fixup_low_keys(trans, root, path, &disk_key, 1); 3653 } 3654 btrfs_unlock_up_safe(path, 1); 3655 btrfs_mark_buffer_dirty(leaf); 3656 3657 if (btrfs_leaf_free_space(root, leaf) < 0) { 3658 btrfs_print_leaf(root, leaf); 3659 BUG(); 3660 } 3661 return ret; 3662 } 3663 3664 /* 3665 * Given a key and some data, insert items into the tree. 3666 * This does all the path init required, making room in the tree if needed. 3667 */ 3668 int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, 3669 struct btrfs_root *root, 3670 struct btrfs_path *path, 3671 struct btrfs_key *cpu_key, u32 *data_size, 3672 int nr) 3673 { 3674 int ret = 0; 3675 int slot; 3676 int i; 3677 u32 total_size = 0; 3678 u32 total_data = 0; 3679 3680 for (i = 0; i < nr; i++) 3681 total_data += data_size[i]; 3682 3683 total_size = total_data + (nr * sizeof(struct btrfs_item)); 3684 ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1); 3685 if (ret == 0) 3686 return -EEXIST; 3687 if (ret < 0) 3688 goto out; 3689 3690 slot = path->slots[0]; 3691 BUG_ON(slot < 0); 3692 3693 ret = setup_items_for_insert(trans, root, path, cpu_key, data_size, 3694 total_data, total_size, nr); 3695 3696 out: 3697 return ret; 3698 } 3699 3700 /* 3701 * Given a key and some data, insert an item into the tree. 3702 * This does all the path init required, making room in the tree if needed. 3703 */ 3704 int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root 3705 *root, struct btrfs_key *cpu_key, void *data, u32 3706 data_size) 3707 { 3708 int ret = 0; 3709 struct btrfs_path *path; 3710 struct extent_buffer *leaf; 3711 unsigned long ptr; 3712 3713 path = btrfs_alloc_path(); 3714 if (!path) 3715 return -ENOMEM; 3716 ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size); 3717 if (!ret) { 3718 leaf = path->nodes[0]; 3719 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); 3720 write_extent_buffer(leaf, data, ptr, data_size); 3721 btrfs_mark_buffer_dirty(leaf); 3722 } 3723 btrfs_free_path(path); 3724 return ret; 3725 } 3726 3727 /* 3728 * delete the pointer from a given node. 3729 * 3730 * the tree should have been previously balanced so the deletion does not 3731 * empty a node. 3732 */ 3733 static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, 3734 struct btrfs_path *path, int level, int slot) 3735 { 3736 struct extent_buffer *parent = path->nodes[level]; 3737 u32 nritems; 3738 int ret = 0; 3739 int wret; 3740 3741 nritems = btrfs_header_nritems(parent); 3742 if (slot != nritems - 1) { 3743 memmove_extent_buffer(parent, 3744 btrfs_node_key_ptr_offset(slot), 3745 btrfs_node_key_ptr_offset(slot + 1), 3746 sizeof(struct btrfs_key_ptr) * 3747 (nritems - slot - 1)); 3748 } 3749 nritems--; 3750 btrfs_set_header_nritems(parent, nritems); 3751 if (nritems == 0 && parent == root->node) { 3752 BUG_ON(btrfs_header_level(root->node) != 1); 3753 /* just turn the root into a leaf and break */ 3754 btrfs_set_header_level(root->node, 0); 3755 } else if (slot == 0) { 3756 struct btrfs_disk_key disk_key; 3757 3758 btrfs_node_key(parent, &disk_key, 0); 3759 wret = fixup_low_keys(trans, root, path, &disk_key, level + 1); 3760 if (wret) 3761 ret = wret; 3762 } 3763 btrfs_mark_buffer_dirty(parent); 3764 return ret; 3765 } 3766 3767 /* 3768 * a helper function to delete the leaf pointed to by path->slots[1] and 3769 * path->nodes[1]. 3770 * 3771 * This deletes the pointer in path->nodes[1] and frees the leaf 3772 * block extent. zero is returned if it all worked out, < 0 otherwise. 3773 * 3774 * The path must have already been setup for deleting the leaf, including 3775 * all the proper balancing. path->nodes[1] must be locked. 3776 */ 3777 static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans, 3778 struct btrfs_root *root, 3779 struct btrfs_path *path, 3780 struct extent_buffer *leaf) 3781 { 3782 int ret; 3783 3784 WARN_ON(btrfs_header_generation(leaf) != trans->transid); 3785 ret = del_ptr(trans, root, path, 1, path->slots[1]); 3786 if (ret) 3787 return ret; 3788 3789 /* 3790 * btrfs_free_extent is expensive, we want to make sure we 3791 * aren't holding any locks when we call it 3792 */ 3793 btrfs_unlock_up_safe(path, 0); 3794 3795 root_sub_used(root, leaf->len); 3796 3797 btrfs_free_tree_block(trans, root, leaf, 0, 1); 3798 return 0; 3799 } 3800 /* 3801 * delete the item at the leaf level in path. If that empties 3802 * the leaf, remove it from the tree 3803 */ 3804 int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, 3805 struct btrfs_path *path, int slot, int nr) 3806 { 3807 struct extent_buffer *leaf; 3808 struct btrfs_item *item; 3809 int last_off; 3810 int dsize = 0; 3811 int ret = 0; 3812 int wret; 3813 int i; 3814 u32 nritems; 3815 3816 leaf = path->nodes[0]; 3817 last_off = btrfs_item_offset_nr(leaf, slot + nr - 1); 3818 3819 for (i = 0; i < nr; i++) 3820 dsize += btrfs_item_size_nr(leaf, slot + i); 3821 3822 nritems = btrfs_header_nritems(leaf); 3823 3824 if (slot + nr != nritems) { 3825 int data_end = leaf_data_end(root, leaf); 3826 3827 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + 3828 data_end + dsize, 3829 btrfs_leaf_data(leaf) + data_end, 3830 last_off - data_end); 3831 3832 for (i = slot + nr; i < nritems; i++) { 3833 u32 ioff; 3834 3835 item = btrfs_item_nr(leaf, i); 3836 if (!leaf->map_token) { 3837 map_extent_buffer(leaf, (unsigned long)item, 3838 sizeof(struct btrfs_item), 3839 &leaf->map_token, &leaf->kaddr, 3840 &leaf->map_start, &leaf->map_len, 3841 KM_USER1); 3842 } 3843 ioff = btrfs_item_offset(leaf, item); 3844 btrfs_set_item_offset(leaf, item, ioff + dsize); 3845 } 3846 3847 if (leaf->map_token) { 3848 unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); 3849 leaf->map_token = NULL; 3850 } 3851 3852 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot), 3853 btrfs_item_nr_offset(slot + nr), 3854 sizeof(struct btrfs_item) * 3855 (nritems - slot - nr)); 3856 } 3857 btrfs_set_header_nritems(leaf, nritems - nr); 3858 nritems -= nr; 3859 3860 /* delete the leaf if we've emptied it */ 3861 if (nritems == 0) { 3862 if (leaf == root->node) { 3863 btrfs_set_header_level(leaf, 0); 3864 } else { 3865 btrfs_set_path_blocking(path); 3866 clean_tree_block(trans, root, leaf); 3867 ret = btrfs_del_leaf(trans, root, path, leaf); 3868 BUG_ON(ret); 3869 } 3870 } else { 3871 int used = leaf_space_used(leaf, 0, nritems); 3872 if (slot == 0) { 3873 struct btrfs_disk_key disk_key; 3874 3875 btrfs_item_key(leaf, &disk_key, 0); 3876 wret = fixup_low_keys(trans, root, path, 3877 &disk_key, 1); 3878 if (wret) 3879 ret = wret; 3880 } 3881 3882 /* delete the leaf if it is mostly empty */ 3883 if (used < BTRFS_LEAF_DATA_SIZE(root) / 3) { 3884 /* push_leaf_left fixes the path. 3885 * make sure the path still points to our leaf 3886 * for possible call to del_ptr below 3887 */ 3888 slot = path->slots[1]; 3889 extent_buffer_get(leaf); 3890 3891 btrfs_set_path_blocking(path); 3892 wret = push_leaf_left(trans, root, path, 1, 1, 3893 1, (u32)-1); 3894 if (wret < 0 && wret != -ENOSPC) 3895 ret = wret; 3896 3897 if (path->nodes[0] == leaf && 3898 btrfs_header_nritems(leaf)) { 3899 wret = push_leaf_right(trans, root, path, 1, 3900 1, 1, 0); 3901 if (wret < 0 && wret != -ENOSPC) 3902 ret = wret; 3903 } 3904 3905 if (btrfs_header_nritems(leaf) == 0) { 3906 path->slots[1] = slot; 3907 ret = btrfs_del_leaf(trans, root, path, leaf); 3908 BUG_ON(ret); 3909 free_extent_buffer(leaf); 3910 } else { 3911 /* if we're still in the path, make sure 3912 * we're dirty. Otherwise, one of the 3913 * push_leaf functions must have already 3914 * dirtied this buffer 3915 */ 3916 if (path->nodes[0] == leaf) 3917 btrfs_mark_buffer_dirty(leaf); 3918 free_extent_buffer(leaf); 3919 } 3920 } else { 3921 btrfs_mark_buffer_dirty(leaf); 3922 } 3923 } 3924 return ret; 3925 } 3926 3927 /* 3928 * search the tree again to find a leaf with lesser keys 3929 * returns 0 if it found something or 1 if there are no lesser leaves. 3930 * returns < 0 on io errors. 3931 * 3932 * This may release the path, and so you may lose any locks held at the 3933 * time you call it. 3934 */ 3935 int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) 3936 { 3937 struct btrfs_key key; 3938 struct btrfs_disk_key found_key; 3939 int ret; 3940 3941 btrfs_item_key_to_cpu(path->nodes[0], &key, 0); 3942 3943 if (key.offset > 0) 3944 key.offset--; 3945 else if (key.type > 0) 3946 key.type--; 3947 else if (key.objectid > 0) 3948 key.objectid--; 3949 else 3950 return 1; 3951 3952 btrfs_release_path(root, path); 3953 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 3954 if (ret < 0) 3955 return ret; 3956 btrfs_item_key(path->nodes[0], &found_key, 0); 3957 ret = comp_keys(&found_key, &key); 3958 if (ret < 0) 3959 return 0; 3960 return 1; 3961 } 3962 3963 /* 3964 * A helper function to walk down the tree starting at min_key, and looking 3965 * for nodes or leaves that are either in cache or have a minimum 3966 * transaction id. This is used by the btree defrag code, and tree logging 3967 * 3968 * This does not cow, but it does stuff the starting key it finds back 3969 * into min_key, so you can call btrfs_search_slot with cow=1 on the 3970 * key and get a writable path. 3971 * 3972 * This does lock as it descends, and path->keep_locks should be set 3973 * to 1 by the caller. 3974 * 3975 * This honors path->lowest_level to prevent descent past a given level 3976 * of the tree. 3977 * 3978 * min_trans indicates the oldest transaction that you are interested 3979 * in walking through. Any nodes or leaves older than min_trans are 3980 * skipped over (without reading them). 3981 * 3982 * returns zero if something useful was found, < 0 on error and 1 if there 3983 * was nothing in the tree that matched the search criteria. 3984 */ 3985 int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key, 3986 struct btrfs_key *max_key, 3987 struct btrfs_path *path, int cache_only, 3988 u64 min_trans) 3989 { 3990 struct extent_buffer *cur; 3991 struct btrfs_key found_key; 3992 int slot; 3993 int sret; 3994 u32 nritems; 3995 int level; 3996 int ret = 1; 3997 3998 WARN_ON(!path->keep_locks); 3999 again: 4000 cur = btrfs_lock_root_node(root); 4001 level = btrfs_header_level(cur); 4002 WARN_ON(path->nodes[level]); 4003 path->nodes[level] = cur; 4004 path->locks[level] = 1; 4005 4006 if (btrfs_header_generation(cur) < min_trans) { 4007 ret = 1; 4008 goto out; 4009 } 4010 while (1) { 4011 nritems = btrfs_header_nritems(cur); 4012 level = btrfs_header_level(cur); 4013 sret = bin_search(cur, min_key, level, &slot); 4014 4015 /* at the lowest level, we're done, setup the path and exit */ 4016 if (level == path->lowest_level) { 4017 if (slot >= nritems) 4018 goto find_next_key; 4019 ret = 0; 4020 path->slots[level] = slot; 4021 btrfs_item_key_to_cpu(cur, &found_key, slot); 4022 goto out; 4023 } 4024 if (sret && slot > 0) 4025 slot--; 4026 /* 4027 * check this node pointer against the cache_only and 4028 * min_trans parameters. If it isn't in cache or is too 4029 * old, skip to the next one. 4030 */ 4031 while (slot < nritems) { 4032 u64 blockptr; 4033 u64 gen; 4034 struct extent_buffer *tmp; 4035 struct btrfs_disk_key disk_key; 4036 4037 blockptr = btrfs_node_blockptr(cur, slot); 4038 gen = btrfs_node_ptr_generation(cur, slot); 4039 if (gen < min_trans) { 4040 slot++; 4041 continue; 4042 } 4043 if (!cache_only) 4044 break; 4045 4046 if (max_key) { 4047 btrfs_node_key(cur, &disk_key, slot); 4048 if (comp_keys(&disk_key, max_key) >= 0) { 4049 ret = 1; 4050 goto out; 4051 } 4052 } 4053 4054 tmp = btrfs_find_tree_block(root, blockptr, 4055 btrfs_level_size(root, level - 1)); 4056 4057 if (tmp && btrfs_buffer_uptodate(tmp, gen)) { 4058 free_extent_buffer(tmp); 4059 break; 4060 } 4061 if (tmp) 4062 free_extent_buffer(tmp); 4063 slot++; 4064 } 4065 find_next_key: 4066 /* 4067 * we didn't find a candidate key in this node, walk forward 4068 * and find another one 4069 */ 4070 if (slot >= nritems) { 4071 path->slots[level] = slot; 4072 btrfs_set_path_blocking(path); 4073 sret = btrfs_find_next_key(root, path, min_key, level, 4074 cache_only, min_trans); 4075 if (sret == 0) { 4076 btrfs_release_path(root, path); 4077 goto again; 4078 } else { 4079 goto out; 4080 } 4081 } 4082 /* save our key for returning back */ 4083 btrfs_node_key_to_cpu(cur, &found_key, slot); 4084 path->slots[level] = slot; 4085 if (level == path->lowest_level) { 4086 ret = 0; 4087 unlock_up(path, level, 1); 4088 goto out; 4089 } 4090 btrfs_set_path_blocking(path); 4091 cur = read_node_slot(root, cur, slot); 4092 BUG_ON(!cur); 4093 4094 btrfs_tree_lock(cur); 4095 4096 path->locks[level - 1] = 1; 4097 path->nodes[level - 1] = cur; 4098 unlock_up(path, level, 1); 4099 btrfs_clear_path_blocking(path, NULL); 4100 } 4101 out: 4102 if (ret == 0) 4103 memcpy(min_key, &found_key, sizeof(found_key)); 4104 btrfs_set_path_blocking(path); 4105 return ret; 4106 } 4107 4108 /* 4109 * this is similar to btrfs_next_leaf, but does not try to preserve 4110 * and fixup the path. It looks for and returns the next key in the 4111 * tree based on the current path and the cache_only and min_trans 4112 * parameters. 4113 * 4114 * 0 is returned if another key is found, < 0 if there are any errors 4115 * and 1 is returned if there are no higher keys in the tree 4116 * 4117 * path->keep_locks should be set to 1 on the search made before 4118 * calling this function. 4119 */ 4120 int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path, 4121 struct btrfs_key *key, int level, 4122 int cache_only, u64 min_trans) 4123 { 4124 int slot; 4125 struct extent_buffer *c; 4126 4127 WARN_ON(!path->keep_locks); 4128 while (level < BTRFS_MAX_LEVEL) { 4129 if (!path->nodes[level]) 4130 return 1; 4131 4132 slot = path->slots[level] + 1; 4133 c = path->nodes[level]; 4134 next: 4135 if (slot >= btrfs_header_nritems(c)) { 4136 int ret; 4137 int orig_lowest; 4138 struct btrfs_key cur_key; 4139 if (level + 1 >= BTRFS_MAX_LEVEL || 4140 !path->nodes[level + 1]) 4141 return 1; 4142 4143 if (path->locks[level + 1]) { 4144 level++; 4145 continue; 4146 } 4147 4148 slot = btrfs_header_nritems(c) - 1; 4149 if (level == 0) 4150 btrfs_item_key_to_cpu(c, &cur_key, slot); 4151 else 4152 btrfs_node_key_to_cpu(c, &cur_key, slot); 4153 4154 orig_lowest = path->lowest_level; 4155 btrfs_release_path(root, path); 4156 path->lowest_level = level; 4157 ret = btrfs_search_slot(NULL, root, &cur_key, path, 4158 0, 0); 4159 path->lowest_level = orig_lowest; 4160 if (ret < 0) 4161 return ret; 4162 4163 c = path->nodes[level]; 4164 slot = path->slots[level]; 4165 if (ret == 0) 4166 slot++; 4167 goto next; 4168 } 4169 4170 if (level == 0) 4171 btrfs_item_key_to_cpu(c, key, slot); 4172 else { 4173 u64 blockptr = btrfs_node_blockptr(c, slot); 4174 u64 gen = btrfs_node_ptr_generation(c, slot); 4175 4176 if (cache_only) { 4177 struct extent_buffer *cur; 4178 cur = btrfs_find_tree_block(root, blockptr, 4179 btrfs_level_size(root, level - 1)); 4180 if (!cur || !btrfs_buffer_uptodate(cur, gen)) { 4181 slot++; 4182 if (cur) 4183 free_extent_buffer(cur); 4184 goto next; 4185 } 4186 free_extent_buffer(cur); 4187 } 4188 if (gen < min_trans) { 4189 slot++; 4190 goto next; 4191 } 4192 btrfs_node_key_to_cpu(c, key, slot); 4193 } 4194 return 0; 4195 } 4196 return 1; 4197 } 4198 4199 /* 4200 * search the tree again to find a leaf with greater keys 4201 * returns 0 if it found something or 1 if there are no greater leaves. 4202 * returns < 0 on io errors. 4203 */ 4204 int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) 4205 { 4206 int slot; 4207 int level; 4208 struct extent_buffer *c; 4209 struct extent_buffer *next; 4210 struct btrfs_key key; 4211 u32 nritems; 4212 int ret; 4213 int old_spinning = path->leave_spinning; 4214 int force_blocking = 0; 4215 4216 nritems = btrfs_header_nritems(path->nodes[0]); 4217 if (nritems == 0) 4218 return 1; 4219 4220 /* 4221 * we take the blocks in an order that upsets lockdep. Using 4222 * blocking mode is the only way around it. 4223 */ 4224 #ifdef CONFIG_DEBUG_LOCK_ALLOC 4225 force_blocking = 1; 4226 #endif 4227 4228 btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1); 4229 again: 4230 level = 1; 4231 next = NULL; 4232 btrfs_release_path(root, path); 4233 4234 path->keep_locks = 1; 4235 4236 if (!force_blocking) 4237 path->leave_spinning = 1; 4238 4239 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 4240 path->keep_locks = 0; 4241 4242 if (ret < 0) 4243 return ret; 4244 4245 nritems = btrfs_header_nritems(path->nodes[0]); 4246 /* 4247 * by releasing the path above we dropped all our locks. A balance 4248 * could have added more items next to the key that used to be 4249 * at the very end of the block. So, check again here and 4250 * advance the path if there are now more items available. 4251 */ 4252 if (nritems > 0 && path->slots[0] < nritems - 1) { 4253 if (ret == 0) 4254 path->slots[0]++; 4255 ret = 0; 4256 goto done; 4257 } 4258 4259 while (level < BTRFS_MAX_LEVEL) { 4260 if (!path->nodes[level]) { 4261 ret = 1; 4262 goto done; 4263 } 4264 4265 slot = path->slots[level] + 1; 4266 c = path->nodes[level]; 4267 if (slot >= btrfs_header_nritems(c)) { 4268 level++; 4269 if (level == BTRFS_MAX_LEVEL) { 4270 ret = 1; 4271 goto done; 4272 } 4273 continue; 4274 } 4275 4276 if (next) { 4277 btrfs_tree_unlock(next); 4278 free_extent_buffer(next); 4279 } 4280 4281 next = c; 4282 ret = read_block_for_search(NULL, root, path, &next, level, 4283 slot, &key); 4284 if (ret == -EAGAIN) 4285 goto again; 4286 4287 if (ret < 0) { 4288 btrfs_release_path(root, path); 4289 goto done; 4290 } 4291 4292 if (!path->skip_locking) { 4293 ret = btrfs_try_spin_lock(next); 4294 if (!ret) { 4295 btrfs_set_path_blocking(path); 4296 btrfs_tree_lock(next); 4297 if (!force_blocking) 4298 btrfs_clear_path_blocking(path, next); 4299 } 4300 if (force_blocking) 4301 btrfs_set_lock_blocking(next); 4302 } 4303 break; 4304 } 4305 path->slots[level] = slot; 4306 while (1) { 4307 level--; 4308 c = path->nodes[level]; 4309 if (path->locks[level]) 4310 btrfs_tree_unlock(c); 4311 4312 free_extent_buffer(c); 4313 path->nodes[level] = next; 4314 path->slots[level] = 0; 4315 if (!path->skip_locking) 4316 path->locks[level] = 1; 4317 4318 if (!level) 4319 break; 4320 4321 ret = read_block_for_search(NULL, root, path, &next, level, 4322 0, &key); 4323 if (ret == -EAGAIN) 4324 goto again; 4325 4326 if (ret < 0) { 4327 btrfs_release_path(root, path); 4328 goto done; 4329 } 4330 4331 if (!path->skip_locking) { 4332 btrfs_assert_tree_locked(path->nodes[level]); 4333 ret = btrfs_try_spin_lock(next); 4334 if (!ret) { 4335 btrfs_set_path_blocking(path); 4336 btrfs_tree_lock(next); 4337 if (!force_blocking) 4338 btrfs_clear_path_blocking(path, next); 4339 } 4340 if (force_blocking) 4341 btrfs_set_lock_blocking(next); 4342 } 4343 } 4344 ret = 0; 4345 done: 4346 unlock_up(path, 0, 1); 4347 path->leave_spinning = old_spinning; 4348 if (!old_spinning) 4349 btrfs_set_path_blocking(path); 4350 4351 return ret; 4352 } 4353 4354 /* 4355 * this uses btrfs_prev_leaf to walk backwards in the tree, and keeps 4356 * searching until it gets past min_objectid or finds an item of 'type' 4357 * 4358 * returns 0 if something is found, 1 if nothing was found and < 0 on error 4359 */ 4360 int btrfs_previous_item(struct btrfs_root *root, 4361 struct btrfs_path *path, u64 min_objectid, 4362 int type) 4363 { 4364 struct btrfs_key found_key; 4365 struct extent_buffer *leaf; 4366 u32 nritems; 4367 int ret; 4368 4369 while (1) { 4370 if (path->slots[0] == 0) { 4371 btrfs_set_path_blocking(path); 4372 ret = btrfs_prev_leaf(root, path); 4373 if (ret != 0) 4374 return ret; 4375 } else { 4376 path->slots[0]--; 4377 } 4378 leaf = path->nodes[0]; 4379 nritems = btrfs_header_nritems(leaf); 4380 if (nritems == 0) 4381 return 1; 4382 if (path->slots[0] == nritems) 4383 path->slots[0]--; 4384 4385 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 4386 if (found_key.objectid < min_objectid) 4387 break; 4388 if (found_key.type == type) 4389 return 0; 4390 if (found_key.objectid == min_objectid && 4391 found_key.type < type) 4392 break; 4393 } 4394 return 1; 4395 } 4396