/*
 * Copyright (C) 2007,2008 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include <linux/sched.h>
#include <linux/slab.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "print-tree.h"
#include "locking.h"

static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
		      *root, struct btrfs_path *path, int level);
static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
		      *root, struct btrfs_key *ins_key,
		      struct btrfs_path *path, int data_size, int extend);
static int push_node_left(struct btrfs_trans_handle *trans,
			  struct btrfs_root *root, struct extent_buffer *dst,
			  struct extent_buffer *src, int empty);
static int balance_node_right(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root,
			      struct extent_buffer *dst_buf,
			      struct extent_buffer *src_buf);
static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		   struct btrfs_path *path, int level, int slot);

struct btrfs_path *btrfs_alloc_path(void)
{
	struct btrfs_path *path;
	path = kmem_cache_zalloc(btrfs_path_cachep, GFP_NOFS);
	return path;
}

/*
 * set all locked nodes in the path to blocking locks.  This should
 * be done before scheduling
 */
noinline void btrfs_set_path_blocking(struct btrfs_path *p)
{
	int i;
	for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
		if (!p->nodes[i] || !p->locks[i])
			continue;
		btrfs_set_lock_blocking_rw(p->nodes[i], p->locks[i]);
		if (p->locks[i] == BTRFS_READ_LOCK)
			p->locks[i] = BTRFS_READ_LOCK_BLOCKING;
		else if (p->locks[i] == BTRFS_WRITE_LOCK)
			p->locks[i] = BTRFS_WRITE_LOCK_BLOCKING;
	}
}
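/*
 * Illustrative sketch (not compiled in): the usual pattern is to switch a
 * path to blocking locks before any operation that may schedule, and back
 * to spinning locks afterwards.  The helpers named here are the real ones
 * from this file; the surrounding variables are hypothetical.
 *
 *	btrfs_set_path_blocking(path);
 *	eb = read_tree_block(root, blocknr, blocksize, gen);	// may sleep
 *	btrfs_clear_path_blocking(path, NULL, 0);
 */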
/*
 * reset all the locked nodes in the path to spinning locks.
 *
 * held is used to keep lockdep happy, when lockdep is enabled
 * we set held to a blocking lock before we go around and
 * retake all the spinlocks in the path.  You can safely use NULL
 * for held
 */
noinline void btrfs_clear_path_blocking(struct btrfs_path *p,
					struct extent_buffer *held, int held_rw)
{
	int i;

#ifdef CONFIG_DEBUG_LOCK_ALLOC
	/* lockdep really cares that we take all of these spinlocks
	 * in the right order.  If any of the locks in the path are not
	 * currently blocking, it is going to complain.  So, make really
	 * really sure by forcing the path to blocking before we clear
	 * the path blocking.
	 */
	if (held) {
		btrfs_set_lock_blocking_rw(held, held_rw);
		if (held_rw == BTRFS_WRITE_LOCK)
			held_rw = BTRFS_WRITE_LOCK_BLOCKING;
		else if (held_rw == BTRFS_READ_LOCK)
			held_rw = BTRFS_READ_LOCK_BLOCKING;
	}
	btrfs_set_path_blocking(p);
#endif

	for (i = BTRFS_MAX_LEVEL - 1; i >= 0; i--) {
		if (p->nodes[i] && p->locks[i]) {
			btrfs_clear_lock_blocking_rw(p->nodes[i], p->locks[i]);
			if (p->locks[i] == BTRFS_WRITE_LOCK_BLOCKING)
				p->locks[i] = BTRFS_WRITE_LOCK;
			else if (p->locks[i] == BTRFS_READ_LOCK_BLOCKING)
				p->locks[i] = BTRFS_READ_LOCK;
		}
	}

#ifdef CONFIG_DEBUG_LOCK_ALLOC
	if (held)
		btrfs_clear_lock_blocking_rw(held, held_rw);
#endif
}

/* this also releases the path */
void btrfs_free_path(struct btrfs_path *p)
{
	if (!p)
		return;
	btrfs_release_path(p);
	kmem_cache_free(btrfs_path_cachep, p);
}

/*
 * path release drops references on the extent buffers in the path
 * and it drops any locks held by this path
 *
 * It is safe to call this on paths that have no locks or extent buffers
 * held.
 */
noinline void btrfs_release_path(struct btrfs_path *p)
{
	int i;

	for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
		p->slots[i] = 0;
		if (!p->nodes[i])
			continue;
		if (p->locks[i]) {
			btrfs_tree_unlock_rw(p->nodes[i], p->locks[i]);
			p->locks[i] = 0;
		}
		free_extent_buffer(p->nodes[i]);
		p->nodes[i] = NULL;
	}
}

/*
 * safely gets a reference on the root node of a tree.  A lock
 * is not taken, so a concurrent writer may put a different node
 * at the root of the tree.  See btrfs_lock_root_node for the
 * looping required.
 *
 * The extent buffer returned by this has a reference taken, so
 * it won't disappear.  It may stop being the root of the tree
 * at any time because there are no locks held.
 */
struct extent_buffer *btrfs_root_node(struct btrfs_root *root)
{
	struct extent_buffer *eb;

	rcu_read_lock();
	eb = rcu_dereference(root->node);
	extent_buffer_get(eb);
	rcu_read_unlock();
	return eb;
}

/* loop around taking references on and locking the root node of the
 * tree until you end up with a lock on the root.  A locked buffer
 * is returned, with a reference held.
 */
struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
{
	struct extent_buffer *eb;

	while (1) {
		eb = btrfs_root_node(root);
		btrfs_tree_lock(eb);
		if (eb == root->node)
			break;
		btrfs_tree_unlock(eb);
		free_extent_buffer(eb);
	}
	return eb;
}

/* loop around taking references on and read locking the root node of
 * the tree until you end up with a read lock on the root.  A locked
 * buffer is returned, with a reference held.
 */
struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
{
	struct extent_buffer *eb;

	while (1) {
		eb = btrfs_root_node(root);
		btrfs_tree_read_lock(eb);
		if (eb == root->node)
			break;
		btrfs_tree_read_unlock(eb);
		free_extent_buffer(eb);
	}
	return eb;
}
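/*
 * Illustrative note: the lock-then-recheck loop above is needed because a
 * concurrent COW can swap root->node between btrfs_root_node() and
 * btrfs_tree_lock().  A hypothetical timeline:
 *
 *	reader: eb = btrfs_root_node(root);	// eb is the old root
 *	writer: rcu_assign_pointer(root->node, cow);
 *	reader: btrfs_tree_lock(eb);		// locked the *old* root
 *	reader: eb != root->node -> unlock, drop the ref, retry
 */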
/*
 * cowonly roots (everything that is not a reference counted cow subvolume)
 * just get put onto a simple dirty list.  transaction.c walks this to make
 * sure they get properly updated on disk.
 */
static void add_root_to_dirty_list(struct btrfs_root *root)
{
	if (root->track_dirty && list_empty(&root->dirty_list)) {
		list_add(&root->dirty_list,
			 &root->fs_info->dirty_cowonly_roots);
	}
}

/*
 * used by snapshot creation to make a copy of a root for a tree with
 * a given objectid.  The buffer with the new root node is returned in
 * cow_ret, and this func returns zero on success or a negative error code.
 */
int btrfs_copy_root(struct btrfs_trans_handle *trans,
		      struct btrfs_root *root,
		      struct extent_buffer *buf,
		      struct extent_buffer **cow_ret, u64 new_root_objectid)
{
	struct extent_buffer *cow;
	int ret = 0;
	int level;
	struct btrfs_disk_key disk_key;

	WARN_ON(root->ref_cows && trans->transid !=
		root->fs_info->running_transaction->transid);
	WARN_ON(root->ref_cows && trans->transid != root->last_trans);

	level = btrfs_header_level(buf);
	if (level == 0)
		btrfs_item_key(buf, &disk_key, 0);
	else
		btrfs_node_key(buf, &disk_key, 0);

	cow = btrfs_alloc_free_block(trans, root, buf->len, 0,
				     new_root_objectid, &disk_key, level,
				     buf->start, 0);
	if (IS_ERR(cow))
		return PTR_ERR(cow);

	copy_extent_buffer(cow, buf, 0, 0, cow->len);
	btrfs_set_header_bytenr(cow, cow->start);
	btrfs_set_header_generation(cow, trans->transid);
	btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV);
	btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN |
				     BTRFS_HEADER_FLAG_RELOC);
	if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
		btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC);
	else
		btrfs_set_header_owner(cow, new_root_objectid);

	write_extent_buffer(cow, root->fs_info->fsid,
			    (unsigned long)btrfs_header_fsid(cow),
			    BTRFS_FSID_SIZE);

	WARN_ON(btrfs_header_generation(buf) > trans->transid);
	if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
		ret = btrfs_inc_ref(trans, root, cow, 1);
	else
		ret = btrfs_inc_ref(trans, root, cow, 0);

	if (ret)
		return ret;

	btrfs_mark_buffer_dirty(cow);
	*cow_ret = cow;
	return 0;
}

/*
 * check if the tree block can be shared by multiple trees
 */
int btrfs_block_can_be_shared(struct btrfs_root *root,
			      struct extent_buffer *buf)
{
	/*
	 * Tree blocks not in reference counted trees and tree roots
	 * are never shared.  If a block was allocated after the last
	 * snapshot and the block was not allocated by tree relocation,
	 * we know the block is not shared.
	 */
	if (root->ref_cows &&
	    buf != root->node && buf != root->commit_root &&
	    (btrfs_header_generation(buf) <=
	     btrfs_root_last_snapshot(&root->root_item) ||
	     btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)))
		return 1;
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	if (root->ref_cows &&
	    btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
		return 1;
#endif
	return 0;
}
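/*
 * Worked example (hypothetical numbers): if a subvolume's last snapshot was
 * taken in generation 100, a block with btrfs_header_generation() == 90
 * predates the snapshot and may be shared with it, so COW must fix up
 * backrefs.  A block with generation 101 was allocated after the snapshot
 * and, unless it carries BTRFS_HEADER_FLAG_RELOC, cannot be shared.
 */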
static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
				       struct btrfs_root *root,
				       struct extent_buffer *buf,
				       struct extent_buffer *cow,
				       int *last_ref)
{
	u64 refs;
	u64 owner;
	u64 flags;
	u64 new_flags = 0;
	int ret;

	/*
	 * Backrefs update rules:
	 *
	 * Always use full backrefs for extent pointers in tree blocks
	 * allocated by tree relocation.
	 *
	 * If a shared tree block is no longer referenced by its owner
	 * tree (btrfs_header_owner(buf) == root->root_key.objectid),
	 * use full backrefs for extent pointers in the tree block.
	 *
	 * If a tree block is being relocated
	 * (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID),
	 * use full backrefs for extent pointers in the tree block.
	 * The reason for this is that some operations (such as drop tree)
	 * are only allowed for blocks that use full backrefs.
	 */

	if (btrfs_block_can_be_shared(root, buf)) {
		ret = btrfs_lookup_extent_info(trans, root, buf->start,
					       buf->len, &refs, &flags);
		BUG_ON(ret);
		BUG_ON(refs == 0);
	} else {
		refs = 1;
		if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
		    btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
			flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
		else
			flags = 0;
	}

	owner = btrfs_header_owner(buf);
	BUG_ON(owner == BTRFS_TREE_RELOC_OBJECTID &&
	       !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));

	if (refs > 1) {
		if ((owner == root->root_key.objectid ||
		     root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) &&
		    !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) {
			ret = btrfs_inc_ref(trans, root, buf, 1);
			BUG_ON(ret);

			if (root->root_key.objectid ==
			    BTRFS_TREE_RELOC_OBJECTID) {
				ret = btrfs_dec_ref(trans, root, buf, 0);
				BUG_ON(ret);
				ret = btrfs_inc_ref(trans, root, cow, 1);
				BUG_ON(ret);
			}
			new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
		} else {

			if (root->root_key.objectid ==
			    BTRFS_TREE_RELOC_OBJECTID)
				ret = btrfs_inc_ref(trans, root, cow, 1);
			else
				ret = btrfs_inc_ref(trans, root, cow, 0);
			BUG_ON(ret);
		}
		if (new_flags != 0) {
			ret = btrfs_set_disk_extent_flags(trans, root,
							  buf->start,
							  buf->len,
							  new_flags, 0);
			BUG_ON(ret);
		}
	} else {
		if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
			if (root->root_key.objectid ==
			    BTRFS_TREE_RELOC_OBJECTID)
				ret = btrfs_inc_ref(trans, root, cow, 1);
			else
				ret = btrfs_inc_ref(trans, root, cow, 0);
			BUG_ON(ret);
			ret = btrfs_dec_ref(trans, root, buf, 1);
			BUG_ON(ret);
		}
		clean_tree_block(trans, root, buf);
		*last_ref = 1;
	}
	return 0;
}
/*
 * does the dirty work in cow of a single block.  The parent block (if
 * supplied) is updated to point to the new cow copy.  The new buffer is
 * marked dirty and returned locked.  If you modify the block it needs to
 * be marked dirty again.
 *
 * search_start -- an allocation hint for the new block
 *
 * empty_size -- a hint that you plan on doing more cow.  This is the size
 * in bytes the allocator should try to find free next to the block it
 * returns.  This is just a hint and may be ignored by the allocator.
 */
static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root,
			     struct extent_buffer *buf,
			     struct extent_buffer *parent, int parent_slot,
			     struct extent_buffer **cow_ret,
			     u64 search_start, u64 empty_size)
{
	struct btrfs_disk_key disk_key;
	struct extent_buffer *cow;
	int level;
	int last_ref = 0;
	int unlock_orig = 0;
	u64 parent_start;

	if (*cow_ret == buf)
		unlock_orig = 1;

	btrfs_assert_tree_locked(buf);

	WARN_ON(root->ref_cows && trans->transid !=
		root->fs_info->running_transaction->transid);
	WARN_ON(root->ref_cows && trans->transid != root->last_trans);

	level = btrfs_header_level(buf);

	if (level == 0)
		btrfs_item_key(buf, &disk_key, 0);
	else
		btrfs_node_key(buf, &disk_key, 0);

	if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
		if (parent)
			parent_start = parent->start;
		else
			parent_start = 0;
	} else
		parent_start = 0;

	cow = btrfs_alloc_free_block(trans, root, buf->len, parent_start,
				     root->root_key.objectid, &disk_key,
				     level, search_start, empty_size);
	if (IS_ERR(cow))
		return PTR_ERR(cow);

	/* cow is set to blocking by btrfs_init_new_buffer */

	copy_extent_buffer(cow, buf, 0, 0, cow->len);
	btrfs_set_header_bytenr(cow, cow->start);
	btrfs_set_header_generation(cow, trans->transid);
	btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV);
	btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN |
				     BTRFS_HEADER_FLAG_RELOC);
	if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
		btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC);
	else
		btrfs_set_header_owner(cow, root->root_key.objectid);

	write_extent_buffer(cow, root->fs_info->fsid,
			    (unsigned long)btrfs_header_fsid(cow),
			    BTRFS_FSID_SIZE);

	update_ref_for_cow(trans, root, buf, cow, &last_ref);

	if (root->ref_cows)
		btrfs_reloc_cow_block(trans, root, buf, cow);

	if (buf == root->node) {
		WARN_ON(parent && parent != buf);
		if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
		    btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
			parent_start = buf->start;
		else
			parent_start = 0;

		extent_buffer_get(cow);
		rcu_assign_pointer(root->node, cow);

		btrfs_free_tree_block(trans, root, buf, parent_start,
				      last_ref);
		free_extent_buffer(buf);
		add_root_to_dirty_list(root);
	} else {
		if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
			parent_start = parent->start;
		else
			parent_start = 0;

		WARN_ON(trans->transid != btrfs_header_generation(parent));
		btrfs_set_node_blockptr(parent, parent_slot,
					cow->start);
		btrfs_set_node_ptr_generation(parent, parent_slot,
					      trans->transid);
		btrfs_mark_buffer_dirty(parent);
		btrfs_free_tree_block(trans, root, buf, parent_start,
				      last_ref);
	}
	if (unlock_orig)
		btrfs_tree_unlock(buf);
	free_extent_buffer(buf);
	btrfs_mark_buffer_dirty(cow);
	*cow_ret = cow;
	return 0;
}

static inline int should_cow_block(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root,
				   struct extent_buffer *buf)
{
	if (btrfs_header_generation(buf) == trans->transid &&
	    !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN) &&
	    !(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
	      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)))
		return 0;
	return 1;
}
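/*
 * Illustrative summary of should_cow_block(): a block may be modified in
 * place only when all three of these hold (shorthand, not compiled):
 *
 *	generation == trans->transid	// already COWed in this transaction
 *	!FLAG_WRITTEN			// not yet written back to disk
 *	!(FLAG_RELOC && root is not the reloc tree)
 *
 * Any other combination forces a fresh copy-on-write.
 */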
/*
 * cows a single block, see __btrfs_cow_block for the real work.
 * This version of it has extra checks so that a block isn't cow'd more than
 * once per transaction, as long as it hasn't been written yet
 */
noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
		    struct btrfs_root *root, struct extent_buffer *buf,
		    struct extent_buffer *parent, int parent_slot,
		    struct extent_buffer **cow_ret)
{
	u64 search_start;
	int ret;

	if (trans->transaction != root->fs_info->running_transaction) {
		printk(KERN_CRIT "trans %llu running %llu\n",
		       (unsigned long long)trans->transid,
		       (unsigned long long)
		       root->fs_info->running_transaction->transid);
		WARN_ON(1);
	}
	if (trans->transid != root->fs_info->generation) {
		printk(KERN_CRIT "trans %llu running %llu\n",
		       (unsigned long long)trans->transid,
		       (unsigned long long)root->fs_info->generation);
		WARN_ON(1);
	}

	if (!should_cow_block(trans, root, buf)) {
		*cow_ret = buf;
		return 0;
	}

	search_start = buf->start & ~((u64)(1024 * 1024 * 1024) - 1);

	if (parent)
		btrfs_set_lock_blocking(parent);
	btrfs_set_lock_blocking(buf);

	ret = __btrfs_cow_block(trans, root, buf, parent,
				 parent_slot, cow_ret, search_start, 0);

	trace_btrfs_cow_block(root, buf, *cow_ret);

	return ret;
}

/*
 * helper function for defrag to decide if two blocks pointed to by a
 * node are actually close by
 */
static int close_blocks(u64 blocknr, u64 other, u32 blocksize)
{
	if (blocknr < other && other - (blocknr + blocksize) < 32768)
		return 1;
	if (blocknr > other && blocknr - (other + blocksize) < 32768)
		return 1;
	return 0;
}

/*
 * compare two keys in a memcmp fashion
 */
static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
{
	struct btrfs_key k1;

	btrfs_disk_key_to_cpu(&k1, disk);

	return btrfs_comp_cpu_keys(&k1, k2);
}

/*
 * same as comp_keys only with two btrfs_key's
 */
int btrfs_comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2)
{
	if (k1->objectid > k2->objectid)
		return 1;
	if (k1->objectid < k2->objectid)
		return -1;
	if (k1->type > k2->type)
		return 1;
	if (k1->type < k2->type)
		return -1;
	if (k1->offset > k2->offset)
		return 1;
	if (k1->offset < k2->offset)
		return -1;
	return 0;
}
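/*
 * Worked example (hypothetical keys): btrfs_comp_cpu_keys() orders first by
 * objectid, then by type, then by offset, so:
 *
 *	(256, BTRFS_INODE_ITEM_KEY, 0)  <  (256, BTRFS_INODE_REF_KEY, 256)
 *	(256, BTRFS_INODE_REF_KEY, 256) <  (257, BTRFS_INODE_ITEM_KEY, 0)
 *
 * This is the total ordering every leaf and node in the tree is sorted by.
 */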
/*
 * this is used by the defrag code to go through all the
 * leaves pointed to by a node and reallocate them so that
 * disk order is close to key order
 */
int btrfs_realloc_node(struct btrfs_trans_handle *trans,
		       struct btrfs_root *root, struct extent_buffer *parent,
		       int start_slot, int cache_only, u64 *last_ret,
		       struct btrfs_key *progress)
{
	struct extent_buffer *cur;
	u64 blocknr;
	u64 gen;
	u64 search_start = *last_ret;
	u64 last_block = 0;
	u64 other;
	u32 parent_nritems;
	int end_slot;
	int i;
	int err = 0;
	int parent_level;
	int uptodate;
	u32 blocksize;
	int progress_passed = 0;
	struct btrfs_disk_key disk_key;

	parent_level = btrfs_header_level(parent);
	if (cache_only && parent_level != 1)
		return 0;

	if (trans->transaction != root->fs_info->running_transaction)
		WARN_ON(1);
	if (trans->transid != root->fs_info->generation)
		WARN_ON(1);

	parent_nritems = btrfs_header_nritems(parent);
	blocksize = btrfs_level_size(root, parent_level - 1);
	end_slot = parent_nritems;

	if (parent_nritems == 1)
		return 0;

	btrfs_set_lock_blocking(parent);

	for (i = start_slot; i < end_slot; i++) {
		int close = 1;

		btrfs_node_key(parent, &disk_key, i);
		if (!progress_passed && comp_keys(&disk_key, progress) < 0)
			continue;

		progress_passed = 1;
		blocknr = btrfs_node_blockptr(parent, i);
		gen = btrfs_node_ptr_generation(parent, i);
		if (last_block == 0)
			last_block = blocknr;

		if (i > 0) {
			other = btrfs_node_blockptr(parent, i - 1);
			close = close_blocks(blocknr, other, blocksize);
		}
		if (!close && i < end_slot - 2) {
			other = btrfs_node_blockptr(parent, i + 1);
			close = close_blocks(blocknr, other, blocksize);
		}
		if (close) {
			last_block = blocknr;
			continue;
		}

		cur = btrfs_find_tree_block(root, blocknr, blocksize);
		if (cur)
			uptodate = btrfs_buffer_uptodate(cur, gen);
		else
			uptodate = 0;
		if (!cur || !uptodate) {
			if (cache_only) {
				free_extent_buffer(cur);
				continue;
			}
			if (!cur) {
				cur = read_tree_block(root, blocknr,
						      blocksize, gen);
				if (!cur)
					return -EIO;
			} else if (!uptodate) {
				btrfs_read_buffer(cur, gen);
			}
		}
		if (search_start == 0)
			search_start = last_block;

		btrfs_tree_lock(cur);
		btrfs_set_lock_blocking(cur);
		err = __btrfs_cow_block(trans, root, cur, parent, i,
					&cur, search_start,
					min(16 * blocksize,
					    (end_slot - i) * blocksize));
		if (err) {
			btrfs_tree_unlock(cur);
			free_extent_buffer(cur);
			break;
		}
		search_start = cur->start;
		last_block = cur->start;
		*last_ret = search_start;
		btrfs_tree_unlock(cur);
		free_extent_buffer(cur);
	}
	return err;
}

/*
 * The leaf data grows from end-to-front in the node.
 * this returns the address of the start of the last item,
 * which is the stop of the leaf data stack
 */
static inline unsigned int leaf_data_end(struct btrfs_root *root,
					 struct extent_buffer *leaf)
{
	u32 nr = btrfs_header_nritems(leaf);
	if (nr == 0)
		return BTRFS_LEAF_DATA_SIZE(root);
	return btrfs_item_offset_nr(leaf, nr - 1);
}
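/*
 * Illustrative leaf layout (offsets hypothetical): item headers grow from
 * the front of the block while item data grows back from the end, so
 * leaf_data_end() is where the data "stack" currently stops:
 *
 *	[header][item0][item1][item2]->  free  <-[data2][data1][data0]
 *	                                       ^
 *	                                leaf_data_end()
 */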
/*
 * search for key in the extent_buffer.  The items start at offset p,
 * and they are item_size apart.  There are 'max' items in p.
 *
 * the slot in the array is returned via slot, and it points to
 * the place where you would insert key if it is not found in
 * the array.
 *
 * slot may point to max if the key is bigger than all of the keys
 */
static noinline int generic_bin_search(struct extent_buffer *eb,
				       unsigned long p,
				       int item_size, struct btrfs_key *key,
				       int max, int *slot)
{
	int low = 0;
	int high = max;
	int mid;
	int ret;
	struct btrfs_disk_key *tmp = NULL;
	struct btrfs_disk_key unaligned;
	unsigned long offset;
	char *kaddr = NULL;
	unsigned long map_start = 0;
	unsigned long map_len = 0;
	int err;

	while (low < high) {
		mid = (low + high) / 2;
		offset = p + mid * item_size;

		if (!kaddr || offset < map_start ||
		    (offset + sizeof(struct btrfs_disk_key)) >
		    map_start + map_len) {

			err = map_private_extent_buffer(eb, offset,
						sizeof(struct btrfs_disk_key),
						&kaddr, &map_start, &map_len);

			if (!err) {
				tmp = (struct btrfs_disk_key *)(kaddr + offset -
								map_start);
			} else {
				read_extent_buffer(eb, &unaligned,
						   offset, sizeof(unaligned));
				tmp = &unaligned;
			}

		} else {
			tmp = (struct btrfs_disk_key *)(kaddr + offset -
							map_start);
		}
		ret = comp_keys(tmp, key);

		if (ret < 0)
			low = mid + 1;
		else if (ret > 0)
			high = mid;
		else {
			*slot = mid;
			return 0;
		}
	}
	*slot = low;
	return 1;
}

/*
 * simple bin_search frontend that does the right thing for
 * leaves vs nodes
 */
static int bin_search(struct extent_buffer *eb, struct btrfs_key *key,
		      int level, int *slot)
{
	if (level == 0) {
		return generic_bin_search(eb,
					  offsetof(struct btrfs_leaf, items),
					  sizeof(struct btrfs_item),
					  key, btrfs_header_nritems(eb),
					  slot);
	} else {
		return generic_bin_search(eb,
					  offsetof(struct btrfs_node, ptrs),
					  sizeof(struct btrfs_key_ptr),
					  key, btrfs_header_nritems(eb),
					  slot);
	}
	return -1;
}

int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
		     int level, int *slot)
{
	return bin_search(eb, key, level, slot);
}

static void root_add_used(struct btrfs_root *root, u32 size)
{
	spin_lock(&root->accounting_lock);
	btrfs_set_root_used(&root->root_item,
			    btrfs_root_used(&root->root_item) + size);
	spin_unlock(&root->accounting_lock);
}

static void root_sub_used(struct btrfs_root *root, u32 size)
{
	spin_lock(&root->accounting_lock);
	btrfs_set_root_used(&root->root_item,
			    btrfs_root_used(&root->root_item) - size);
	spin_unlock(&root->accounting_lock);
}

/* given a node and slot number, this reads the block it points to.  The
 * extent buffer is returned with a reference taken (but unlocked).
 * NULL is returned on error.
 */
static noinline struct extent_buffer *read_node_slot(struct btrfs_root *root,
				   struct extent_buffer *parent, int slot)
{
	int level = btrfs_header_level(parent);
	if (slot < 0)
		return NULL;
	if (slot >= btrfs_header_nritems(parent))
		return NULL;

	BUG_ON(level == 0);

	return read_tree_block(root, btrfs_node_blockptr(parent, slot),
			       btrfs_level_size(root, level - 1),
			       btrfs_node_ptr_generation(parent, slot));
}
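/*
 * Illustrative example (hypothetical contents): searching a node whose
 * keys are [10, 20, 30] with bin_search():
 *
 *	key 20 -> returns 0, *slot == 1   (exact match)
 *	key 25 -> returns 1, *slot == 2   (insertion point)
 *	key 35 -> returns 1, *slot == 3   (== nritems, past the end)
 *
 * Callers walking down the tree subtract one from slot on a non-zero
 * return so the slot points at the key that covers the search key.
 */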
/*
 * node level balancing, used to make sure nodes are in proper order for
 * item deletion.  We balance from the top down, so we have to make sure
 * that a deletion won't leave a node completely empty later on.
 */
static noinline int balance_level(struct btrfs_trans_handle *trans,
			 struct btrfs_root *root,
			 struct btrfs_path *path, int level)
{
	struct extent_buffer *right = NULL;
	struct extent_buffer *mid;
	struct extent_buffer *left = NULL;
	struct extent_buffer *parent = NULL;
	int ret = 0;
	int wret;
	int pslot;
	int orig_slot = path->slots[level];
	u64 orig_ptr;

	if (level == 0)
		return 0;

	mid = path->nodes[level];

	WARN_ON(path->locks[level] != BTRFS_WRITE_LOCK &&
		path->locks[level] != BTRFS_WRITE_LOCK_BLOCKING);
	WARN_ON(btrfs_header_generation(mid) != trans->transid);

	orig_ptr = btrfs_node_blockptr(mid, orig_slot);

	if (level < BTRFS_MAX_LEVEL - 1)
		parent = path->nodes[level + 1];
	pslot = path->slots[level + 1];

	/*
	 * deal with the case where there is only one pointer in the root
	 * by promoting the node below to a root
	 */
	if (!parent) {
		struct extent_buffer *child;

		if (btrfs_header_nritems(mid) != 1)
			return 0;

		/* promote the child to a root */
		child = read_node_slot(root, mid, 0);
		BUG_ON(!child);
		btrfs_tree_lock(child);
		btrfs_set_lock_blocking(child);
		ret = btrfs_cow_block(trans, root, child, mid, 0, &child);
		if (ret) {
			btrfs_tree_unlock(child);
			free_extent_buffer(child);
			goto enospc;
		}

		rcu_assign_pointer(root->node, child);

		add_root_to_dirty_list(root);
		btrfs_tree_unlock(child);

		path->locks[level] = 0;
		path->nodes[level] = NULL;
		clean_tree_block(trans, root, mid);
		btrfs_tree_unlock(mid);
		/* once for the path */
		free_extent_buffer(mid);

		root_sub_used(root, mid->len);
		btrfs_free_tree_block(trans, root, mid, 0, 1);
		/* once for the root ptr */
		free_extent_buffer(mid);
		return 0;
	}
	if (btrfs_header_nritems(mid) >
	    BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
		return 0;

	btrfs_header_nritems(mid);

	left = read_node_slot(root, parent, pslot - 1);
	if (left) {
		btrfs_tree_lock(left);
		btrfs_set_lock_blocking(left);
		wret = btrfs_cow_block(trans, root, left,
				       parent, pslot - 1, &left);
		if (wret) {
			ret = wret;
			goto enospc;
		}
	}
	right = read_node_slot(root, parent, pslot + 1);
	if (right) {
		btrfs_tree_lock(right);
		btrfs_set_lock_blocking(right);
		wret = btrfs_cow_block(trans, root, right,
				       parent, pslot + 1, &right);
		if (wret) {
			ret = wret;
			goto enospc;
		}
	}

	/* first, try to make some room in the middle buffer */
	if (left) {
		orig_slot += btrfs_header_nritems(left);
		wret = push_node_left(trans, root, left, mid, 1);
		if (wret < 0)
			ret = wret;
		btrfs_header_nritems(mid);
	}

	/*
	 * then try to empty the right most buffer into the middle
	 */
	if (right) {
		wret = push_node_left(trans, root, mid, right, 1);
		if (wret < 0 && wret != -ENOSPC)
			ret = wret;
		if (btrfs_header_nritems(right) == 0) {
			clean_tree_block(trans, root, right);
			btrfs_tree_unlock(right);
			wret = del_ptr(trans, root, path, level + 1,
				       pslot + 1);
			if (wret)
				ret = wret;
			root_sub_used(root, right->len);
			btrfs_free_tree_block(trans, root, right, 0, 1);
			free_extent_buffer(right);
			right = NULL;
		} else {
			struct btrfs_disk_key right_key;
			btrfs_node_key(right, &right_key, 0);
			btrfs_set_node_key(parent, &right_key, pslot + 1);
			btrfs_mark_buffer_dirty(parent);
		}
	}
	if (btrfs_header_nritems(mid) == 1) {
		/*
		 * we're not allowed to leave a node with one item in the
		 * tree during a delete.  A deletion from lower in the tree
		 * could try to delete the only pointer in this node.
		 * So, pull some keys from the left.
		 * There has to be a left pointer at this point because
		 * otherwise we would have pulled some pointers from the
		 * right
		 */
		BUG_ON(!left);
		wret = balance_node_right(trans, root, mid, left);
		if (wret < 0) {
			ret = wret;
			goto enospc;
		}
		if (wret == 1) {
			wret = push_node_left(trans, root, left, mid, 1);
			if (wret < 0)
				ret = wret;
		}
		BUG_ON(wret == 1);
	}
	if (btrfs_header_nritems(mid) == 0) {
		clean_tree_block(trans, root, mid);
		btrfs_tree_unlock(mid);
		wret = del_ptr(trans, root, path, level + 1, pslot);
		if (wret)
			ret = wret;
		root_sub_used(root, mid->len);
		btrfs_free_tree_block(trans, root, mid, 0, 1);
		free_extent_buffer(mid);
		mid = NULL;
	} else {
		/* update the parent key to reflect our changes */
		struct btrfs_disk_key mid_key;
		btrfs_node_key(mid, &mid_key, 0);
		btrfs_set_node_key(parent, &mid_key, pslot);
		btrfs_mark_buffer_dirty(parent);
	}

	/* update the path */
	if (left) {
		if (btrfs_header_nritems(left) > orig_slot) {
			extent_buffer_get(left);
			/* left was locked after cow */
			path->nodes[level] = left;
			path->slots[level + 1] -= 1;
			path->slots[level] = orig_slot;
			if (mid) {
				btrfs_tree_unlock(mid);
				free_extent_buffer(mid);
			}
		} else {
			orig_slot -= btrfs_header_nritems(left);
			path->slots[level] = orig_slot;
		}
	}
	/* double check we haven't messed things up */
	if (orig_ptr !=
	    btrfs_node_blockptr(path->nodes[level], path->slots[level]))
		BUG();
enospc:
	if (right) {
		btrfs_tree_unlock(right);
		free_extent_buffer(right);
	}
	if (left) {
		if (path->nodes[level] != left)
			btrfs_tree_unlock(left);
		free_extent_buffer(left);
	}
	return ret;
}
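/*
 * Illustrative numbers (hypothetical node capacity): with, say, 120
 * pointers per node, balance_level() leaves a node alone while it holds
 * more than 120 / 4 = 30 pointers.  Below that it tries, in order, to push
 * everything into the left neighbor, to empty the right neighbor into the
 * middle, and finally to borrow from the left so that no node is ever left
 * holding a single pointer.
 */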
/* Node balancing for insertion.  Here we only split or push nodes around
 * when they are completely full.  This is also done top down, so we
 * have to be pessimistic.
 */
static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
					  struct btrfs_root *root,
					  struct btrfs_path *path, int level)
{
	struct extent_buffer *right = NULL;
	struct extent_buffer *mid;
	struct extent_buffer *left = NULL;
	struct extent_buffer *parent = NULL;
	int ret = 0;
	int wret;
	int pslot;
	int orig_slot = path->slots[level];

	if (level == 0)
		return 1;

	mid = path->nodes[level];
	WARN_ON(btrfs_header_generation(mid) != trans->transid);

	if (level < BTRFS_MAX_LEVEL - 1)
		parent = path->nodes[level + 1];
	pslot = path->slots[level + 1];

	if (!parent)
		return 1;

	left = read_node_slot(root, parent, pslot - 1);

	/* first, try to make some room in the middle buffer */
	if (left) {
		u32 left_nr;

		btrfs_tree_lock(left);
		btrfs_set_lock_blocking(left);

		left_nr = btrfs_header_nritems(left);
		if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
			wret = 1;
		} else {
			ret = btrfs_cow_block(trans, root, left, parent,
					      pslot - 1, &left);
			if (ret)
				wret = 1;
			else {
				wret = push_node_left(trans, root,
						      left, mid, 0);
			}
		}
		if (wret < 0)
			ret = wret;
		if (wret == 0) {
			struct btrfs_disk_key disk_key;
			orig_slot += left_nr;
			btrfs_node_key(mid, &disk_key, 0);
			btrfs_set_node_key(parent, &disk_key, pslot);
			btrfs_mark_buffer_dirty(parent);
			if (btrfs_header_nritems(left) > orig_slot) {
				path->nodes[level] = left;
				path->slots[level + 1] -= 1;
				path->slots[level] = orig_slot;
				btrfs_tree_unlock(mid);
				free_extent_buffer(mid);
			} else {
				orig_slot -=
					btrfs_header_nritems(left);
				path->slots[level] = orig_slot;
				btrfs_tree_unlock(left);
				free_extent_buffer(left);
			}
			return 0;
		}
		btrfs_tree_unlock(left);
		free_extent_buffer(left);
	}
	right = read_node_slot(root, parent, pslot + 1);

	/*
	 * then try to empty the right most buffer into the middle
	 */
	if (right) {
		u32 right_nr;

		btrfs_tree_lock(right);
		btrfs_set_lock_blocking(right);

		right_nr = btrfs_header_nritems(right);
		if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
			wret = 1;
		} else {
			ret = btrfs_cow_block(trans, root, right,
					      parent, pslot + 1,
					      &right);
			if (ret)
				wret = 1;
			else {
				wret = balance_node_right(trans, root,
							  right, mid);
			}
		}
		if (wret < 0)
			ret = wret;
		if (wret == 0) {
			struct btrfs_disk_key disk_key;

			btrfs_node_key(right, &disk_key, 0);
			btrfs_set_node_key(parent, &disk_key, pslot + 1);
			btrfs_mark_buffer_dirty(parent);

			if (btrfs_header_nritems(mid) <= orig_slot) {
				path->nodes[level] = right;
				path->slots[level + 1] += 1;
				path->slots[level] = orig_slot -
					btrfs_header_nritems(mid);
				btrfs_tree_unlock(mid);
				free_extent_buffer(mid);
			} else {
				btrfs_tree_unlock(right);
				free_extent_buffer(right);
			}
			return 0;
		}
		btrfs_tree_unlock(right);
		free_extent_buffer(right);
	}
	return 1;
}
/*
 * readahead one full node of leaves, finding things that are close
 * to the block in 'slot', and triggering ra on them.
 */
static void reada_for_search(struct btrfs_root *root,
			     struct btrfs_path *path,
			     int level, int slot, u64 objectid)
{
	struct extent_buffer *node;
	struct btrfs_disk_key disk_key;
	u32 nritems;
	u64 search;
	u64 target;
	u64 nread = 0;
	u64 gen;
	int direction = path->reada;
	struct extent_buffer *eb;
	u32 nr;
	u32 blocksize;
	u32 nscan = 0;

	if (level != 1)
		return;

	if (!path->nodes[level])
		return;

	node = path->nodes[level];

	search = btrfs_node_blockptr(node, slot);
	blocksize = btrfs_level_size(root, level - 1);
	eb = btrfs_find_tree_block(root, search, blocksize);
	if (eb) {
		free_extent_buffer(eb);
		return;
	}

	target = search;

	nritems = btrfs_header_nritems(node);
	nr = slot;

	while (1) {
		if (direction < 0) {
			if (nr == 0)
				break;
			nr--;
		} else if (direction > 0) {
			nr++;
			if (nr >= nritems)
				break;
		}
		if (path->reada < 0 && objectid) {
			btrfs_node_key(node, &disk_key, nr);
			if (btrfs_disk_key_objectid(&disk_key) != objectid)
				break;
		}
		search = btrfs_node_blockptr(node, nr);
		if ((search <= target && target - search <= 65536) ||
		    (search > target && search - target <= 65536)) {
			gen = btrfs_node_ptr_generation(node, nr);
			readahead_tree_block(root, search, blocksize, gen);
			nread += blocksize;
		}
		nscan++;
		if ((nread > 65536 || nscan > 32))
			break;
	}
}

/*
 * returns -EAGAIN if it had to drop the path, or zero if everything was in
 * cache
 */
static noinline int reada_for_balance(struct btrfs_root *root,
				      struct btrfs_path *path, int level)
{
	int slot;
	int nritems;
	struct extent_buffer *parent;
	struct extent_buffer *eb;
	u64 gen;
	u64 block1 = 0;
	u64 block2 = 0;
	int ret = 0;
	int blocksize;

	parent = path->nodes[level + 1];
	if (!parent)
		return 0;

	nritems = btrfs_header_nritems(parent);
	slot = path->slots[level + 1];
	blocksize = btrfs_level_size(root, level);

	if (slot > 0) {
		block1 = btrfs_node_blockptr(parent, slot - 1);
		gen = btrfs_node_ptr_generation(parent, slot - 1);
		eb = btrfs_find_tree_block(root, block1, blocksize);
		if (eb && btrfs_buffer_uptodate(eb, gen))
			block1 = 0;
		free_extent_buffer(eb);
	}
	if (slot + 1 < nritems) {
		block2 = btrfs_node_blockptr(parent, slot + 1);
		gen = btrfs_node_ptr_generation(parent, slot + 1);
		eb = btrfs_find_tree_block(root, block2, blocksize);
		if (eb && btrfs_buffer_uptodate(eb, gen))
			block2 = 0;
		free_extent_buffer(eb);
	}
	if (block1 || block2) {
		ret = -EAGAIN;

		/* release the whole path */
		btrfs_release_path(path);

		/* read the blocks */
		if (block1)
			readahead_tree_block(root, block1, blocksize, 0);
		if (block2)
			readahead_tree_block(root, block2, blocksize, 0);

		if (block1) {
			eb = read_tree_block(root, block1, blocksize, 0);
			free_extent_buffer(eb);
		}
		if (block2) {
			eb = read_tree_block(root, block2, blocksize, 0);
			free_extent_buffer(eb);
		}
	}
	return ret;
}
/*
 * when we walk down the tree, it is usually safe to unlock the higher
 * layers in the tree.  The exceptions are when our path goes through slot
 * 0, because operations on the tree might require changing key pointers
 * higher up in the tree.
 *
 * callers might also have set path->keep_locks, which tells this code to
 * keep the lock if the path points to the last slot in the block.  This is
 * part of walking through the tree, and selecting the next slot in the
 * higher block.
 *
 * lowest_unlock sets the lowest level in the tree we're allowed to unlock.
 * so if lowest_unlock is 1, level 0 won't be unlocked
 */
static noinline void unlock_up(struct btrfs_path *path, int level,
			       int lowest_unlock)
{
	int i;
	int skip_level = level;
	int no_skips = 0;
	struct extent_buffer *t;

	for (i = level; i < BTRFS_MAX_LEVEL; i++) {
		if (!path->nodes[i])
			break;
		if (!path->locks[i])
			break;
		if (!no_skips && path->slots[i] == 0) {
			skip_level = i + 1;
			continue;
		}
		if (!no_skips && path->keep_locks) {
			u32 nritems;
			t = path->nodes[i];
			nritems = btrfs_header_nritems(t);
			if (nritems < 1 || path->slots[i] >= nritems - 1) {
				skip_level = i + 1;
				continue;
			}
		}
		if (skip_level < i && i >= lowest_unlock)
			no_skips = 1;

		t = path->nodes[i];
		if (i >= lowest_unlock && i > skip_level && path->locks[i]) {
			btrfs_tree_unlock_rw(t, path->locks[i]);
			path->locks[i] = 0;
		}
	}
}

/*
 * This releases any locks held in the path starting at level and
 * going all the way up to the root.
 *
 * btrfs_search_slot will keep the lock held on higher nodes in a few
 * corner cases, such as COW of the block at slot zero in the node.  This
 * ignores those rules, and it should only be called when there are no
 * more updates to be done higher up in the tree.
 */
noinline void btrfs_unlock_up_safe(struct btrfs_path *path, int level)
{
	int i;

	if (path->keep_locks)
		return;

	for (i = level; i < BTRFS_MAX_LEVEL; i++) {
		if (!path->nodes[i])
			continue;
		if (!path->locks[i])
			continue;
		btrfs_tree_unlock_rw(path->nodes[i], path->locks[i]);
		path->locks[i] = 0;
	}
}

/*
 * helper function for btrfs_search_slot.  The goal is to find a block
 * in cache without setting the path to blocking.  If we find the block
 * we return zero and the path is unchanged.
 *
 * If we can't find the block, we set the path blocking and do some
 * reada.  -EAGAIN is returned and the search must be repeated.
 */
static int
read_block_for_search(struct btrfs_trans_handle *trans,
		       struct btrfs_root *root, struct btrfs_path *p,
		       struct extent_buffer **eb_ret, int level, int slot,
		       struct btrfs_key *key)
{
	u64 blocknr;
	u64 gen;
	u32 blocksize;
	struct extent_buffer *b = *eb_ret;
	struct extent_buffer *tmp;
	int ret;

	blocknr = btrfs_node_blockptr(b, slot);
	gen = btrfs_node_ptr_generation(b, slot);
	blocksize = btrfs_level_size(root, level - 1);

	tmp = btrfs_find_tree_block(root, blocknr, blocksize);
	if (tmp) {
		if (btrfs_buffer_uptodate(tmp, 0)) {
			if (btrfs_buffer_uptodate(tmp, gen)) {
				/*
				 * we found an up to date block without
				 * sleeping, return right away
				 */
				*eb_ret = tmp;
				return 0;
			}
			/* the pages were up to date, but we failed
			 * the generation number check.  Do a full
			 * read for the generation number that is correct.
			 * We must do this without dropping locks so
			 * we can trust our generation number
			 */
			free_extent_buffer(tmp);
			btrfs_set_path_blocking(p);

			tmp = read_tree_block(root, blocknr, blocksize, gen);
			if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
				*eb_ret = tmp;
				return 0;
			}
			free_extent_buffer(tmp);
			btrfs_release_path(p);
			return -EIO;
		}
	}

	/*
	 * reduce lock contention at high levels
	 * of the btree by dropping locks before
	 * we read.  Don't release the lock on the current
	 * level because we need to walk this node to figure
	 * out which blocks to read.
	 */
	btrfs_unlock_up_safe(p, level + 1);
	btrfs_set_path_blocking(p);

	free_extent_buffer(tmp);
	if (p->reada)
		reada_for_search(root, p, level, slot, key->objectid);

	btrfs_release_path(p);

	ret = -EAGAIN;
	tmp = read_tree_block(root, blocknr, blocksize, 0);
	if (tmp) {
		/*
		 * If the read above didn't mark this buffer up to date,
		 * it will never end up being up to date.  Set ret to EIO now
		 * and give up so that our caller doesn't loop forever
		 * on our EAGAINs.
		 */
		if (!btrfs_buffer_uptodate(tmp, 0))
			ret = -EIO;
		free_extent_buffer(tmp);
	}
	return ret;
}

/*
 * helper function for btrfs_search_slot.  This does all of the checks
 * for node-level blocks and does any balancing required based on
 * the ins_len.
 *
 * If no extra work was required, zero is returned.  If we had to
 * drop the path, -EAGAIN is returned and btrfs_search_slot must
 * start over
 */
static int
setup_nodes_for_search(struct btrfs_trans_handle *trans,
		       struct btrfs_root *root, struct btrfs_path *p,
		       struct extent_buffer *b, int level, int ins_len,
		       int *write_lock_level)
{
	int ret;
	if ((p->search_for_split || ins_len > 0) && btrfs_header_nritems(b) >=
	    BTRFS_NODEPTRS_PER_BLOCK(root) - 3) {
		int sret;

		if (*write_lock_level < level + 1) {
			*write_lock_level = level + 1;
			btrfs_release_path(p);
			goto again;
		}

		sret = reada_for_balance(root, p, level);
		if (sret)
			goto again;

		btrfs_set_path_blocking(p);
		sret = split_node(trans, root, p, level);
		btrfs_clear_path_blocking(p, NULL, 0);

		BUG_ON(sret > 0);
		if (sret) {
			ret = sret;
			goto done;
		}
		b = p->nodes[level];
	} else if (ins_len < 0 && btrfs_header_nritems(b) <
		   BTRFS_NODEPTRS_PER_BLOCK(root) / 2) {
		int sret;

		if (*write_lock_level < level + 1) {
			*write_lock_level = level + 1;
			btrfs_release_path(p);
			goto again;
		}

		sret = reada_for_balance(root, p, level);
		if (sret)
			goto again;

		btrfs_set_path_blocking(p);
		sret = balance_level(trans, root, p, level);
		btrfs_clear_path_blocking(p, NULL, 0);

		if (sret) {
			ret = sret;
			goto done;
		}
		b = p->nodes[level];
		if (!b) {
			btrfs_release_path(p);
			goto again;
		}
		BUG_ON(btrfs_header_nritems(b) == 1);
	}
	return 0;

again:
	ret = -EAGAIN;
done:
	return ret;
}
/*
 * look for key in the tree.  path is filled in with nodes along the way
 * if key is found, we return zero and you can find the item in the leaf
 * level of the path (level 0)
 *
 * If the key isn't found, the path points to the slot where it should
 * be inserted, and 1 is returned.  If there are other errors during the
 * search a negative error number is returned.
 *
 * if ins_len > 0, nodes and leaves will be split as we walk down the
 * tree.  if ins_len < 0, nodes will be merged as we walk down the tree (if
 * possible)
 */
int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
		      *root, struct btrfs_key *key, struct btrfs_path *p, int
		      ins_len, int cow)
{
	struct extent_buffer *b;
	int slot;
	int ret;
	int err;
	int level;
	int lowest_unlock = 1;
	int root_lock;
	/* everything at write_lock_level or lower must be write locked */
	int write_lock_level = 0;
	u8 lowest_level = 0;

	lowest_level = p->lowest_level;
	WARN_ON(lowest_level && ins_len > 0);
	WARN_ON(p->nodes[0] != NULL);

	if (ins_len < 0) {
		lowest_unlock = 2;

		/* when we are removing items, we might have to go up to
		 * level two as we update tree pointers.  Make sure we keep
		 * write locks for those levels as well
		 */
		write_lock_level = 2;
	} else if (ins_len > 0) {
		/*
		 * for inserting items, make sure we have a write lock on
		 * level 1 so we can update keys
		 */
		write_lock_level = 1;
	}

	if (!cow)
		write_lock_level = -1;

	if (cow && (p->keep_locks || p->lowest_level))
		write_lock_level = BTRFS_MAX_LEVEL;

again:
	/*
	 * we try very hard to do read locks on the root
	 */
	root_lock = BTRFS_READ_LOCK;
	level = 0;
	if (p->search_commit_root) {
		/*
		 * the commit roots are read only
		 * so we always do read locks
		 */
		b = root->commit_root;
		extent_buffer_get(b);
		level = btrfs_header_level(b);
		if (!p->skip_locking)
			btrfs_tree_read_lock(b);
	} else {
		if (p->skip_locking) {
			b = btrfs_root_node(root);
			level = btrfs_header_level(b);
		} else {
			/* we don't know the level of the root node
			 * until we actually have it read locked
			 */
			b = btrfs_read_lock_root_node(root);
			level = btrfs_header_level(b);
			if (level <= write_lock_level) {
				/* whoops, must trade for write lock */
				btrfs_tree_read_unlock(b);
				free_extent_buffer(b);
				b = btrfs_lock_root_node(root);
				root_lock = BTRFS_WRITE_LOCK;

				/* the level might have changed, check again */
				level = btrfs_header_level(b);
			}
		}
	}
	p->nodes[level] = b;
	if (!p->skip_locking)
		p->locks[level] = root_lock;

	while (b) {
		level = btrfs_header_level(b);

		/*
		 * setup the path here so we can release it under lock
		 * contention with the cow code
		 */
		if (cow) {
			/*
			 * if we don't really need to cow this block
			 * then we don't want to set the path blocking,
			 * so we test it here
			 */
			if (!should_cow_block(trans, root, b))
				goto cow_done;

			btrfs_set_path_blocking(p);

			/*
			 * must have write locks on this node and the
			 * parent
			 */
			if (level + 1 > write_lock_level) {
				write_lock_level = level + 1;
				btrfs_release_path(p);
				goto again;
			}

			err = btrfs_cow_block(trans, root, b,
					      p->nodes[level + 1],
					      p->slots[level + 1], &b);
			if (err) {
				ret = err;
				goto done;
			}
		}
cow_done:
		BUG_ON(!cow && ins_len);

		p->nodes[level] = b;
		btrfs_clear_path_blocking(p, NULL, 0);

		/*
		 * we have a lock on b and as long as we aren't changing
		 * the tree, there is no way for the items in b to change.
		 * It is safe to drop the lock on our parent before we
		 * go through the expensive btree search on b.
		 *
		 * If cow is true, then we might be changing slot zero,
		 * which may require changing the parent.  So, we can't
		 * drop the lock until after we know which slot we're
		 * operating on.
		 */
		if (!cow)
			btrfs_unlock_up_safe(p, level + 1);

		ret = bin_search(b, key, level, &slot);

		if (level != 0) {
			int dec = 0;
			if (ret && slot > 0) {
				dec = 1;
				slot -= 1;
			}
			p->slots[level] = slot;
			err = setup_nodes_for_search(trans, root, p, b, level,
						     ins_len,
						     &write_lock_level);
			if (err == -EAGAIN)
				goto again;
			if (err) {
				ret = err;
				goto done;
			}
			b = p->nodes[level];
			slot = p->slots[level];

			/*
			 * slot 0 is special, if we change the key
			 * we have to update the parent pointer
			 * which means we must have a write lock
			 * on the parent
			 */
			if (slot == 0 && cow &&
			    write_lock_level < level + 1) {
				write_lock_level = level + 1;
				btrfs_release_path(p);
				goto again;
			}

			unlock_up(p, level, lowest_unlock);

			if (level == lowest_level) {
				if (dec)
					p->slots[level]++;
				goto done;
			}

			err = read_block_for_search(trans, root, p,
						    &b, level, slot, key);
			if (err == -EAGAIN)
				goto again;
			if (err) {
				ret = err;
				goto done;
			}

			if (!p->skip_locking) {
				level = btrfs_header_level(b);
				if (level <= write_lock_level) {
					err = btrfs_try_tree_write_lock(b);
					if (!err) {
						btrfs_set_path_blocking(p);
						btrfs_tree_lock(b);
						btrfs_clear_path_blocking(p, b,
								BTRFS_WRITE_LOCK);
					}
					p->locks[level] = BTRFS_WRITE_LOCK;
				} else {
					err = btrfs_try_tree_read_lock(b);
					if (!err) {
						btrfs_set_path_blocking(p);
						btrfs_tree_read_lock(b);
						btrfs_clear_path_blocking(p, b,
								BTRFS_READ_LOCK);
					}
					p->locks[level] = BTRFS_READ_LOCK;
				}
				p->nodes[level] = b;
			}
		} else {
			p->slots[level] = slot;
			if (ins_len > 0 &&
			    btrfs_leaf_free_space(root, b) < ins_len) {
				if (write_lock_level < 1) {
					write_lock_level = 1;
					btrfs_release_path(p);
					goto again;
				}

				btrfs_set_path_blocking(p);
				err = split_leaf(trans, root, key,
						 p, ins_len, ret == 0);
				btrfs_clear_path_blocking(p, NULL, 0);

				BUG_ON(err > 0);
				if (err) {
					ret = err;
					goto done;
				}
			}
			if (!p->search_for_split)
				unlock_up(p, level, lowest_unlock);
			goto done;
		}
	}
	ret = 1;
done:
	/*
	 * we don't really know what they plan on doing with the path
	 * from here on, so for now just mark it as blocking
	 */
	if (!p->leave_spinning)
		btrfs_set_path_blocking(p);
	if (ret < 0)
		btrfs_release_path(p);
	return ret;
}
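/*
 * Illustrative caller sketch (hypothetical key values, not from this file):
 * a typical read-only lookup allocates a path, searches, and releases it.
 *
 *	struct btrfs_path *path = btrfs_alloc_path();
 *	struct btrfs_key key;
 *	int ret;
 *
 *	if (!path)
 *		return -ENOMEM;
 *	key.objectid = 256;			// hypothetical inode number
 *	key.type = BTRFS_INODE_ITEM_KEY;
 *	key.offset = 0;
 *	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
 *	if (ret == 0) {
 *		// path->nodes[0] / path->slots[0] point at the item
 *	} else if (ret == 1) {
 *		// not found; slot is where it would be inserted
 *	}
 *	btrfs_free_path(path);
 *
 * Passing trans == NULL and cow == 0 is the read-only form; insertions
 * pass a transaction handle, ins_len > 0 and cow == 1.
 */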
/*
 * adjust the pointers going up the tree, starting at level
 * making sure the right key of each node points to 'key'.
 * This is used after shifting pointers to the left, so it stops
 * fixing up pointers when a given leaf/node is not in slot 0 of the
 * higher levels
 *
 * If this fails to write a tree block, it returns -1, but continues
 * fixing up the blocks in ram so the tree is consistent.
 */
static int fixup_low_keys(struct btrfs_trans_handle *trans,
			  struct btrfs_root *root, struct btrfs_path *path,
			  struct btrfs_disk_key *key, int level)
{
	int i;
	int ret = 0;
	struct extent_buffer *t;

	for (i = level; i < BTRFS_MAX_LEVEL; i++) {
		int tslot = path->slots[i];
		if (!path->nodes[i])
			break;
		t = path->nodes[i];
		btrfs_set_node_key(t, key, tslot);
		btrfs_mark_buffer_dirty(path->nodes[i]);
		if (tslot != 0)
			break;
	}
	return ret;
}

/*
 * update item key.
 *
 * This function isn't completely safe.  It's the caller's responsibility
 * that the new key won't break the order
 */
int btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
			    struct btrfs_root *root, struct btrfs_path *path,
			    struct btrfs_key *new_key)
{
	struct btrfs_disk_key disk_key;
	struct extent_buffer *eb;
	int slot;

	eb = path->nodes[0];
	slot = path->slots[0];
	if (slot > 0) {
		btrfs_item_key(eb, &disk_key, slot - 1);
		if (comp_keys(&disk_key, new_key) >= 0)
			return -1;
	}
	if (slot < btrfs_header_nritems(eb) - 1) {
		btrfs_item_key(eb, &disk_key, slot + 1);
		if (comp_keys(&disk_key, new_key) <= 0)
			return -1;
	}

	btrfs_cpu_key_to_disk(&disk_key, new_key);
	btrfs_set_item_key(eb, &disk_key, slot);
	btrfs_mark_buffer_dirty(eb);
	if (slot == 0)
		fixup_low_keys(trans, root, path, &disk_key, 1);
	return 0;
}
/*
 * try to push data from one node into the next node left in the
 * tree.
 *
 * returns 0 if some ptrs were pushed left, < 0 if there was some horrible
 * error, and > 0 if there was no room in the left hand block.
 */
static int push_node_left(struct btrfs_trans_handle *trans,
			  struct btrfs_root *root, struct extent_buffer *dst,
			  struct extent_buffer *src, int empty)
{
	int push_items = 0;
	int src_nritems;
	int dst_nritems;
	int ret = 0;

	src_nritems = btrfs_header_nritems(src);
	dst_nritems = btrfs_header_nritems(dst);
	push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
	WARN_ON(btrfs_header_generation(src) != trans->transid);
	WARN_ON(btrfs_header_generation(dst) != trans->transid);

	if (!empty && src_nritems <= 8)
		return 1;

	if (push_items <= 0)
		return 1;

	if (empty) {
		push_items = min(src_nritems, push_items);
		if (push_items < src_nritems) {
			/* leave at least 8 pointers in the node if
			 * we aren't going to empty it
			 */
			if (src_nritems - push_items < 8) {
				if (push_items <= 8)
					return 1;
				push_items -= 8;
			}
		}
	} else
		push_items = min(src_nritems - 8, push_items);

	copy_extent_buffer(dst, src,
			   btrfs_node_key_ptr_offset(dst_nritems),
			   btrfs_node_key_ptr_offset(0),
			   push_items * sizeof(struct btrfs_key_ptr));

	if (push_items < src_nritems) {
		memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0),
				      btrfs_node_key_ptr_offset(push_items),
				      (src_nritems - push_items) *
				      sizeof(struct btrfs_key_ptr));
	}
	btrfs_set_header_nritems(src, src_nritems - push_items);
	btrfs_set_header_nritems(dst, dst_nritems + push_items);
	btrfs_mark_buffer_dirty(src);
	btrfs_mark_buffer_dirty(dst);

	return ret;
}

/*
 * try to push data from one node into the next node right in the
 * tree.
 *
 * returns 0 if some ptrs were pushed, < 0 if there was some horrible
 * error, and > 0 if there was no room in the right hand block.
 *
 * this will only push up to 1/2 the contents of the left node over
 */
static int balance_node_right(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root,
			      struct extent_buffer *dst,
			      struct extent_buffer *src)
{
	int push_items = 0;
	int max_push;
	int src_nritems;
	int dst_nritems;
	int ret = 0;

	WARN_ON(btrfs_header_generation(src) != trans->transid);
	WARN_ON(btrfs_header_generation(dst) != trans->transid);

	src_nritems = btrfs_header_nritems(src);
	dst_nritems = btrfs_header_nritems(dst);
	push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
	if (push_items <= 0)
		return 1;

	if (src_nritems < 4)
		return 1;

	max_push = src_nritems / 2 + 1;
	/* don't try to empty the node */
	if (max_push >= src_nritems)
		return 1;

	if (max_push < push_items)
		push_items = max_push;

	memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items),
			      btrfs_node_key_ptr_offset(0),
			      (dst_nritems) *
			      sizeof(struct btrfs_key_ptr));

	copy_extent_buffer(dst, src,
			   btrfs_node_key_ptr_offset(0),
			   btrfs_node_key_ptr_offset(src_nritems - push_items),
			   push_items * sizeof(struct btrfs_key_ptr));

	btrfs_set_header_nritems(src, src_nritems - push_items);
	btrfs_set_header_nritems(dst, dst_nritems + push_items);

	btrfs_mark_buffer_dirty(src);
	btrfs_mark_buffer_dirty(dst);

	return ret;
}
/*
 * helper function to insert a new root level in the tree.
 * A new node is allocated, and a single item is inserted to
 * point to the existing root
 *
 * returns zero on success or < 0 on failure.
 */
static noinline int insert_new_root(struct btrfs_trans_handle *trans,
				    struct btrfs_root *root,
				    struct btrfs_path *path, int level)
{
	u64 lower_gen;
	struct extent_buffer *lower;
	struct extent_buffer *c;
	struct extent_buffer *old;
	struct btrfs_disk_key lower_key;

	BUG_ON(path->nodes[level]);
	BUG_ON(path->nodes[level-1] != root->node);

	lower = path->nodes[level-1];
	if (level == 1)
		btrfs_item_key(lower, &lower_key, 0);
	else
		btrfs_node_key(lower, &lower_key, 0);

	c = btrfs_alloc_free_block(trans, root, root->nodesize, 0,
				   root->root_key.objectid, &lower_key,
				   level, root->node->start, 0);
	if (IS_ERR(c))
		return PTR_ERR(c);

	root_add_used(root, root->nodesize);

	memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header));
	btrfs_set_header_nritems(c, 1);
	btrfs_set_header_level(c, level);
	btrfs_set_header_bytenr(c, c->start);
	btrfs_set_header_generation(c, trans->transid);
	btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
	btrfs_set_header_owner(c, root->root_key.objectid);

	write_extent_buffer(c, root->fs_info->fsid,
			    (unsigned long)btrfs_header_fsid(c),
			    BTRFS_FSID_SIZE);

	write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
			    (unsigned long)btrfs_header_chunk_tree_uuid(c),
			    BTRFS_UUID_SIZE);

	btrfs_set_node_key(c, &lower_key, 0);
	btrfs_set_node_blockptr(c, 0, lower->start);
	lower_gen = btrfs_header_generation(lower);
	WARN_ON(lower_gen != trans->transid);

	btrfs_set_node_ptr_generation(c, 0, lower_gen);

	btrfs_mark_buffer_dirty(c);

	old = root->node;
	rcu_assign_pointer(root->node, c);

	/* the super has an extra ref to root->node */
	free_extent_buffer(old);

	add_root_to_dirty_list(root);
	extent_buffer_get(c);
	path->nodes[level] = c;
	path->locks[level] = BTRFS_WRITE_LOCK;
	path->slots[level] = 0;
	return 0;
}
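/*
 * Illustrative aside (not part of the original file): growing the tree
 * is just allocating a node one level higher whose single pointer is the
 * old root. A minimal userspace sketch; struct node and grow_root() are
 * invented for the example.
 */
#if 0	/* sketch only, never built */
#include <stdlib.h>
struct node {
	int level;
	int nritems;
	struct node *ptr[16];
};
static struct node *grow_root(struct node *old_root)
{
	struct node *c = calloc(1, sizeof(*c));

	if (!c)
		return NULL;
	c->level = old_root->level + 1;
	c->nritems = 1;
	c->ptr[0] = old_root;	/* item 0 points at the old root */
	return c;		/* caller publishes this as the new root */
}
#endif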
/*
 * worker function to insert a single pointer in a node.
 * the node should have enough room for the pointer already
 *
 * slot and level indicate where you want the key to go, and
 * blocknr is the block the key points to.
 *
 * returns zero on success and < 0 on any error
 */
static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
		      *root, struct btrfs_path *path, struct btrfs_disk_key
		      *key, u64 bytenr, int slot, int level)
{
	struct extent_buffer *lower;
	int nritems;

	BUG_ON(!path->nodes[level]);
	btrfs_assert_tree_locked(path->nodes[level]);
	lower = path->nodes[level];
	nritems = btrfs_header_nritems(lower);
	BUG_ON(slot > nritems);
	if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root))
		BUG();
	if (slot != nritems) {
		memmove_extent_buffer(lower,
			      btrfs_node_key_ptr_offset(slot + 1),
			      btrfs_node_key_ptr_offset(slot),
			      (nritems - slot) * sizeof(struct btrfs_key_ptr));
	}
	btrfs_set_node_key(lower, key, slot);
	btrfs_set_node_blockptr(lower, slot, bytenr);
	WARN_ON(trans->transid == 0);
	btrfs_set_node_ptr_generation(lower, slot, trans->transid);
	btrfs_set_header_nritems(lower, nritems + 1);
	btrfs_mark_buffer_dirty(lower);
	return 0;
}

/*
 * split the node at the specified level in path in two.
 * The path is corrected to point to the appropriate node after the split
 *
 * Before splitting this tries to make some room in the node by pushing
 * left and right, if either one works, it returns right away.
 *
 * returns 0 on success and < 0 on failure
 */
static noinline int split_node(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root,
			       struct btrfs_path *path, int level)
{
	struct extent_buffer *c;
	struct extent_buffer *split;
	struct btrfs_disk_key disk_key;
	int mid;
	int ret;
	int wret;
	u32 c_nritems;

	c = path->nodes[level];
	WARN_ON(btrfs_header_generation(c) != trans->transid);
	if (c == root->node) {
		/* trying to split the root, let's make a new one */
		ret = insert_new_root(trans, root, path, level + 1);
		if (ret)
			return ret;
	} else {
		ret = push_nodes_for_insert(trans, root, path, level);
		c = path->nodes[level];
		if (!ret && btrfs_header_nritems(c) <
		    BTRFS_NODEPTRS_PER_BLOCK(root) - 3)
			return 0;
		if (ret < 0)
			return ret;
	}

	c_nritems = btrfs_header_nritems(c);
	mid = (c_nritems + 1) / 2;
	btrfs_node_key(c, &disk_key, mid);

	split = btrfs_alloc_free_block(trans, root, root->nodesize, 0,
				       root->root_key.objectid,
				       &disk_key, level, c->start, 0);
	if (IS_ERR(split))
		return PTR_ERR(split);

	root_add_used(root, root->nodesize);

	memset_extent_buffer(split, 0, 0, sizeof(struct btrfs_header));
	btrfs_set_header_level(split, btrfs_header_level(c));
	btrfs_set_header_bytenr(split, split->start);
	btrfs_set_header_generation(split, trans->transid);
	btrfs_set_header_backref_rev(split, BTRFS_MIXED_BACKREF_REV);
	btrfs_set_header_owner(split, root->root_key.objectid);
	write_extent_buffer(split, root->fs_info->fsid,
			    (unsigned long)btrfs_header_fsid(split),
			    BTRFS_FSID_SIZE);
	write_extent_buffer(split, root->fs_info->chunk_tree_uuid,
			    (unsigned long)btrfs_header_chunk_tree_uuid(split),
			    BTRFS_UUID_SIZE);

	copy_extent_buffer(split, c,
			   btrfs_node_key_ptr_offset(0),
			   btrfs_node_key_ptr_offset(mid),
			   (c_nritems - mid) * sizeof(struct btrfs_key_ptr));
	btrfs_set_header_nritems(split, c_nritems - mid);
	btrfs_set_header_nritems(c, mid);
	ret = 0;

	btrfs_mark_buffer_dirty(c);
	btrfs_mark_buffer_dirty(split);

	wret = insert_ptr(trans, root, path, &disk_key, split->start,
			  path->slots[level + 1] + 1,
			  level + 1);
	if (wret)
		ret = wret;

	if (path->slots[level] >= mid) {
		path->slots[level] -= mid;
		btrfs_tree_unlock(c);
		free_extent_buffer(c);
		path->nodes[level] = split;
		path->slots[level + 1] += 1;
	} else {
		btrfs_tree_unlock(split);
		free_extent_buffer(split);
	}
	return ret;
}

/*
 * how many bytes are required to store the items in a leaf. start
 * and nr indicate which items in the leaf to check. This totals up the
 * space used both by the item structs and the item data
 */
static int leaf_space_used(struct extent_buffer *l, int start, int nr)
{
	int data_len;
	int nritems = btrfs_header_nritems(l);
	int end = min(nritems, start + nr) - 1;

	if (!nr)
		return 0;
	data_len = btrfs_item_end_nr(l, start);
	data_len = data_len - btrfs_item_offset_nr(l, end);
	data_len += sizeof(struct btrfs_item) * nr;
	WARN_ON(data_len < 0);
	return data_len;
}

/*
 * The space between the end of the leaf items and
 * the start of the leaf data. IOW, how much room
 * the leaf has left for both items and data
 */
noinline int btrfs_leaf_free_space(struct btrfs_root *root,
				   struct extent_buffer *leaf)
{
	int nritems = btrfs_header_nritems(leaf);
	int ret;
	ret = BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems);
	if (ret < 0) {
		printk(KERN_CRIT "leaf free space ret %d, leaf data size %lu, "
		       "used %d nritems %d\n",
		       ret, (unsigned long) BTRFS_LEAF_DATA_SIZE(root),
		       leaf_space_used(leaf, 0, nritems), nritems);
	}
	return ret;
}
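/*
 * Illustrative aside (not part of the original file): a leaf stores item
 * headers growing up from the front and item data growing down from the
 * back, so free space is the gap between them. A userspace sketch of the
 * same arithmetic; LEAF_DATA_SIZE, struct item and leaf_free_space() are
 * invented stand-ins.
 */
#if 0	/* sketch only, never built */
#define LEAF_DATA_SIZE 3995	/* example capacity, made up */
struct item { unsigned int offset, size; };
static int leaf_free_space(const struct item *items, int nritems)
{
	int used = nritems * (int)sizeof(struct item);

	/* data spans from the end of item 0's bytes down to the
	 * offset of the last item's bytes */
	if (nritems)
		used += (items[0].offset + items[0].size) -
			items[nritems - 1].offset;
	return LEAF_DATA_SIZE - used;
}
#endif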
/*
 * min slot controls the lowest index we're willing to push to the
 * right. We'll push up to and including min_slot, but no lower
 */
static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
				      struct btrfs_root *root,
				      struct btrfs_path *path,
				      int data_size, int empty,
				      struct extent_buffer *right,
				      int free_space, u32 left_nritems,
				      u32 min_slot)
{
	struct extent_buffer *left = path->nodes[0];
	struct extent_buffer *upper = path->nodes[1];
	struct btrfs_disk_key disk_key;
	int slot;
	u32 i;
	int push_space = 0;
	int push_items = 0;
	struct btrfs_item *item;
	u32 nr;
	u32 right_nritems;
	u32 data_end;
	u32 this_item_size;

	if (empty)
		nr = 0;
	else
		nr = max_t(u32, 1, min_slot);

	if (path->slots[0] >= left_nritems)
		push_space += data_size;

	slot = path->slots[1];
	i = left_nritems - 1;
	while (i >= nr) {
		item = btrfs_item_nr(left, i);

		if (!empty && push_items > 0) {
			if (path->slots[0] > i)
				break;
			if (path->slots[0] == i) {
				int space = btrfs_leaf_free_space(root, left);
				if (space + push_space * 2 > free_space)
					break;
			}
		}

		if (path->slots[0] == i)
			push_space += data_size;

		this_item_size = btrfs_item_size(left, item);
		if (this_item_size + sizeof(*item) + push_space > free_space)
			break;

		push_items++;
		push_space += this_item_size + sizeof(*item);
		if (i == 0)
			break;
		i--;
	}

	if (push_items == 0)
		goto out_unlock;

	if (!empty && push_items == left_nritems)
		WARN_ON(1);

	/* push left to right */
	right_nritems = btrfs_header_nritems(right);

	push_space = btrfs_item_end_nr(left, left_nritems - push_items);
	push_space -= leaf_data_end(root, left);

	/* make room in the right data area */
	data_end = leaf_data_end(root, right);
	memmove_extent_buffer(right,
			      btrfs_leaf_data(right) + data_end - push_space,
			      btrfs_leaf_data(right) + data_end,
			      BTRFS_LEAF_DATA_SIZE(root) - data_end);

	/* copy from the left data area */
	copy_extent_buffer(right, left, btrfs_leaf_data(right) +
			   BTRFS_LEAF_DATA_SIZE(root) - push_space,
			   btrfs_leaf_data(left) + leaf_data_end(root, left),
			   push_space);

	memmove_extent_buffer(right, btrfs_item_nr_offset(push_items),
			      btrfs_item_nr_offset(0),
			      right_nritems * sizeof(struct btrfs_item));

	/* copy the items from left to right */
	copy_extent_buffer(right, left, btrfs_item_nr_offset(0),
			   btrfs_item_nr_offset(left_nritems - push_items),
			   push_items * sizeof(struct btrfs_item));

	/* update the item pointers */
	right_nritems += push_items;
	btrfs_set_header_nritems(right, right_nritems);
	push_space = BTRFS_LEAF_DATA_SIZE(root);
	for (i = 0; i < right_nritems; i++) {
		item = btrfs_item_nr(right, i);
		push_space -= btrfs_item_size(right, item);
		btrfs_set_item_offset(right, item, push_space);
	}

	left_nritems -= push_items;
	btrfs_set_header_nritems(left, left_nritems);

	if (left_nritems)
		btrfs_mark_buffer_dirty(left);
	else
		clean_tree_block(trans, root, left);

	btrfs_mark_buffer_dirty(right);

	btrfs_item_key(right, &disk_key, 0);
	btrfs_set_node_key(upper, &disk_key, slot + 1);
	btrfs_mark_buffer_dirty(upper);

	/* then fixup the leaf pointer in the path */
	if (path->slots[0] >= left_nritems) {
		path->slots[0] -= left_nritems;
		if (btrfs_header_nritems(path->nodes[0]) == 0)
			clean_tree_block(trans, root, path->nodes[0]);
		btrfs_tree_unlock(path->nodes[0]);
		free_extent_buffer(path->nodes[0]);
		path->nodes[0] = right;
		path->slots[1] += 1;
	} else {
		btrfs_tree_unlock(right);
		free_extent_buffer(right);
	}
	return 0;

out_unlock:
	btrfs_tree_unlock(right);
	free_extent_buffer(right);
	return 1;
}

/*
 * push some data in the path leaf to the right, trying to free up at
 * least data_size bytes. returns zero if the push worked, nonzero otherwise
 *
 * returns 1 if the push failed because the other node didn't have enough
 * room, 0 if everything worked out and < 0 if there were major errors.
 *
 * this will push starting from min_slot to the end of the leaf. It won't
 * push any slot lower than min_slot
 */
static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
			   *root, struct btrfs_path *path,
			   int min_data_size, int data_size,
			   int empty, u32 min_slot)
{
	struct extent_buffer *left = path->nodes[0];
	struct extent_buffer *right;
	struct extent_buffer *upper;
	int slot;
	int free_space;
	u32 left_nritems;
	int ret;

	if (!path->nodes[1])
		return 1;

	slot = path->slots[1];
	upper = path->nodes[1];
	if (slot >= btrfs_header_nritems(upper) - 1)
		return 1;

	btrfs_assert_tree_locked(path->nodes[1]);

	right = read_node_slot(root, upper, slot + 1);
	if (right == NULL)
		return 1;

	btrfs_tree_lock(right);
	btrfs_set_lock_blocking(right);

	free_space = btrfs_leaf_free_space(root, right);
	if (free_space < data_size)
		goto out_unlock;

	/* cow and double check */
	ret = btrfs_cow_block(trans, root, right, upper,
			      slot + 1, &right);
	if (ret)
		goto out_unlock;

	free_space = btrfs_leaf_free_space(root, right);
	if (free_space < data_size)
		goto out_unlock;

	left_nritems = btrfs_header_nritems(left);
	if (left_nritems == 0)
		goto out_unlock;

	return __push_leaf_right(trans, root, path, min_data_size, empty,
				 right, free_space, left_nritems, min_slot);
out_unlock:
	btrfs_tree_unlock(right);
	free_extent_buffer(right);
	return 1;
}
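/*
 * Illustrative aside (not part of the original file): the wrapper above
 * uses a check/cow/re-check pattern, since cowing the neighbor can change
 * which block we are looking at. A compact sketch of just that shape;
 * struct leaf, cow() and push_right() are invented stand-ins.
 */
#if 0	/* sketch only, never built */
struct leaf { int free; };
static struct leaf *cow(struct leaf *l) { return l; }	/* may relocate */
static int push_right(struct leaf *right, int need)
{
	if (right->free < need)
		return 1;		/* cheap check before the cow */
	right = cow(right);		/* cow can pick a new block */
	if (right->free < need)
		return 1;		/* so check again afterwards */
	return 0;			/* safe to move items over */
}
#endif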
/*
 * push some data in the path leaf to the left, trying to free up at
 * least data_size bytes. returns zero if the push worked, nonzero otherwise
 *
 * max_slot can put a limit on how far into the leaf we'll push items. The
 * item at 'max_slot' won't be touched. Use (u32)-1 to make us do all the
 * items
 */
static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
				     struct btrfs_root *root,
				     struct btrfs_path *path, int data_size,
				     int empty, struct extent_buffer *left,
				     int free_space, u32 right_nritems,
				     u32 max_slot)
{
	struct btrfs_disk_key disk_key;
	struct extent_buffer *right = path->nodes[0];
	int i;
	int push_space = 0;
	int push_items = 0;
	struct btrfs_item *item;
	u32 old_left_nritems;
	u32 nr;
	int ret = 0;
	int wret;
	u32 this_item_size;
	u32 old_left_item_size;

	if (empty)
		nr = min(right_nritems, max_slot);
	else
		nr = min(right_nritems - 1, max_slot);

	for (i = 0; i < nr; i++) {
		item = btrfs_item_nr(right, i);

		if (!empty && push_items > 0) {
			if (path->slots[0] < i)
				break;
			if (path->slots[0] == i) {
				int space = btrfs_leaf_free_space(root, right);
				if (space + push_space * 2 > free_space)
					break;
			}
		}

		if (path->slots[0] == i)
			push_space += data_size;

		this_item_size = btrfs_item_size(right, item);
		if (this_item_size + sizeof(*item) + push_space > free_space)
			break;

		push_items++;
		push_space += this_item_size + sizeof(*item);
	}

	if (push_items == 0) {
		ret = 1;
		goto out;
	}
	if (!empty && push_items == btrfs_header_nritems(right))
		WARN_ON(1);

	/* push data from right to left */
	copy_extent_buffer(left, right,
			   btrfs_item_nr_offset(btrfs_header_nritems(left)),
			   btrfs_item_nr_offset(0),
			   push_items * sizeof(struct btrfs_item));

	push_space = BTRFS_LEAF_DATA_SIZE(root) -
		     btrfs_item_offset_nr(right, push_items - 1);

	copy_extent_buffer(left, right, btrfs_leaf_data(left) +
			   leaf_data_end(root, left) - push_space,
			   btrfs_leaf_data(right) +
			   btrfs_item_offset_nr(right, push_items - 1),
			   push_space);
	old_left_nritems = btrfs_header_nritems(left);
	BUG_ON(old_left_nritems <= 0);

	old_left_item_size = btrfs_item_offset_nr(left, old_left_nritems - 1);
	for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
		u32 ioff;

		item = btrfs_item_nr(left, i);

		ioff = btrfs_item_offset(left, item);
		btrfs_set_item_offset(left, item,
		      ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size));
	}
	btrfs_set_header_nritems(left, old_left_nritems + push_items);

	/* fixup right node */
	if (push_items > right_nritems) {
		printk(KERN_CRIT "push items %d nr %u\n", push_items,
		       right_nritems);
		WARN_ON(1);
	}

	if (push_items < right_nritems) {
		push_space = btrfs_item_offset_nr(right, push_items - 1) -
			     leaf_data_end(root, right);
		memmove_extent_buffer(right, btrfs_leaf_data(right) +
				      BTRFS_LEAF_DATA_SIZE(root) - push_space,
				      btrfs_leaf_data(right) +
				      leaf_data_end(root, right), push_space);

		memmove_extent_buffer(right, btrfs_item_nr_offset(0),
			      btrfs_item_nr_offset(push_items),
			      (btrfs_header_nritems(right) - push_items) *
			      sizeof(struct btrfs_item));
	}
	right_nritems -= push_items;
	btrfs_set_header_nritems(right, right_nritems);
	push_space = BTRFS_LEAF_DATA_SIZE(root);
	for (i = 0; i < right_nritems; i++) {
		item = btrfs_item_nr(right, i);

		push_space = push_space - btrfs_item_size(right, item);
		btrfs_set_item_offset(right, item, push_space);
	}

	btrfs_mark_buffer_dirty(left);
	if (right_nritems)
		btrfs_mark_buffer_dirty(right);
	else
		clean_tree_block(trans, root, right);

	btrfs_item_key(right, &disk_key, 0);
	wret = fixup_low_keys(trans, root, path, &disk_key, 1);
	if (wret)
		ret = wret;

	/* then fixup the leaf pointer in the path */
	if (path->slots[0] < push_items) {
		path->slots[0] += old_left_nritems;
		btrfs_tree_unlock(path->nodes[0]);
		free_extent_buffer(path->nodes[0]);
		path->nodes[0] = left;
		path->slots[1] -= 1;
	} else {
		btrfs_tree_unlock(left);
		free_extent_buffer(left);
		path->slots[0] -= push_items;
	}
	BUG_ON(path->slots[0] < 0);
	return ret;
out:
	btrfs_tree_unlock(left);
	free_extent_buffer(left);
	return ret;
}

/*
 * push some data in the path leaf to the left, trying to free up at
 * least data_size bytes. returns zero if the push worked, nonzero otherwise
 *
 * max_slot can put a limit on how far into the leaf we'll push items. The
 * item at 'max_slot' won't be touched. Use (u32)-1 to make us push all the
 * items
 */
static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
			  *root, struct btrfs_path *path, int min_data_size,
			  int data_size, int empty, u32 max_slot)
{
	struct extent_buffer *right = path->nodes[0];
	struct extent_buffer *left;
	int slot;
	int free_space;
	u32 right_nritems;
	int ret = 0;

	slot = path->slots[1];
	if (slot == 0)
		return 1;
	if (!path->nodes[1])
		return 1;

	right_nritems = btrfs_header_nritems(right);
	if (right_nritems == 0)
		return 1;

	btrfs_assert_tree_locked(path->nodes[1]);

	left = read_node_slot(root, path->nodes[1], slot - 1);
	if (left == NULL)
		return 1;

	btrfs_tree_lock(left);
	btrfs_set_lock_blocking(left);

	free_space = btrfs_leaf_free_space(root, left);
	if (free_space < data_size) {
		ret = 1;
		goto out;
	}

	/* cow and double check */
	ret = btrfs_cow_block(trans, root, left,
			      path->nodes[1], slot - 1, &left);
	if (ret) {
		/* we hit -ENOSPC, but it isn't fatal here */
		ret = 1;
		goto out;
	}

	free_space = btrfs_leaf_free_space(root, left);
	if (free_space < data_size) {
		ret = 1;
		goto out;
	}

	return __push_leaf_left(trans, root, path, min_data_size,
				empty, left, free_space, right_nritems,
				max_slot);
out:
	btrfs_tree_unlock(left);
	free_extent_buffer(left);
	return ret;
}
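/*
 * Illustrative aside (not part of the original file): min_slot/max_slot
 * simply clamp how many items a push may touch. For the left push, the
 * number of candidate items works out as below; eligible_items() is an
 * invented name.
 */
#if 0	/* sketch only, never built */
static unsigned int eligible_items(unsigned int right_nritems,
				   unsigned int max_slot, int empty)
{
	/* a non-empty push always leaves the last item behind */
	unsigned int nr = empty ? right_nritems : right_nritems - 1;

	return nr < max_slot ? nr : max_slot; /* item at max_slot stays put */
}
#endif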
/*
 * split the path's leaf in two, making sure there is at least data_size
 * available for the resulting leaf level of the path.
 *
 * returns 0 if all went well and < 0 on failure.
 */
static noinline int copy_for_split(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root,
				   struct btrfs_path *path,
				   struct extent_buffer *l,
				   struct extent_buffer *right,
				   int slot, int mid, int nritems)
{
	int data_copy_size;
	int rt_data_off;
	int i;
	int ret = 0;
	int wret;
	struct btrfs_disk_key disk_key;

	nritems = nritems - mid;
	btrfs_set_header_nritems(right, nritems);
	data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l);

	copy_extent_buffer(right, l, btrfs_item_nr_offset(0),
			   btrfs_item_nr_offset(mid),
			   nritems * sizeof(struct btrfs_item));

	copy_extent_buffer(right, l,
			   btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
			   data_copy_size, btrfs_leaf_data(l) +
			   leaf_data_end(root, l), data_copy_size);

	rt_data_off = BTRFS_LEAF_DATA_SIZE(root) -
		      btrfs_item_end_nr(l, mid);

	for (i = 0; i < nritems; i++) {
		struct btrfs_item *item = btrfs_item_nr(right, i);
		u32 ioff;

		ioff = btrfs_item_offset(right, item);
		btrfs_set_item_offset(right, item, ioff + rt_data_off);
	}

	btrfs_set_header_nritems(l, mid);
	ret = 0;
	btrfs_item_key(right, &disk_key, 0);
	wret = insert_ptr(trans, root, path, &disk_key, right->start,
			  path->slots[1] + 1, 1);
	if (wret)
		ret = wret;

	btrfs_mark_buffer_dirty(right);
	btrfs_mark_buffer_dirty(l);
	BUG_ON(path->slots[0] != slot);

	if (mid <= slot) {
		btrfs_tree_unlock(path->nodes[0]);
		free_extent_buffer(path->nodes[0]);
		path->nodes[0] = right;
		path->slots[0] -= mid;
		path->slots[1] += 1;
	} else {
		btrfs_tree_unlock(right);
		free_extent_buffer(right);
	}

	BUG_ON(path->slots[0] < 0);

	return ret;
}

/*
 * double splits happen when we need to insert a big item in the middle
 * of a leaf. A double split can leave us with 3 mostly empty leaves:
 * leaf: [ slots 0 - N] [ our target ] [ N + 1 - total in leaf ]
 *          A                 B                 C
 *
 * We avoid this by trying to push the items on either side of our target
 * into the adjacent leaves. If all goes well we can avoid the double split
 * completely.
 */
static noinline int push_for_double_split(struct btrfs_trans_handle *trans,
					  struct btrfs_root *root,
					  struct btrfs_path *path,
					  int data_size)
{
	int ret;
	int progress = 0;
	int slot;
	u32 nritems;

	slot = path->slots[0];

	/*
	 * try to push all the items after our slot into the
	 * right leaf
	 */
	ret = push_leaf_right(trans, root, path, 1, data_size, 0, slot);
	if (ret < 0)
		return ret;

	if (ret == 0)
		progress++;

	nritems = btrfs_header_nritems(path->nodes[0]);
	/*
	 * our goal is to get our slot at the start or end of a leaf.
	 * If we've done so, we're done
	 */
	if (path->slots[0] == 0 || path->slots[0] == nritems)
		return 0;

	if (btrfs_leaf_free_space(root, path->nodes[0]) >= data_size)
		return 0;

	/* try to push all the items before our slot into the next leaf */
	slot = path->slots[0];
	ret = push_leaf_left(trans, root, path, 1, data_size, 0, slot);
	if (ret < 0)
		return ret;

	if (ret == 0)
		progress++;

	if (progress)
		return 0;
	return 1;
}

/*
 * split the path's leaf in two, making sure there is at least data_size
 * available for the resulting leaf level of the path.
 *
 * returns 0 if all went well and < 0 on failure.
 */
static noinline int split_leaf(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root,
			       struct btrfs_key *ins_key,
			       struct btrfs_path *path, int data_size,
			       int extend)
{
	struct btrfs_disk_key disk_key;
	struct extent_buffer *l;
	u32 nritems;
	int mid;
	int slot;
	struct extent_buffer *right;
	int ret = 0;
	int wret;
	int split;
	int num_doubles = 0;
	int tried_avoid_double = 0;

	l = path->nodes[0];
	slot = path->slots[0];
	if (extend && data_size + btrfs_item_size_nr(l, slot) +
	    sizeof(struct btrfs_item) > BTRFS_LEAF_DATA_SIZE(root))
		return -EOVERFLOW;

	/* first try to make some room by pushing left and right */
	if (data_size) {
		wret = push_leaf_right(trans, root, path, data_size,
				       data_size, 0, 0);
		if (wret < 0)
			return wret;
		if (wret) {
			wret = push_leaf_left(trans, root, path, data_size,
					      data_size, 0, (u32)-1);
			if (wret < 0)
				return wret;
		}
		l = path->nodes[0];

		/* did the pushes work? */
		if (btrfs_leaf_free_space(root, l) >= data_size)
			return 0;
	}

	if (!path->nodes[1]) {
		ret = insert_new_root(trans, root, path, 1);
		if (ret)
			return ret;
	}
again:
	split = 1;
	l = path->nodes[0];
	slot = path->slots[0];
	nritems = btrfs_header_nritems(l);
	mid = (nritems + 1) / 2;

	if (mid <= slot) {
		if (nritems == 1 ||
		    leaf_space_used(l, mid, nritems - mid) + data_size >
		    BTRFS_LEAF_DATA_SIZE(root)) {
			if (slot >= nritems) {
				split = 0;
			} else {
				mid = slot;
				if (mid != nritems &&
				    leaf_space_used(l, mid, nritems - mid) +
				    data_size > BTRFS_LEAF_DATA_SIZE(root)) {
					if (data_size && !tried_avoid_double)
						goto push_for_double;
					split = 2;
				}
			}
		}
	} else {
		if (leaf_space_used(l, 0, mid) + data_size >
		    BTRFS_LEAF_DATA_SIZE(root)) {
			if (!extend && data_size && slot == 0) {
				split = 0;
			} else if ((extend || !data_size) && slot == 0) {
				mid = 1;
			} else {
				mid = slot;
				if (mid != nritems &&
				    leaf_space_used(l, mid, nritems - mid) +
				    data_size > BTRFS_LEAF_DATA_SIZE(root)) {
					if (data_size && !tried_avoid_double)
						goto push_for_double;
					split = 2;
				}
			}
		}
	}

	if (split == 0)
		btrfs_cpu_key_to_disk(&disk_key, ins_key);
	else
		btrfs_item_key(l, &disk_key, mid);

	right = btrfs_alloc_free_block(trans, root, root->leafsize, 0,
				       root->root_key.objectid,
				       &disk_key, 0, l->start, 0);
	if (IS_ERR(right))
		return PTR_ERR(right);

	root_add_used(root, root->leafsize);

	memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header));
	btrfs_set_header_bytenr(right, right->start);
	btrfs_set_header_generation(right, trans->transid);
	btrfs_set_header_backref_rev(right, BTRFS_MIXED_BACKREF_REV);
	btrfs_set_header_owner(right, root->root_key.objectid);
	btrfs_set_header_level(right, 0);
	write_extent_buffer(right, root->fs_info->fsid,
			    (unsigned long)btrfs_header_fsid(right),
			    BTRFS_FSID_SIZE);

	write_extent_buffer(right, root->fs_info->chunk_tree_uuid,
			    (unsigned long)btrfs_header_chunk_tree_uuid(right),
			    BTRFS_UUID_SIZE);

	if (split == 0) {
		if (mid <= slot) {
			btrfs_set_header_nritems(right, 0);
			wret = insert_ptr(trans, root, path,
					  &disk_key, right->start,
					  path->slots[1] + 1, 1);
			if (wret)
				ret = wret;

			btrfs_tree_unlock(path->nodes[0]);
			free_extent_buffer(path->nodes[0]);
			path->nodes[0] = right;
			path->slots[0] = 0;
			path->slots[1] += 1;
		} else {
			btrfs_set_header_nritems(right, 0);
			wret = insert_ptr(trans, root, path,
					  &disk_key,
					  right->start,
					  path->slots[1], 1);
			if (wret)
				ret = wret;
			btrfs_tree_unlock(path->nodes[0]);
			free_extent_buffer(path->nodes[0]);
			path->nodes[0] = right;
			path->slots[0] = 0;
			if (path->slots[1] == 0) {
				wret = fixup_low_keys(trans, root,
						      path, &disk_key, 1);
				if (wret)
					ret = wret;
			}
		}
		btrfs_mark_buffer_dirty(right);
		return ret;
	}

	ret = copy_for_split(trans, root, path, l, right, slot, mid, nritems);
	BUG_ON(ret);

	if (split == 2) {
		BUG_ON(num_doubles != 0);
		num_doubles++;
		goto again;
	}

	return ret;

push_for_double:
	push_for_double_split(trans, root, path, data_size);
	tried_avoid_double = 1;
	if (btrfs_leaf_free_space(root, path->nodes[0]) >= data_size)
		return 0;
	goto again;
}

static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
					 struct btrfs_root *root,
					 struct btrfs_path *path, int ins_len)
{
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct btrfs_file_extent_item *fi;
	u64 extent_len = 0;
	u32 item_size;
	int ret;

	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);

	BUG_ON(key.type != BTRFS_EXTENT_DATA_KEY &&
	       key.type != BTRFS_EXTENT_CSUM_KEY);

	if (btrfs_leaf_free_space(root, leaf) >= ins_len)
		return 0;

	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
	if (key.type == BTRFS_EXTENT_DATA_KEY) {
		fi = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);
		extent_len = btrfs_file_extent_num_bytes(leaf, fi);
	}
	btrfs_release_path(path);

	path->keep_locks = 1;
	path->search_for_split = 1;
	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
	path->search_for_split = 0;
	if (ret < 0)
		goto err;

	ret = -EAGAIN;
	leaf = path->nodes[0];
	/* if our item isn't there or got smaller, return now */
	if (ret > 0 || item_size != btrfs_item_size_nr(leaf, path->slots[0]))
		goto err;

	/* the leaf has changed, it now has room. return now */
	if (btrfs_leaf_free_space(root, path->nodes[0]) >= ins_len)
		goto err;

	if (key.type == BTRFS_EXTENT_DATA_KEY) {
		fi = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);
		if (extent_len != btrfs_file_extent_num_bytes(leaf, fi))
			goto err;
	}

	btrfs_set_path_blocking(path);
	ret = split_leaf(trans, root, &key, path, ins_len, 1);
	if (ret)
		goto err;

	path->keep_locks = 0;
	btrfs_unlock_up_safe(path, 1);
	return 0;
err:
	path->keep_locks = 0;
	return ret;
}

static noinline int split_item(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root,
			       struct btrfs_path *path,
			       struct btrfs_key *new_key,
			       unsigned long split_offset)
{
	struct extent_buffer *leaf;
	struct btrfs_item *item;
	struct btrfs_item *new_item;
	int slot;
	char *buf;
	u32 nritems;
	u32 item_size;
	u32 orig_offset;
	struct btrfs_disk_key disk_key;

	leaf = path->nodes[0];
	BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item));

	btrfs_set_path_blocking(path);

	item = btrfs_item_nr(leaf, path->slots[0]);
	orig_offset = btrfs_item_offset(leaf, item);
	item_size = btrfs_item_size(leaf, item);

	buf = kmalloc(item_size, GFP_NOFS);
	if (!buf)
		return -ENOMEM;

	read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf,
			   path->slots[0]), item_size);

	slot = path->slots[0] + 1;
	nritems = btrfs_header_nritems(leaf);
	if (slot != nritems) {
		/* shift the items */
		memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1),
				btrfs_item_nr_offset(slot),
				(nritems - slot) * sizeof(struct btrfs_item));
	}

	btrfs_cpu_key_to_disk(&disk_key, new_key);
	btrfs_set_item_key(leaf, &disk_key, slot);

	new_item = btrfs_item_nr(leaf, slot);

	btrfs_set_item_offset(leaf, new_item, orig_offset);
	btrfs_set_item_size(leaf, new_item, item_size - split_offset);
	btrfs_set_item_offset(leaf, item,
			      orig_offset + item_size - split_offset);
	btrfs_set_item_size(leaf, item, split_offset);

	btrfs_set_header_nritems(leaf, nritems + 1);

	/* write the data for the start of the original item */
	write_extent_buffer(leaf, buf,
			    btrfs_item_ptr_offset(leaf, path->slots[0]),
			    split_offset);

	/* write the data for the new item */
	write_extent_buffer(leaf, buf + split_offset,
			    btrfs_item_ptr_offset(leaf, slot),
			    item_size - split_offset);
	btrfs_mark_buffer_dirty(leaf);

	BUG_ON(btrfs_leaf_free_space(root, leaf) < 0);
	kfree(buf);
	return 0;
}

/*
 * This function splits a single item into two items,
 * giving 'new_key' to the new item and splitting the
 * old one at split_offset (from the start of the item).
 *
 * The path may be released by this operation. After
 * the split, the path is pointing to the old item. The
 * new item is going to be in the same node as the old one.
 *
 * Note, the item being split must be small enough to live alone on
 * a tree block with room for one extra struct btrfs_item
 *
 * This allows us to split the item in place, keeping a lock on the
 * leaf the entire time.
 */
int btrfs_split_item(struct btrfs_trans_handle *trans,
		     struct btrfs_root *root,
		     struct btrfs_path *path,
		     struct btrfs_key *new_key,
		     unsigned long split_offset)
{
	int ret;
	ret = setup_leaf_for_split(trans, root, path,
				   sizeof(struct btrfs_item));
	if (ret)
		return ret;

	ret = split_item(trans, root, path, new_key, split_offset);
	return ret;
}

/*
 * This function duplicates an item, giving 'new_key' to the new item.
 * It guarantees both items live in the same tree leaf and the new item
 * is contiguous with the original item.
 *
 * This allows us to split a file extent in place, keeping a lock on the
 * leaf the entire time.
 */
int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
			 struct btrfs_root *root,
			 struct btrfs_path *path,
			 struct btrfs_key *new_key)
{
	struct extent_buffer *leaf;
	int ret;
	u32 item_size;

	leaf = path->nodes[0];
	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
	ret = setup_leaf_for_split(trans, root, path,
				   item_size + sizeof(struct btrfs_item));
	if (ret)
		return ret;

	path->slots[0]++;
	ret = setup_items_for_insert(trans, root, path, new_key, &item_size,
				     item_size, item_size +
				     sizeof(struct btrfs_item), 1);
	BUG_ON(ret);

	leaf = path->nodes[0];
	memcpy_extent_buffer(leaf,
			     btrfs_item_ptr_offset(leaf, path->slots[0]),
			     btrfs_item_ptr_offset(leaf, path->slots[0] - 1),
			     item_size);
	return 0;
}
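/*
 * Illustrative aside (not part of the original file): because item data
 * grows downward, splitting at split_offset means the first split_offset
 * bytes keep the high end of the old range and the remainder becomes the
 * new item at the old offset. A sketch of just the header bookkeeping;
 * struct item and split_in_place() are invented stand-ins.
 */
#if 0	/* sketch only, never built */
struct item { unsigned int offset, size; };
static void split_in_place(struct item *old, struct item *new,
			   unsigned int split)
{
	new->offset = old->offset;		/* tail keeps the low offset */
	new->size = old->size - split;
	old->offset += old->size - split;	/* head slides up */
	old->size = split;
}
#endif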
/*
 * make the item pointed to by the path smaller. new_size indicates
 * how small to make it, and from_end tells us if we just chop bytes
 * off the end of the item or if we shift the item to chop bytes off
 * the front.
 */
int btrfs_truncate_item(struct btrfs_trans_handle *trans,
			struct btrfs_root *root,
			struct btrfs_path *path,
			u32 new_size, int from_end)
{
	int slot;
	struct extent_buffer *leaf;
	struct btrfs_item *item;
	u32 nritems;
	unsigned int data_end;
	unsigned int old_data_start;
	unsigned int old_size;
	unsigned int size_diff;
	int i;

	leaf = path->nodes[0];
	slot = path->slots[0];

	old_size = btrfs_item_size_nr(leaf, slot);
	if (old_size == new_size)
		return 0;

	nritems = btrfs_header_nritems(leaf);
	data_end = leaf_data_end(root, leaf);

	old_data_start = btrfs_item_offset_nr(leaf, slot);

	size_diff = old_size - new_size;

	BUG_ON(slot < 0);
	BUG_ON(slot >= nritems);

	/*
	 * item0..itemN ... dataN.offset..dataN.size .. data0.size
	 */
	/* first correct the data pointers */
	for (i = slot; i < nritems; i++) {
		u32 ioff;
		item = btrfs_item_nr(leaf, i);

		ioff = btrfs_item_offset(leaf, item);
		btrfs_set_item_offset(leaf, item, ioff + size_diff);
	}

	/* shift the data */
	if (from_end) {
		memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
			      data_end + size_diff, btrfs_leaf_data(leaf) +
			      data_end, old_data_start + new_size - data_end);
	} else {
		struct btrfs_disk_key disk_key;
		u64 offset;

		btrfs_item_key(leaf, &disk_key, slot);

		if (btrfs_disk_key_type(&disk_key) == BTRFS_EXTENT_DATA_KEY) {
			unsigned long ptr;
			struct btrfs_file_extent_item *fi;

			fi = btrfs_item_ptr(leaf, slot,
					    struct btrfs_file_extent_item);
			fi = (struct btrfs_file_extent_item *)(
			     (unsigned long)fi - size_diff);

			if (btrfs_file_extent_type(leaf, fi) ==
			    BTRFS_FILE_EXTENT_INLINE) {
				ptr = btrfs_item_ptr_offset(leaf, slot);
				memmove_extent_buffer(leaf, ptr,
				      (unsigned long)fi,
				      offsetof(struct btrfs_file_extent_item,
					       disk_bytenr));
			}
		}

		memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
			      data_end + size_diff, btrfs_leaf_data(leaf) +
			      data_end, old_data_start - data_end);

		offset = btrfs_disk_key_offset(&disk_key);
		btrfs_set_disk_key_offset(&disk_key, offset + size_diff);
		btrfs_set_item_key(leaf, &disk_key, slot);
		if (slot == 0)
			fixup_low_keys(trans, root, path, &disk_key, 1);
	}

	item = btrfs_item_nr(leaf, slot);
	btrfs_set_item_size(leaf, item, new_size);
	btrfs_mark_buffer_dirty(leaf);

	if (btrfs_leaf_free_space(root, leaf) < 0) {
		btrfs_print_leaf(root, leaf);
		BUG();
	}
	return 0;
}
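/*
 * Illustrative aside (not part of the original file): shrinking from the
 * end is "slide everything at or below this item toward the back, then
 * bump the affected offsets". A userspace sketch of the from_end case;
 * struct item and truncate_from_end() are invented stand-ins.
 */
#if 0	/* sketch only, never built */
#include <string.h>
struct item { unsigned int offset, size; };
static void truncate_from_end(char *buf, struct item *items, int nritems,
			      int slot, unsigned int new_size,
			      unsigned int data_end)
{
	unsigned int old_start = items[slot].offset;
	unsigned int diff = items[slot].size - new_size;
	int i;

	for (i = slot; i < nritems; i++)
		items[i].offset += diff;	/* data moves toward the back */
	memmove(buf + data_end + diff, buf + data_end,
		old_start + new_size - data_end);
	items[slot].size = new_size;
}
#endif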
/*
 * make the item pointed to by the path bigger. data_size is the number
 * of bytes to add to the item's current size.
 */
int btrfs_extend_item(struct btrfs_trans_handle *trans,
		      struct btrfs_root *root, struct btrfs_path *path,
		      u32 data_size)
{
	int slot;
	struct extent_buffer *leaf;
	struct btrfs_item *item;
	u32 nritems;
	unsigned int data_end;
	unsigned int old_data;
	unsigned int old_size;
	int i;

	leaf = path->nodes[0];

	nritems = btrfs_header_nritems(leaf);
	data_end = leaf_data_end(root, leaf);

	if (btrfs_leaf_free_space(root, leaf) < data_size) {
		btrfs_print_leaf(root, leaf);
		BUG();
	}
	slot = path->slots[0];
	old_data = btrfs_item_end_nr(leaf, slot);

	BUG_ON(slot < 0);
	if (slot >= nritems) {
		btrfs_print_leaf(root, leaf);
		printk(KERN_CRIT "slot %d too large, nritems %d\n",
		       slot, nritems);
		BUG_ON(1);
	}

	/*
	 * item0..itemN ... dataN.offset..dataN.size .. data0.size
	 */
	/* first correct the data pointers */
	for (i = slot; i < nritems; i++) {
		u32 ioff;
		item = btrfs_item_nr(leaf, i);

		ioff = btrfs_item_offset(leaf, item);
		btrfs_set_item_offset(leaf, item, ioff - data_size);
	}

	/* shift the data */
	memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
		      data_end - data_size, btrfs_leaf_data(leaf) +
		      data_end, old_data - data_end);

	data_end = old_data;
	old_size = btrfs_item_size_nr(leaf, slot);
	item = btrfs_item_nr(leaf, slot);
	btrfs_set_item_size(leaf, item, old_size + data_size);
	btrfs_mark_buffer_dirty(leaf);

	if (btrfs_leaf_free_space(root, leaf) < 0) {
		btrfs_print_leaf(root, leaf);
		BUG();
	}
	return 0;
}
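/*
 * Illustrative aside (not part of the original file): extending is the
 * mirror image of truncating, with offsets dropping and the data block
 * sliding toward the front. Sketch under the same invented struct item;
 * extend_in_place() is an invented name.
 */
#if 0	/* sketch only, never built */
#include <string.h>
struct item { unsigned int offset, size; };
static void extend_in_place(char *buf, struct item *items, int nritems,
			    int slot, unsigned int grow, unsigned int data_end)
{
	unsigned int old_end = items[slot].offset + items[slot].size;
	int i;

	for (i = slot; i < nritems; i++)
		items[i].offset -= grow;	/* data moves toward the front */
	memmove(buf + data_end - grow, buf + data_end, old_end - data_end);
	items[slot].size += grow;
}
#endif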
/*
 * Given a key and some data, insert items into the tree.
 * This does all the path init required, making room in the tree if needed.
 * Returns the number of keys that were inserted.
 */
int btrfs_insert_some_items(struct btrfs_trans_handle *trans,
			    struct btrfs_root *root,
			    struct btrfs_path *path,
			    struct btrfs_key *cpu_key, u32 *data_size,
			    int nr)
{
	struct extent_buffer *leaf;
	struct btrfs_item *item;
	int ret = 0;
	int slot;
	int i;
	u32 nritems;
	u32 total_data = 0;
	u32 total_size = 0;
	unsigned int data_end;
	struct btrfs_disk_key disk_key;
	struct btrfs_key found_key;

	for (i = 0; i < nr; i++) {
		if (total_size + data_size[i] + sizeof(struct btrfs_item) >
		    BTRFS_LEAF_DATA_SIZE(root)) {
			nr = i;
			break;
		}
		total_data += data_size[i];
		total_size += data_size[i] + sizeof(struct btrfs_item);
	}
	BUG_ON(nr == 0);

	ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1);
	if (ret == 0)
		return -EEXIST;
	if (ret < 0)
		goto out;

	leaf = path->nodes[0];

	nritems = btrfs_header_nritems(leaf);
	data_end = leaf_data_end(root, leaf);

	if (btrfs_leaf_free_space(root, leaf) < total_size) {
		for (i = nr - 1; i >= 0; i--) {
			total_data -= data_size[i];
			total_size -= data_size[i] + sizeof(struct btrfs_item);
			if (total_size < btrfs_leaf_free_space(root, leaf))
				break;
		}
		nr = i;
	}

	slot = path->slots[0];
	BUG_ON(slot < 0);

	if (slot != nritems) {
		unsigned int old_data = btrfs_item_end_nr(leaf, slot);

		item = btrfs_item_nr(leaf, slot);
		btrfs_item_key_to_cpu(leaf, &found_key, slot);

		/* figure out how many keys we can insert in here */
		total_data = data_size[0];
		for (i = 1; i < nr; i++) {
			if (btrfs_comp_cpu_keys(&found_key, cpu_key + i) <= 0)
				break;
			total_data += data_size[i];
		}
		nr = i;

		if (old_data < data_end) {
			btrfs_print_leaf(root, leaf);
			printk(KERN_CRIT "slot %d old_data %d data_end %d\n",
			       slot, old_data, data_end);
			BUG_ON(1);
		}
		/*
		 * item0..itemN ... dataN.offset..dataN.size ..
		 * data0.size
		 */
		/* first correct the data pointers */
		for (i = slot; i < nritems; i++) {
			u32 ioff;

			item = btrfs_item_nr(leaf, i);
			ioff = btrfs_item_offset(leaf, item);
			btrfs_set_item_offset(leaf, item, ioff - total_data);
		}
		/* shift the items */
		memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
			      btrfs_item_nr_offset(slot),
			      (nritems - slot) * sizeof(struct btrfs_item));

		/* shift the data */
		memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
			      data_end - total_data, btrfs_leaf_data(leaf) +
			      data_end, old_data - data_end);
		data_end = old_data;
	} else {
		/*
		 * this sucks but it has to be done, if we are inserting at
		 * the end of the leaf only insert 1 of the items, since we
		 * have no way of knowing what's on the next leaf and we'd
		 * have to drop our current locks to figure it out
		 */
		nr = 1;
	}

	/* setup the item for the new data */
	for (i = 0; i < nr; i++) {
		btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
		btrfs_set_item_key(leaf, &disk_key, slot + i);
		item = btrfs_item_nr(leaf, slot + i);
		btrfs_set_item_offset(leaf, item, data_end - data_size[i]);
		data_end -= data_size[i];
		btrfs_set_item_size(leaf, item, data_size[i]);
	}
	btrfs_set_header_nritems(leaf, nritems + nr);
	btrfs_mark_buffer_dirty(leaf);

	ret = 0;
	if (slot == 0) {
		btrfs_cpu_key_to_disk(&disk_key, cpu_key);
		ret = fixup_low_keys(trans, root, path, &disk_key, 1);
	}

	if (btrfs_leaf_free_space(root, leaf) < 0) {
		btrfs_print_leaf(root, leaf);
		BUG();
	}
out:
	if (!ret)
		ret = nr;
	return ret;
}

/*
 * this is a helper for btrfs_insert_empty_items, the main goal here is
 * to save stack depth by doing the bulk of the work in a function
 * that doesn't call btrfs_search_slot
 */
int setup_items_for_insert(struct btrfs_trans_handle *trans,
			   struct btrfs_root *root, struct btrfs_path *path,
			   struct btrfs_key *cpu_key, u32 *data_size,
			   u32 total_data, u32 total_size, int nr)
{
	struct btrfs_item *item;
	int i;
	u32 nritems;
	unsigned int data_end;
	struct btrfs_disk_key disk_key;
	int ret;
	struct extent_buffer *leaf;
	int slot;

	leaf = path->nodes[0];
	slot = path->slots[0];

	nritems = btrfs_header_nritems(leaf);
	data_end = leaf_data_end(root, leaf);

	if (btrfs_leaf_free_space(root, leaf) < total_size) {
		btrfs_print_leaf(root, leaf);
		printk(KERN_CRIT "not enough freespace need %u have %d\n",
		       total_size, btrfs_leaf_free_space(root, leaf));
		BUG();
	}

	if (slot != nritems) {
		unsigned int old_data = btrfs_item_end_nr(leaf, slot);

		if (old_data < data_end) {
			btrfs_print_leaf(root, leaf);
			printk(KERN_CRIT "slot %d old_data %d data_end %d\n",
			       slot, old_data, data_end);
			BUG_ON(1);
		}
		/*
		 * item0..itemN ... dataN.offset..dataN.size ..
		 * data0.size
		 */
		/* first correct the data pointers */
		for (i = slot; i < nritems; i++) {
			u32 ioff;

			item = btrfs_item_nr(leaf, i);
			ioff = btrfs_item_offset(leaf, item);
			btrfs_set_item_offset(leaf, item, ioff - total_data);
		}
		/* shift the items */
		memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
			      btrfs_item_nr_offset(slot),
			      (nritems - slot) * sizeof(struct btrfs_item));

		/* shift the data */
		memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
			      data_end - total_data, btrfs_leaf_data(leaf) +
			      data_end, old_data - data_end);
		data_end = old_data;
	}

	/* setup the item for the new data */
	for (i = 0; i < nr; i++) {
		btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
		btrfs_set_item_key(leaf, &disk_key, slot + i);
		item = btrfs_item_nr(leaf, slot + i);
		btrfs_set_item_offset(leaf, item, data_end - data_size[i]);
		data_end -= data_size[i];
		btrfs_set_item_size(leaf, item, data_size[i]);
	}

	btrfs_set_header_nritems(leaf, nritems + nr);

	ret = 0;
	if (slot == 0) {
		btrfs_cpu_key_to_disk(&disk_key, cpu_key);
		ret = fixup_low_keys(trans, root, path, &disk_key, 1);
	}
	btrfs_unlock_up_safe(path, 1);
	btrfs_mark_buffer_dirty(leaf);

	if (btrfs_leaf_free_space(root, leaf) < 0) {
		btrfs_print_leaf(root, leaf);
		BUG();
	}
	return ret;
}

/*
 * Given a key and some data, insert items into the tree.
 * This does all the path init required, making room in the tree if needed.
 */
int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root,
			     struct btrfs_path *path,
			     struct btrfs_key *cpu_key, u32 *data_size,
			     int nr)
{
	int ret = 0;
	int slot;
	int i;
	u32 total_size = 0;
	u32 total_data = 0;

	for (i = 0; i < nr; i++)
		total_data += data_size[i];

	total_size = total_data + (nr * sizeof(struct btrfs_item));
	ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1);
	if (ret == 0)
		return -EEXIST;
	if (ret < 0)
		goto out;

	slot = path->slots[0];
	BUG_ON(slot < 0);

	ret = setup_items_for_insert(trans, root, path, cpu_key, data_size,
				     total_data, total_size, nr);

out:
	return ret;
}

/*
 * Given a key and some data, insert an item into the tree.
 * This does all the path init required, making room in the tree if needed.
 */
int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
		      *root, struct btrfs_key *cpu_key, void *data, u32
		      data_size)
{
	int ret = 0;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	unsigned long ptr;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
	if (!ret) {
		leaf = path->nodes[0];
		ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
		write_extent_buffer(leaf, data, ptr, data_size);
		btrfs_mark_buffer_dirty(leaf);
	}
	btrfs_free_path(path);
	return ret;
}
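/*
 * Illustrative aside (not part of the original file): the size that gets
 * reserved before a multi-item insert is the payload plus one header per
 * item, exactly as computed above. As a tiny standalone helper;
 * insert_reservation() is an invented name.
 */
#if 0	/* sketch only, never built */
struct item { unsigned int offset, size; };	/* header per leaf item */
static unsigned int insert_reservation(const unsigned int *data_size, int nr)
{
	unsigned int total = 0;
	int i;

	for (i = 0; i < nr; i++)
		total += data_size[i] + sizeof(struct item);
	return total;	/* must fit in the leaf's free space */
}
#endif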
/*
 * delete the pointer from a given node.
 *
 * the tree should have been previously balanced so the deletion does not
 * empty a node.
 */
static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		   struct btrfs_path *path, int level, int slot)
{
	struct extent_buffer *parent = path->nodes[level];
	u32 nritems;
	int ret = 0;
	int wret;

	nritems = btrfs_header_nritems(parent);
	if (slot != nritems - 1) {
		memmove_extent_buffer(parent,
			      btrfs_node_key_ptr_offset(slot),
			      btrfs_node_key_ptr_offset(slot + 1),
			      sizeof(struct btrfs_key_ptr) *
			      (nritems - slot - 1));
	}
	nritems--;
	btrfs_set_header_nritems(parent, nritems);
	if (nritems == 0 && parent == root->node) {
		BUG_ON(btrfs_header_level(root->node) != 1);
		/* just turn the root into a leaf and break */
		btrfs_set_header_level(root->node, 0);
	} else if (slot == 0) {
		struct btrfs_disk_key disk_key;

		btrfs_node_key(parent, &disk_key, 0);
		wret = fixup_low_keys(trans, root, path, &disk_key, level + 1);
		if (wret)
			ret = wret;
	}
	btrfs_mark_buffer_dirty(parent);
	return ret;
}

/*
 * a helper function to delete the leaf pointed to by path->slots[1] and
 * path->nodes[1].
 *
 * This deletes the pointer in path->nodes[1] and frees the leaf
 * block extent. zero is returned if it all worked out, < 0 otherwise.
 *
 * The path must have already been setup for deleting the leaf, including
 * all the proper balancing. path->nodes[1] must be locked.
 */
static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root,
				   struct btrfs_path *path,
				   struct extent_buffer *leaf)
{
	int ret;

	WARN_ON(btrfs_header_generation(leaf) != trans->transid);
	ret = del_ptr(trans, root, path, 1, path->slots[1]);
	if (ret)
		return ret;

	/*
	 * btrfs_free_extent is expensive, we want to make sure we
	 * aren't holding any locks when we call it
	 */
	btrfs_unlock_up_safe(path, 0);

	root_sub_used(root, leaf->len);

	btrfs_free_tree_block(trans, root, leaf, 0, 1);
	return 0;
}
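/*
 * Illustrative aside (not part of the original file): removing a pointer
 * from a node is a single memmove that closes the gap, plus the special
 * case where the root empties down to one level. Sketch of the memmove
 * half only; del_slot() is an invented name.
 */
#if 0	/* sketch only, never built */
#include <string.h>
static void del_slot(int *ptrs, int *nritems, int slot)
{
	memmove(ptrs + slot, ptrs + slot + 1,
		(*nritems - slot - 1) * sizeof(int));	/* close the gap */
	(*nritems)--;
}
#endif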
/*
 * delete the item at the leaf level in path. If that empties
 * the leaf, remove it from the tree
 */
int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		    struct btrfs_path *path, int slot, int nr)
{
	struct extent_buffer *leaf;
	struct btrfs_item *item;
	int last_off;
	int dsize = 0;
	int ret = 0;
	int wret;
	int i;
	u32 nritems;

	leaf = path->nodes[0];
	last_off = btrfs_item_offset_nr(leaf, slot + nr - 1);

	for (i = 0; i < nr; i++)
		dsize += btrfs_item_size_nr(leaf, slot + i);

	nritems = btrfs_header_nritems(leaf);

	if (slot + nr != nritems) {
		int data_end = leaf_data_end(root, leaf);

		memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
			      data_end + dsize,
			      btrfs_leaf_data(leaf) + data_end,
			      last_off - data_end);

		for (i = slot + nr; i < nritems; i++) {
			u32 ioff;

			item = btrfs_item_nr(leaf, i);
			ioff = btrfs_item_offset(leaf, item);
			btrfs_set_item_offset(leaf, item, ioff + dsize);
		}

		memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot),
			      btrfs_item_nr_offset(slot + nr),
			      sizeof(struct btrfs_item) *
			      (nritems - slot - nr));
	}
	btrfs_set_header_nritems(leaf, nritems - nr);
	nritems -= nr;

	/* delete the leaf if we've emptied it */
	if (nritems == 0) {
		if (leaf == root->node) {
			btrfs_set_header_level(leaf, 0);
		} else {
			btrfs_set_path_blocking(path);
			clean_tree_block(trans, root, leaf);
			ret = btrfs_del_leaf(trans, root, path, leaf);
			BUG_ON(ret);
		}
	} else {
		int used = leaf_space_used(leaf, 0, nritems);
		if (slot == 0) {
			struct btrfs_disk_key disk_key;

			btrfs_item_key(leaf, &disk_key, 0);
			wret = fixup_low_keys(trans, root, path,
					      &disk_key, 1);
			if (wret)
				ret = wret;
		}

		/* delete the leaf if it is mostly empty */
		if (used < BTRFS_LEAF_DATA_SIZE(root) / 3) {
			/* push_leaf_left fixes the path.
			 * make sure the path still points to our leaf
			 * for possible call to del_ptr below
			 */
			slot = path->slots[1];
			extent_buffer_get(leaf);

			btrfs_set_path_blocking(path);
			wret = push_leaf_left(trans, root, path, 1, 1,
					      1, (u32)-1);
			if (wret < 0 && wret != -ENOSPC)
				ret = wret;

			if (path->nodes[0] == leaf &&
			    btrfs_header_nritems(leaf)) {
				wret = push_leaf_right(trans, root, path, 1,
						       1, 1, 0);
				if (wret < 0 && wret != -ENOSPC)
					ret = wret;
			}

			if (btrfs_header_nritems(leaf) == 0) {
				path->slots[1] = slot;
				ret = btrfs_del_leaf(trans, root, path, leaf);
				BUG_ON(ret);
				free_extent_buffer(leaf);
			} else {
				/* if we're still in the path, make sure
				 * we're dirty. Otherwise, one of the
				 * push_leaf functions must have already
				 * dirtied this buffer
				 */
				if (path->nodes[0] == leaf)
					btrfs_mark_buffer_dirty(leaf);
				free_extent_buffer(leaf);
			}
		} else {
			btrfs_mark_buffer_dirty(leaf);
		}
	}
	return ret;
}
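/*
 * Illustrative aside (not part of the original file): after a delete the
 * leaf is merged away once it falls under a third of its capacity, by
 * first pushing left and then pushing right. A sketch of the trigger
 * only; should_merge() and LEAF_DATA_SIZE are invented stand-ins.
 */
#if 0	/* sketch only, never built */
#define LEAF_DATA_SIZE 3995	/* example capacity, made up */
static int should_merge(int used_bytes, int nritems)
{
	if (nritems == 0)
		return 1;			/* empty leaves always go */
	return used_bytes < LEAF_DATA_SIZE / 3;	/* mostly empty: merge */
}
#endif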
/*
 * search the tree again to find a leaf with lesser keys
 * returns 0 if it found something or 1 if there are no lesser leaves.
 * returns < 0 on io errors.
 *
 * This may release the path, and so you may lose any locks held at the
 * time you call it.
 */
int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
{
	struct btrfs_key key;
	struct btrfs_disk_key found_key;
	int ret;

	btrfs_item_key_to_cpu(path->nodes[0], &key, 0);

	if (key.offset > 0)
		key.offset--;
	else if (key.type > 0)
		key.type--;
	else if (key.objectid > 0)
		key.objectid--;
	else
		return 1;

	btrfs_release_path(path);
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		return ret;
	btrfs_item_key(path->nodes[0], &found_key, 0);
	ret = comp_keys(&found_key, &key);
	if (ret < 0)
		return 0;
	return 1;
}

/*
 * A helper function to walk down the tree starting at min_key, and looking
 * for nodes or leaves that are either in cache or have a minimum
 * transaction id. This is used by the btree defrag code, and tree logging
 *
 * This does not cow, but it does stuff the starting key it finds back
 * into min_key, so you can call btrfs_search_slot with cow=1 on the
 * key and get a writable path.
 *
 * This does lock as it descends, and path->keep_locks should be set
 * to 1 by the caller.
 *
 * This honors path->lowest_level to prevent descent past a given level
 * of the tree.
 *
 * min_trans indicates the oldest transaction that you are interested
 * in walking through. Any nodes or leaves older than min_trans are
 * skipped over (without reading them).
 *
 * returns zero if something useful was found, < 0 on error and 1 if there
 * was nothing in the tree that matched the search criteria.
 */
int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
			 struct btrfs_key *max_key,
			 struct btrfs_path *path, int cache_only,
			 u64 min_trans)
{
	struct extent_buffer *cur;
	struct btrfs_key found_key;
	int slot;
	int sret;
	u32 nritems;
	int level;
	int ret = 1;

	WARN_ON(!path->keep_locks);
again:
	cur = btrfs_read_lock_root_node(root);
	level = btrfs_header_level(cur);
	WARN_ON(path->nodes[level]);
	path->nodes[level] = cur;
	path->locks[level] = BTRFS_READ_LOCK;

	if (btrfs_header_generation(cur) < min_trans) {
		ret = 1;
		goto out;
	}
	while (1) {
		nritems = btrfs_header_nritems(cur);
		level = btrfs_header_level(cur);
		sret = bin_search(cur, min_key, level, &slot);

		/* at the lowest level, we're done, setup the path and exit */
		if (level == path->lowest_level) {
			if (slot >= nritems)
				goto find_next_key;
			ret = 0;
			path->slots[level] = slot;
			btrfs_item_key_to_cpu(cur, &found_key, slot);
			goto out;
		}
		if (sret && slot > 0)
			slot--;
		/*
		 * check this node pointer against the cache_only and
		 * min_trans parameters. If it isn't in cache or is too
		 * old, skip to the next one.
/*
 * A helper function to walk down the tree starting at min_key, looking
 * for nodes or leaves that are either in cache or have a minimum
 * transaction id. This is used by the btree defrag code and by tree
 * logging.
 *
 * This does not cow, but it does stuff the starting key it finds back
 * into min_key, so you can call btrfs_search_slot with cow=1 on the
 * key and get a writable path.
 *
 * This does lock as it descends, and path->keep_locks should be set
 * to 1 by the caller.
 *
 * This honors path->lowest_level to prevent descent past a given level
 * of the tree.
 *
 * min_trans indicates the oldest transaction that you are interested
 * in walking through. Any nodes or leaves older than min_trans are
 * skipped over (without reading them).
 *
 * returns zero if something useful was found, < 0 on error, and 1 if there
 * was nothing in the tree that matched the search criteria.
 */
int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
			 struct btrfs_key *max_key,
			 struct btrfs_path *path, int cache_only,
			 u64 min_trans)
{
	struct extent_buffer *cur;
	struct btrfs_key found_key;
	int slot;
	int sret;
	u32 nritems;
	int level;
	int ret = 1;

	WARN_ON(!path->keep_locks);
again:
	cur = btrfs_read_lock_root_node(root);
	level = btrfs_header_level(cur);
	WARN_ON(path->nodes[level]);
	path->nodes[level] = cur;
	path->locks[level] = BTRFS_READ_LOCK;

	if (btrfs_header_generation(cur) < min_trans) {
		ret = 1;
		goto out;
	}
	while (1) {
		nritems = btrfs_header_nritems(cur);
		level = btrfs_header_level(cur);
		sret = bin_search(cur, min_key, level, &slot);

		/* at the lowest level, we're done, setup the path and exit */
		if (level == path->lowest_level) {
			if (slot >= nritems)
				goto find_next_key;
			ret = 0;
			path->slots[level] = slot;
			btrfs_item_key_to_cpu(cur, &found_key, slot);
			goto out;
		}
		if (sret && slot > 0)
			slot--;
		/*
		 * check this node pointer against the cache_only and
		 * min_trans parameters. If it isn't in cache or is too
		 * old, skip to the next one.
		 */
		while (slot < nritems) {
			u64 blockptr;
			u64 gen;
			struct extent_buffer *tmp;
			struct btrfs_disk_key disk_key;

			blockptr = btrfs_node_blockptr(cur, slot);
			gen = btrfs_node_ptr_generation(cur, slot);
			if (gen < min_trans) {
				slot++;
				continue;
			}
			if (!cache_only)
				break;

			if (max_key) {
				btrfs_node_key(cur, &disk_key, slot);
				if (comp_keys(&disk_key, max_key) >= 0) {
					ret = 1;
					goto out;
				}
			}

			tmp = btrfs_find_tree_block(root, blockptr,
					    btrfs_level_size(root, level - 1));

			if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
				free_extent_buffer(tmp);
				break;
			}
			if (tmp)
				free_extent_buffer(tmp);
			slot++;
		}
find_next_key:
		/*
		 * we didn't find a candidate key in this node, walk forward
		 * and find another one
		 */
		if (slot >= nritems) {
			path->slots[level] = slot;
			btrfs_set_path_blocking(path);
			sret = btrfs_find_next_key(root, path, min_key, level,
						   cache_only, min_trans);
			if (sret == 0) {
				btrfs_release_path(path);
				goto again;
			} else {
				goto out;
			}
		}
		/* save our key for returning back */
		btrfs_node_key_to_cpu(cur, &found_key, slot);
		path->slots[level] = slot;
		if (level == path->lowest_level) {
			ret = 0;
			unlock_up(path, level, 1);
			goto out;
		}
		btrfs_set_path_blocking(path);
		cur = read_node_slot(root, cur, slot);
		BUG_ON(!cur);

		btrfs_tree_read_lock(cur);

		path->locks[level - 1] = BTRFS_READ_LOCK;
		path->nodes[level - 1] = cur;
		unlock_up(path, level, 1);
		btrfs_clear_path_blocking(path, NULL, 0);
	}
out:
	if (ret == 0)
		memcpy(min_key, &found_key, sizeof(found_key));
	btrfs_set_path_blocking(path);
	return ret;
}
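/*
 * Illustrative sketch (not part of the original file): scanning every item
 * newer than min_trans with btrfs_search_forward, in the style of the
 * defrag and tree-log callers. The function name is hypothetical, and the
 * key advancement is simplified: a complete walker would also carry the
 * increment into key.type and key.objectid on overflow.
 */
#if 0	/* example only */
static int example_scan_newer_than(struct btrfs_root *root, u64 min_trans)
{
	struct btrfs_path *path;
	struct btrfs_key min_key;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	path->keep_locks = 1;	/* required by btrfs_search_forward */

	min_key.objectid = 0;
	min_key.type = 0;
	min_key.offset = 0;

	while (1) {
		/* on success, min_key is stuffed with the key it found */
		ret = btrfs_search_forward(root, &min_key, NULL, path,
					   0, min_trans);
		if (ret != 0)
			break;	/* 1: nothing left, < 0: error */

		/* ... process the item at path->slots[0] here ... */

		if (min_key.offset == (u64)-1)
			break;	/* simplified end-of-keyspace check */
		min_key.offset++;
		btrfs_release_path(path);
	}
	btrfs_free_path(path);
	return ret < 0 ? ret : 0;
}
#endif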
/*
 * this is similar to btrfs_next_leaf, but does not try to preserve
 * and fix up the path. It looks for and returns the next key in the
 * tree based on the current path and the cache_only and min_trans
 * parameters.
 *
 * returns 0 if another key is found, < 0 on error, and 1 if there
 * are no higher keys in the tree
 *
 * path->keep_locks should be set to 1 on the search made before
 * calling this function.
 */
int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
			struct btrfs_key *key, int level,
			int cache_only, u64 min_trans)
{
	int slot;
	struct extent_buffer *c;

	WARN_ON(!path->keep_locks);
	while (level < BTRFS_MAX_LEVEL) {
		if (!path->nodes[level])
			return 1;

		slot = path->slots[level] + 1;
		c = path->nodes[level];
next:
		if (slot >= btrfs_header_nritems(c)) {
			int ret;
			int orig_lowest;
			struct btrfs_key cur_key;
			if (level + 1 >= BTRFS_MAX_LEVEL ||
			    !path->nodes[level + 1])
				return 1;

			if (path->locks[level + 1]) {
				level++;
				continue;
			}

			slot = btrfs_header_nritems(c) - 1;
			if (level == 0)
				btrfs_item_key_to_cpu(c, &cur_key, slot);
			else
				btrfs_node_key_to_cpu(c, &cur_key, slot);

			orig_lowest = path->lowest_level;
			btrfs_release_path(path);
			path->lowest_level = level;
			ret = btrfs_search_slot(NULL, root, &cur_key, path,
						0, 0);
			path->lowest_level = orig_lowest;
			if (ret < 0)
				return ret;

			c = path->nodes[level];
			slot = path->slots[level];
			if (ret == 0)
				slot++;
			goto next;
		}

		if (level == 0)
			btrfs_item_key_to_cpu(c, key, slot);
		else {
			u64 blockptr = btrfs_node_blockptr(c, slot);
			u64 gen = btrfs_node_ptr_generation(c, slot);

			if (cache_only) {
				struct extent_buffer *cur;
				cur = btrfs_find_tree_block(root, blockptr,
					    btrfs_level_size(root, level - 1));
				if (!cur || !btrfs_buffer_uptodate(cur, gen)) {
					slot++;
					if (cur)
						free_extent_buffer(cur);
					goto next;
				}
				free_extent_buffer(cur);
			}
			if (gen < min_trans) {
				slot++;
				goto next;
			}
			btrfs_node_key_to_cpu(c, key, slot);
		}
		return 0;
	}
	return 1;
}
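/*
 * Illustrative sketch (not part of the original file): peeking at the key
 * that follows the current path position without disturbing the path,
 * which is what btrfs_find_next_key is for. level 0 asks for the next
 * leaf-level key; cache_only = 0 and min_trans = 0 disable the filters.
 * The function name is hypothetical, and the handling of a non-exact
 * search hit is simplified.
 */
#if 0	/* example only */
static int example_peek_next_key(struct btrfs_root *root,
				 struct btrfs_key *search_key,
				 struct btrfs_key *next_key)
{
	struct btrfs_path *path;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	path->keep_locks = 1;	/* required before btrfs_find_next_key */

	ret = btrfs_search_slot(NULL, root, search_key, path, 0, 0);
	if (ret >= 0)
		ret = btrfs_find_next_key(root, path, next_key, 0, 0, 0);

	btrfs_free_path(path);
	return ret;	/* 0: next_key filled, 1: none higher, < 0: error */
}
#endif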
/*
 * search the tree again to find a leaf with greater keys
 * returns 0 if it found something or 1 if there are no greater leaves.
 * returns < 0 on I/O errors.
 */
int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
{
	int slot;
	int level;
	struct extent_buffer *c;
	struct extent_buffer *next;
	struct btrfs_key key;
	u32 nritems;
	int ret;
	int old_spinning = path->leave_spinning;
	int next_rw_lock = 0;

	nritems = btrfs_header_nritems(path->nodes[0]);
	if (nritems == 0)
		return 1;

	btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1);
again:
	level = 1;
	next = NULL;
	next_rw_lock = 0;
	btrfs_release_path(path);

	path->keep_locks = 1;
	path->leave_spinning = 1;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	path->keep_locks = 0;

	if (ret < 0)
		return ret;

	nritems = btrfs_header_nritems(path->nodes[0]);
	/*
	 * by releasing the path above we dropped all our locks. A balance
	 * could have added more items next to the key that used to be
	 * at the very end of the block. So, check again here and
	 * advance the path if there are now more items available.
	 */
	if (nritems > 0 && path->slots[0] < nritems - 1) {
		if (ret == 0)
			path->slots[0]++;
		ret = 0;
		goto done;
	}

	while (level < BTRFS_MAX_LEVEL) {
		if (!path->nodes[level]) {
			ret = 1;
			goto done;
		}

		slot = path->slots[level] + 1;
		c = path->nodes[level];
		if (slot >= btrfs_header_nritems(c)) {
			level++;
			if (level == BTRFS_MAX_LEVEL) {
				ret = 1;
				goto done;
			}
			continue;
		}

		if (next) {
			btrfs_tree_unlock_rw(next, next_rw_lock);
			free_extent_buffer(next);
		}

		next = c;
		next_rw_lock = path->locks[level];
		ret = read_block_for_search(NULL, root, path, &next, level,
					    slot, &key);
		if (ret == -EAGAIN)
			goto again;

		if (ret < 0) {
			btrfs_release_path(path);
			goto done;
		}

		if (!path->skip_locking) {
			ret = btrfs_try_tree_read_lock(next);
			if (!ret) {
				btrfs_set_path_blocking(path);
				btrfs_tree_read_lock(next);
				btrfs_clear_path_blocking(path, next,
							  BTRFS_READ_LOCK);
			}
			next_rw_lock = BTRFS_READ_LOCK;
		}
		break;
	}
	path->slots[level] = slot;
	while (1) {
		level--;
		c = path->nodes[level];
		if (path->locks[level])
			btrfs_tree_unlock_rw(c, path->locks[level]);

		free_extent_buffer(c);
		path->nodes[level] = next;
		path->slots[level] = 0;
		if (!path->skip_locking)
			path->locks[level] = next_rw_lock;
		if (!level)
			break;

		ret = read_block_for_search(NULL, root, path, &next, level,
					    0, &key);
		if (ret == -EAGAIN)
			goto again;

		if (ret < 0) {
			btrfs_release_path(path);
			goto done;
		}

		if (!path->skip_locking) {
			ret = btrfs_try_tree_read_lock(next);
			if (!ret) {
				btrfs_set_path_blocking(path);
				btrfs_tree_read_lock(next);
				btrfs_clear_path_blocking(path, next,
							  BTRFS_READ_LOCK);
			}
			next_rw_lock = BTRFS_READ_LOCK;
		}
	}
	ret = 0;
done:
	unlock_up(path, 0, 1);
	path->leave_spinning = old_spinning;
	if (!old_spinning)
		btrfs_set_path_blocking(path);

	return ret;
}

/*
 * this uses btrfs_prev_leaf to walk backwards in the tree, and keeps
 * searching until it gets past min_objectid or finds an item of 'type'
 *
 * returns 0 if something is found, 1 if nothing was found, and < 0 on error
 */
int btrfs_previous_item(struct btrfs_root *root,
			struct btrfs_path *path, u64 min_objectid,
			int type)
{
	struct btrfs_key found_key;
	struct extent_buffer *leaf;
	u32 nritems;
	int ret;

	while (1) {
		if (path->slots[0] == 0) {
			btrfs_set_path_blocking(path);
			ret = btrfs_prev_leaf(root, path);
			if (ret != 0)
				return ret;
		} else {
			path->slots[0]--;
		}
		leaf = path->nodes[0];
		nritems = btrfs_header_nritems(leaf);
		if (nritems == 0)
			return 1;
		if (path->slots[0] == nritems)
			path->slots[0]--;

		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		if (found_key.objectid < min_objectid)
			break;
		if (found_key.type == type)
			return 0;
		if (found_key.objectid == min_objectid &&
		    found_key.type < type)
			break;
	}
	return 1;
}
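/*
 * Illustrative sketch (not part of the original file): the canonical
 * forward walk over every item of one objectid, advancing with
 * btrfs_next_leaf whenever the current leaf runs out of slots. The
 * function name and the processing step are hypothetical.
 */
#if 0	/* example only */
static int example_walk_objectid(struct btrfs_root *root, u64 objectid)
{
	struct btrfs_path *path;
	struct btrfs_key key;
	struct extent_buffer *leaf;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = objectid;
	key.type = 0;
	key.offset = 0;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto out;

	while (1) {
		leaf = path->nodes[0];
		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root, path);
			if (ret != 0)	/* 1: no more leaves, < 0: error */
				break;
			leaf = path->nodes[0];
		}
		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.objectid != objectid)
			break;

		/* ... process the item at path->slots[0] here ... */

		path->slots[0]++;
	}
	if (ret == 1)
		ret = 0;	/* walking off the end is not an error */
out:
	btrfs_free_path(path);
	return ret;
}
#endif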