1 /* 2 * Copyright (C) 2007,2008 Oracle. All rights reserved. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public 6 * License v2 as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public 14 * License along with this program; if not, write to the 15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 16 * Boston, MA 021110-1307, USA. 17 */ 18 19 #include <linux/sched.h> 20 #include <linux/slab.h> 21 #include "ctree.h" 22 #include "disk-io.h" 23 #include "transaction.h" 24 #include "print-tree.h" 25 #include "locking.h" 26 27 static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root 28 *root, struct btrfs_path *path, int level); 29 static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root 30 *root, struct btrfs_key *ins_key, 31 struct btrfs_path *path, int data_size, int extend); 32 static int push_node_left(struct btrfs_trans_handle *trans, 33 struct btrfs_root *root, struct extent_buffer *dst, 34 struct extent_buffer *src, int empty); 35 static int balance_node_right(struct btrfs_trans_handle *trans, 36 struct btrfs_root *root, 37 struct extent_buffer *dst_buf, 38 struct extent_buffer *src_buf); 39 static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, 40 struct btrfs_path *path, int level, int slot); 41 static int setup_items_for_insert(struct btrfs_trans_handle *trans, 42 struct btrfs_root *root, struct btrfs_path *path, 43 struct btrfs_key *cpu_key, u32 *data_size, 44 u32 total_data, u32 total_size, int nr); 45 46 47 struct btrfs_path *btrfs_alloc_path(void) 48 { 49 struct btrfs_path *path; 50 path = kmem_cache_zalloc(btrfs_path_cachep, GFP_NOFS); 51 if (path) 52 path->reada = 1; 53 return path; 54 } 55 56 /* 57 * set all locked nodes in the path to blocking locks. This should 58 * be done before scheduling 59 */ 60 noinline void btrfs_set_path_blocking(struct btrfs_path *p) 61 { 62 int i; 63 for (i = 0; i < BTRFS_MAX_LEVEL; i++) { 64 if (p->nodes[i] && p->locks[i]) 65 btrfs_set_lock_blocking(p->nodes[i]); 66 } 67 } 68 69 /* 70 * reset all the locked nodes in the patch to spinning locks. 71 * 72 * held is used to keep lockdep happy, when lockdep is enabled 73 * we set held to a blocking lock before we go around and 74 * retake all the spinlocks in the path. You can safely use NULL 75 * for held 76 */ 77 noinline void btrfs_clear_path_blocking(struct btrfs_path *p, 78 struct extent_buffer *held) 79 { 80 int i; 81 82 #ifdef CONFIG_DEBUG_LOCK_ALLOC 83 /* lockdep really cares that we take all of these spinlocks 84 * in the right order. If any of the locks in the path are not 85 * currently blocking, it is going to complain. So, make really 86 * really sure by forcing the path to blocking before we clear 87 * the path blocking. 88 */ 89 if (held) 90 btrfs_set_lock_blocking(held); 91 btrfs_set_path_blocking(p); 92 #endif 93 94 for (i = BTRFS_MAX_LEVEL - 1; i >= 0; i--) { 95 if (p->nodes[i] && p->locks[i]) 96 btrfs_clear_lock_blocking(p->nodes[i]); 97 } 98 99 #ifdef CONFIG_DEBUG_LOCK_ALLOC 100 if (held) 101 btrfs_clear_lock_blocking(held); 102 #endif 103 } 104 105 /* this also releases the path */ 106 void btrfs_free_path(struct btrfs_path *p) 107 { 108 btrfs_release_path(NULL, p); 109 kmem_cache_free(btrfs_path_cachep, p); 110 } 111 112 /* 113 * path release drops references on the extent buffers in the path 114 * and it drops any locks held by this path 115 * 116 * It is safe to call this on paths that no locks or extent buffers held. 117 */ 118 noinline void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) 119 { 120 int i; 121 122 for (i = 0; i < BTRFS_MAX_LEVEL; i++) { 123 p->slots[i] = 0; 124 if (!p->nodes[i]) 125 continue; 126 if (p->locks[i]) { 127 btrfs_tree_unlock(p->nodes[i]); 128 p->locks[i] = 0; 129 } 130 free_extent_buffer(p->nodes[i]); 131 p->nodes[i] = NULL; 132 } 133 } 134 135 /* 136 * safely gets a reference on the root node of a tree. A lock 137 * is not taken, so a concurrent writer may put a different node 138 * at the root of the tree. See btrfs_lock_root_node for the 139 * looping required. 140 * 141 * The extent buffer returned by this has a reference taken, so 142 * it won't disappear. It may stop being the root of the tree 143 * at any time because there are no locks held. 144 */ 145 struct extent_buffer *btrfs_root_node(struct btrfs_root *root) 146 { 147 struct extent_buffer *eb; 148 spin_lock(&root->node_lock); 149 eb = root->node; 150 extent_buffer_get(eb); 151 spin_unlock(&root->node_lock); 152 return eb; 153 } 154 155 /* loop around taking references on and locking the root node of the 156 * tree until you end up with a lock on the root. A locked buffer 157 * is returned, with a reference held. 158 */ 159 struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root) 160 { 161 struct extent_buffer *eb; 162 163 while (1) { 164 eb = btrfs_root_node(root); 165 btrfs_tree_lock(eb); 166 167 spin_lock(&root->node_lock); 168 if (eb == root->node) { 169 spin_unlock(&root->node_lock); 170 break; 171 } 172 spin_unlock(&root->node_lock); 173 174 btrfs_tree_unlock(eb); 175 free_extent_buffer(eb); 176 } 177 return eb; 178 } 179 180 /* cowonly root (everything not a reference counted cow subvolume), just get 181 * put onto a simple dirty list. transaction.c walks this to make sure they 182 * get properly updated on disk. 183 */ 184 static void add_root_to_dirty_list(struct btrfs_root *root) 185 { 186 if (root->track_dirty && list_empty(&root->dirty_list)) { 187 list_add(&root->dirty_list, 188 &root->fs_info->dirty_cowonly_roots); 189 } 190 } 191 192 /* 193 * used by snapshot creation to make a copy of a root for a tree with 194 * a given objectid. The buffer with the new root node is returned in 195 * cow_ret, and this func returns zero on success or a negative error code. 196 */ 197 int btrfs_copy_root(struct btrfs_trans_handle *trans, 198 struct btrfs_root *root, 199 struct extent_buffer *buf, 200 struct extent_buffer **cow_ret, u64 new_root_objectid) 201 { 202 struct extent_buffer *cow; 203 int ret = 0; 204 int level; 205 struct btrfs_disk_key disk_key; 206 207 WARN_ON(root->ref_cows && trans->transid != 208 root->fs_info->running_transaction->transid); 209 WARN_ON(root->ref_cows && trans->transid != root->last_trans); 210 211 level = btrfs_header_level(buf); 212 if (level == 0) 213 btrfs_item_key(buf, &disk_key, 0); 214 else 215 btrfs_node_key(buf, &disk_key, 0); 216 217 cow = btrfs_alloc_free_block(trans, root, buf->len, 0, 218 new_root_objectid, &disk_key, level, 219 buf->start, 0); 220 if (IS_ERR(cow)) 221 return PTR_ERR(cow); 222 223 copy_extent_buffer(cow, buf, 0, 0, cow->len); 224 btrfs_set_header_bytenr(cow, cow->start); 225 btrfs_set_header_generation(cow, trans->transid); 226 btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV); 227 btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN | 228 BTRFS_HEADER_FLAG_RELOC); 229 if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID) 230 btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC); 231 else 232 btrfs_set_header_owner(cow, new_root_objectid); 233 234 write_extent_buffer(cow, root->fs_info->fsid, 235 (unsigned long)btrfs_header_fsid(cow), 236 BTRFS_FSID_SIZE); 237 238 WARN_ON(btrfs_header_generation(buf) > trans->transid); 239 if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID) 240 ret = btrfs_inc_ref(trans, root, cow, 1); 241 else 242 ret = btrfs_inc_ref(trans, root, cow, 0); 243 244 if (ret) 245 return ret; 246 247 btrfs_mark_buffer_dirty(cow); 248 *cow_ret = cow; 249 return 0; 250 } 251 252 /* 253 * check if the tree block can be shared by multiple trees 254 */ 255 int btrfs_block_can_be_shared(struct btrfs_root *root, 256 struct extent_buffer *buf) 257 { 258 /* 259 * Tree blocks not in refernece counted trees and tree roots 260 * are never shared. If a block was allocated after the last 261 * snapshot and the block was not allocated by tree relocation, 262 * we know the block is not shared. 263 */ 264 if (root->ref_cows && 265 buf != root->node && buf != root->commit_root && 266 (btrfs_header_generation(buf) <= 267 btrfs_root_last_snapshot(&root->root_item) || 268 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) 269 return 1; 270 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 271 if (root->ref_cows && 272 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV) 273 return 1; 274 #endif 275 return 0; 276 } 277 278 static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, 279 struct btrfs_root *root, 280 struct extent_buffer *buf, 281 struct extent_buffer *cow, 282 int *last_ref) 283 { 284 u64 refs; 285 u64 owner; 286 u64 flags; 287 u64 new_flags = 0; 288 int ret; 289 290 /* 291 * Backrefs update rules: 292 * 293 * Always use full backrefs for extent pointers in tree block 294 * allocated by tree relocation. 295 * 296 * If a shared tree block is no longer referenced by its owner 297 * tree (btrfs_header_owner(buf) == root->root_key.objectid), 298 * use full backrefs for extent pointers in tree block. 299 * 300 * If a tree block is been relocating 301 * (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID), 302 * use full backrefs for extent pointers in tree block. 303 * The reason for this is some operations (such as drop tree) 304 * are only allowed for blocks use full backrefs. 305 */ 306 307 if (btrfs_block_can_be_shared(root, buf)) { 308 ret = btrfs_lookup_extent_info(trans, root, buf->start, 309 buf->len, &refs, &flags); 310 BUG_ON(ret); 311 BUG_ON(refs == 0); 312 } else { 313 refs = 1; 314 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID || 315 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV) 316 flags = BTRFS_BLOCK_FLAG_FULL_BACKREF; 317 else 318 flags = 0; 319 } 320 321 owner = btrfs_header_owner(buf); 322 BUG_ON(owner == BTRFS_TREE_RELOC_OBJECTID && 323 !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)); 324 325 if (refs > 1) { 326 if ((owner == root->root_key.objectid || 327 root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) && 328 !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) { 329 ret = btrfs_inc_ref(trans, root, buf, 1); 330 BUG_ON(ret); 331 332 if (root->root_key.objectid == 333 BTRFS_TREE_RELOC_OBJECTID) { 334 ret = btrfs_dec_ref(trans, root, buf, 0); 335 BUG_ON(ret); 336 ret = btrfs_inc_ref(trans, root, cow, 1); 337 BUG_ON(ret); 338 } 339 new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; 340 } else { 341 342 if (root->root_key.objectid == 343 BTRFS_TREE_RELOC_OBJECTID) 344 ret = btrfs_inc_ref(trans, root, cow, 1); 345 else 346 ret = btrfs_inc_ref(trans, root, cow, 0); 347 BUG_ON(ret); 348 } 349 if (new_flags != 0) { 350 ret = btrfs_set_disk_extent_flags(trans, root, 351 buf->start, 352 buf->len, 353 new_flags, 0); 354 BUG_ON(ret); 355 } 356 } else { 357 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) { 358 if (root->root_key.objectid == 359 BTRFS_TREE_RELOC_OBJECTID) 360 ret = btrfs_inc_ref(trans, root, cow, 1); 361 else 362 ret = btrfs_inc_ref(trans, root, cow, 0); 363 BUG_ON(ret); 364 ret = btrfs_dec_ref(trans, root, buf, 1); 365 BUG_ON(ret); 366 } 367 clean_tree_block(trans, root, buf); 368 *last_ref = 1; 369 } 370 return 0; 371 } 372 373 /* 374 * does the dirty work in cow of a single block. The parent block (if 375 * supplied) is updated to point to the new cow copy. The new buffer is marked 376 * dirty and returned locked. If you modify the block it needs to be marked 377 * dirty again. 378 * 379 * search_start -- an allocation hint for the new block 380 * 381 * empty_size -- a hint that you plan on doing more cow. This is the size in 382 * bytes the allocator should try to find free next to the block it returns. 383 * This is just a hint and may be ignored by the allocator. 384 */ 385 static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, 386 struct btrfs_root *root, 387 struct extent_buffer *buf, 388 struct extent_buffer *parent, int parent_slot, 389 struct extent_buffer **cow_ret, 390 u64 search_start, u64 empty_size) 391 { 392 struct btrfs_disk_key disk_key; 393 struct extent_buffer *cow; 394 int level; 395 int last_ref = 0; 396 int unlock_orig = 0; 397 u64 parent_start; 398 399 if (*cow_ret == buf) 400 unlock_orig = 1; 401 402 btrfs_assert_tree_locked(buf); 403 404 WARN_ON(root->ref_cows && trans->transid != 405 root->fs_info->running_transaction->transid); 406 WARN_ON(root->ref_cows && trans->transid != root->last_trans); 407 408 level = btrfs_header_level(buf); 409 410 if (level == 0) 411 btrfs_item_key(buf, &disk_key, 0); 412 else 413 btrfs_node_key(buf, &disk_key, 0); 414 415 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) { 416 if (parent) 417 parent_start = parent->start; 418 else 419 parent_start = 0; 420 } else 421 parent_start = 0; 422 423 cow = btrfs_alloc_free_block(trans, root, buf->len, parent_start, 424 root->root_key.objectid, &disk_key, 425 level, search_start, empty_size); 426 if (IS_ERR(cow)) 427 return PTR_ERR(cow); 428 429 /* cow is set to blocking by btrfs_init_new_buffer */ 430 431 copy_extent_buffer(cow, buf, 0, 0, cow->len); 432 btrfs_set_header_bytenr(cow, cow->start); 433 btrfs_set_header_generation(cow, trans->transid); 434 btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV); 435 btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN | 436 BTRFS_HEADER_FLAG_RELOC); 437 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) 438 btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC); 439 else 440 btrfs_set_header_owner(cow, root->root_key.objectid); 441 442 write_extent_buffer(cow, root->fs_info->fsid, 443 (unsigned long)btrfs_header_fsid(cow), 444 BTRFS_FSID_SIZE); 445 446 update_ref_for_cow(trans, root, buf, cow, &last_ref); 447 448 if (root->ref_cows) 449 btrfs_reloc_cow_block(trans, root, buf, cow); 450 451 if (buf == root->node) { 452 WARN_ON(parent && parent != buf); 453 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID || 454 btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV) 455 parent_start = buf->start; 456 else 457 parent_start = 0; 458 459 spin_lock(&root->node_lock); 460 root->node = cow; 461 extent_buffer_get(cow); 462 spin_unlock(&root->node_lock); 463 464 btrfs_free_tree_block(trans, root, buf, parent_start, 465 last_ref); 466 free_extent_buffer(buf); 467 add_root_to_dirty_list(root); 468 } else { 469 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) 470 parent_start = parent->start; 471 else 472 parent_start = 0; 473 474 WARN_ON(trans->transid != btrfs_header_generation(parent)); 475 btrfs_set_node_blockptr(parent, parent_slot, 476 cow->start); 477 btrfs_set_node_ptr_generation(parent, parent_slot, 478 trans->transid); 479 btrfs_mark_buffer_dirty(parent); 480 btrfs_free_tree_block(trans, root, buf, parent_start, 481 last_ref); 482 } 483 if (unlock_orig) 484 btrfs_tree_unlock(buf); 485 free_extent_buffer(buf); 486 btrfs_mark_buffer_dirty(cow); 487 *cow_ret = cow; 488 return 0; 489 } 490 491 static inline int should_cow_block(struct btrfs_trans_handle *trans, 492 struct btrfs_root *root, 493 struct extent_buffer *buf) 494 { 495 if (btrfs_header_generation(buf) == trans->transid && 496 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN) && 497 !(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID && 498 btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) 499 return 0; 500 return 1; 501 } 502 503 /* 504 * cows a single block, see __btrfs_cow_block for the real work. 505 * This version of it has extra checks so that a block isn't cow'd more than 506 * once per transaction, as long as it hasn't been written yet 507 */ 508 noinline int btrfs_cow_block(struct btrfs_trans_handle *trans, 509 struct btrfs_root *root, struct extent_buffer *buf, 510 struct extent_buffer *parent, int parent_slot, 511 struct extent_buffer **cow_ret) 512 { 513 u64 search_start; 514 int ret; 515 516 if (trans->transaction != root->fs_info->running_transaction) { 517 printk(KERN_CRIT "trans %llu running %llu\n", 518 (unsigned long long)trans->transid, 519 (unsigned long long) 520 root->fs_info->running_transaction->transid); 521 WARN_ON(1); 522 } 523 if (trans->transid != root->fs_info->generation) { 524 printk(KERN_CRIT "trans %llu running %llu\n", 525 (unsigned long long)trans->transid, 526 (unsigned long long)root->fs_info->generation); 527 WARN_ON(1); 528 } 529 530 if (!should_cow_block(trans, root, buf)) { 531 *cow_ret = buf; 532 return 0; 533 } 534 535 search_start = buf->start & ~((u64)(1024 * 1024 * 1024) - 1); 536 537 if (parent) 538 btrfs_set_lock_blocking(parent); 539 btrfs_set_lock_blocking(buf); 540 541 ret = __btrfs_cow_block(trans, root, buf, parent, 542 parent_slot, cow_ret, search_start, 0); 543 return ret; 544 } 545 546 /* 547 * helper function for defrag to decide if two blocks pointed to by a 548 * node are actually close by 549 */ 550 static int close_blocks(u64 blocknr, u64 other, u32 blocksize) 551 { 552 if (blocknr < other && other - (blocknr + blocksize) < 32768) 553 return 1; 554 if (blocknr > other && blocknr - (other + blocksize) < 32768) 555 return 1; 556 return 0; 557 } 558 559 /* 560 * compare two keys in a memcmp fashion 561 */ 562 static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2) 563 { 564 struct btrfs_key k1; 565 566 btrfs_disk_key_to_cpu(&k1, disk); 567 568 return btrfs_comp_cpu_keys(&k1, k2); 569 } 570 571 /* 572 * same as comp_keys only with two btrfs_key's 573 */ 574 int btrfs_comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2) 575 { 576 if (k1->objectid > k2->objectid) 577 return 1; 578 if (k1->objectid < k2->objectid) 579 return -1; 580 if (k1->type > k2->type) 581 return 1; 582 if (k1->type < k2->type) 583 return -1; 584 if (k1->offset > k2->offset) 585 return 1; 586 if (k1->offset < k2->offset) 587 return -1; 588 return 0; 589 } 590 591 /* 592 * this is used by the defrag code to go through all the 593 * leaves pointed to by a node and reallocate them so that 594 * disk order is close to key order 595 */ 596 int btrfs_realloc_node(struct btrfs_trans_handle *trans, 597 struct btrfs_root *root, struct extent_buffer *parent, 598 int start_slot, int cache_only, u64 *last_ret, 599 struct btrfs_key *progress) 600 { 601 struct extent_buffer *cur; 602 u64 blocknr; 603 u64 gen; 604 u64 search_start = *last_ret; 605 u64 last_block = 0; 606 u64 other; 607 u32 parent_nritems; 608 int end_slot; 609 int i; 610 int err = 0; 611 int parent_level; 612 int uptodate; 613 u32 blocksize; 614 int progress_passed = 0; 615 struct btrfs_disk_key disk_key; 616 617 parent_level = btrfs_header_level(parent); 618 if (cache_only && parent_level != 1) 619 return 0; 620 621 if (trans->transaction != root->fs_info->running_transaction) 622 WARN_ON(1); 623 if (trans->transid != root->fs_info->generation) 624 WARN_ON(1); 625 626 parent_nritems = btrfs_header_nritems(parent); 627 blocksize = btrfs_level_size(root, parent_level - 1); 628 end_slot = parent_nritems; 629 630 if (parent_nritems == 1) 631 return 0; 632 633 btrfs_set_lock_blocking(parent); 634 635 for (i = start_slot; i < end_slot; i++) { 636 int close = 1; 637 638 if (!parent->map_token) { 639 map_extent_buffer(parent, 640 btrfs_node_key_ptr_offset(i), 641 sizeof(struct btrfs_key_ptr), 642 &parent->map_token, &parent->kaddr, 643 &parent->map_start, &parent->map_len, 644 KM_USER1); 645 } 646 btrfs_node_key(parent, &disk_key, i); 647 if (!progress_passed && comp_keys(&disk_key, progress) < 0) 648 continue; 649 650 progress_passed = 1; 651 blocknr = btrfs_node_blockptr(parent, i); 652 gen = btrfs_node_ptr_generation(parent, i); 653 if (last_block == 0) 654 last_block = blocknr; 655 656 if (i > 0) { 657 other = btrfs_node_blockptr(parent, i - 1); 658 close = close_blocks(blocknr, other, blocksize); 659 } 660 if (!close && i < end_slot - 2) { 661 other = btrfs_node_blockptr(parent, i + 1); 662 close = close_blocks(blocknr, other, blocksize); 663 } 664 if (close) { 665 last_block = blocknr; 666 continue; 667 } 668 if (parent->map_token) { 669 unmap_extent_buffer(parent, parent->map_token, 670 KM_USER1); 671 parent->map_token = NULL; 672 } 673 674 cur = btrfs_find_tree_block(root, blocknr, blocksize); 675 if (cur) 676 uptodate = btrfs_buffer_uptodate(cur, gen); 677 else 678 uptodate = 0; 679 if (!cur || !uptodate) { 680 if (cache_only) { 681 free_extent_buffer(cur); 682 continue; 683 } 684 if (!cur) { 685 cur = read_tree_block(root, blocknr, 686 blocksize, gen); 687 } else if (!uptodate) { 688 btrfs_read_buffer(cur, gen); 689 } 690 } 691 if (search_start == 0) 692 search_start = last_block; 693 694 btrfs_tree_lock(cur); 695 btrfs_set_lock_blocking(cur); 696 err = __btrfs_cow_block(trans, root, cur, parent, i, 697 &cur, search_start, 698 min(16 * blocksize, 699 (end_slot - i) * blocksize)); 700 if (err) { 701 btrfs_tree_unlock(cur); 702 free_extent_buffer(cur); 703 break; 704 } 705 search_start = cur->start; 706 last_block = cur->start; 707 *last_ret = search_start; 708 btrfs_tree_unlock(cur); 709 free_extent_buffer(cur); 710 } 711 if (parent->map_token) { 712 unmap_extent_buffer(parent, parent->map_token, 713 KM_USER1); 714 parent->map_token = NULL; 715 } 716 return err; 717 } 718 719 /* 720 * The leaf data grows from end-to-front in the node. 721 * this returns the address of the start of the last item, 722 * which is the stop of the leaf data stack 723 */ 724 static inline unsigned int leaf_data_end(struct btrfs_root *root, 725 struct extent_buffer *leaf) 726 { 727 u32 nr = btrfs_header_nritems(leaf); 728 if (nr == 0) 729 return BTRFS_LEAF_DATA_SIZE(root); 730 return btrfs_item_offset_nr(leaf, nr - 1); 731 } 732 733 /* 734 * extra debugging checks to make sure all the items in a key are 735 * well formed and in the proper order 736 */ 737 static int check_node(struct btrfs_root *root, struct btrfs_path *path, 738 int level) 739 { 740 struct extent_buffer *parent = NULL; 741 struct extent_buffer *node = path->nodes[level]; 742 struct btrfs_disk_key parent_key; 743 struct btrfs_disk_key node_key; 744 int parent_slot; 745 int slot; 746 struct btrfs_key cpukey; 747 u32 nritems = btrfs_header_nritems(node); 748 749 if (path->nodes[level + 1]) 750 parent = path->nodes[level + 1]; 751 752 slot = path->slots[level]; 753 BUG_ON(nritems == 0); 754 if (parent) { 755 parent_slot = path->slots[level + 1]; 756 btrfs_node_key(parent, &parent_key, parent_slot); 757 btrfs_node_key(node, &node_key, 0); 758 BUG_ON(memcmp(&parent_key, &node_key, 759 sizeof(struct btrfs_disk_key))); 760 BUG_ON(btrfs_node_blockptr(parent, parent_slot) != 761 btrfs_header_bytenr(node)); 762 } 763 BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root)); 764 if (slot != 0) { 765 btrfs_node_key_to_cpu(node, &cpukey, slot - 1); 766 btrfs_node_key(node, &node_key, slot); 767 BUG_ON(comp_keys(&node_key, &cpukey) <= 0); 768 } 769 if (slot < nritems - 1) { 770 btrfs_node_key_to_cpu(node, &cpukey, slot + 1); 771 btrfs_node_key(node, &node_key, slot); 772 BUG_ON(comp_keys(&node_key, &cpukey) >= 0); 773 } 774 return 0; 775 } 776 777 /* 778 * extra checking to make sure all the items in a leaf are 779 * well formed and in the proper order 780 */ 781 static int check_leaf(struct btrfs_root *root, struct btrfs_path *path, 782 int level) 783 { 784 struct extent_buffer *leaf = path->nodes[level]; 785 struct extent_buffer *parent = NULL; 786 int parent_slot; 787 struct btrfs_key cpukey; 788 struct btrfs_disk_key parent_key; 789 struct btrfs_disk_key leaf_key; 790 int slot = path->slots[0]; 791 792 u32 nritems = btrfs_header_nritems(leaf); 793 794 if (path->nodes[level + 1]) 795 parent = path->nodes[level + 1]; 796 797 if (nritems == 0) 798 return 0; 799 800 if (parent) { 801 parent_slot = path->slots[level + 1]; 802 btrfs_node_key(parent, &parent_key, parent_slot); 803 btrfs_item_key(leaf, &leaf_key, 0); 804 805 BUG_ON(memcmp(&parent_key, &leaf_key, 806 sizeof(struct btrfs_disk_key))); 807 BUG_ON(btrfs_node_blockptr(parent, parent_slot) != 808 btrfs_header_bytenr(leaf)); 809 } 810 if (slot != 0 && slot < nritems - 1) { 811 btrfs_item_key(leaf, &leaf_key, slot); 812 btrfs_item_key_to_cpu(leaf, &cpukey, slot - 1); 813 if (comp_keys(&leaf_key, &cpukey) <= 0) { 814 btrfs_print_leaf(root, leaf); 815 printk(KERN_CRIT "slot %d offset bad key\n", slot); 816 BUG_ON(1); 817 } 818 if (btrfs_item_offset_nr(leaf, slot - 1) != 819 btrfs_item_end_nr(leaf, slot)) { 820 btrfs_print_leaf(root, leaf); 821 printk(KERN_CRIT "slot %d offset bad\n", slot); 822 BUG_ON(1); 823 } 824 } 825 if (slot < nritems - 1) { 826 btrfs_item_key(leaf, &leaf_key, slot); 827 btrfs_item_key_to_cpu(leaf, &cpukey, slot + 1); 828 BUG_ON(comp_keys(&leaf_key, &cpukey) >= 0); 829 if (btrfs_item_offset_nr(leaf, slot) != 830 btrfs_item_end_nr(leaf, slot + 1)) { 831 btrfs_print_leaf(root, leaf); 832 printk(KERN_CRIT "slot %d offset bad\n", slot); 833 BUG_ON(1); 834 } 835 } 836 BUG_ON(btrfs_item_offset_nr(leaf, 0) + 837 btrfs_item_size_nr(leaf, 0) != BTRFS_LEAF_DATA_SIZE(root)); 838 return 0; 839 } 840 841 static noinline int check_block(struct btrfs_root *root, 842 struct btrfs_path *path, int level) 843 { 844 return 0; 845 if (level == 0) 846 return check_leaf(root, path, level); 847 return check_node(root, path, level); 848 } 849 850 /* 851 * search for key in the extent_buffer. The items start at offset p, 852 * and they are item_size apart. There are 'max' items in p. 853 * 854 * the slot in the array is returned via slot, and it points to 855 * the place where you would insert key if it is not found in 856 * the array. 857 * 858 * slot may point to max if the key is bigger than all of the keys 859 */ 860 static noinline int generic_bin_search(struct extent_buffer *eb, 861 unsigned long p, 862 int item_size, struct btrfs_key *key, 863 int max, int *slot) 864 { 865 int low = 0; 866 int high = max; 867 int mid; 868 int ret; 869 struct btrfs_disk_key *tmp = NULL; 870 struct btrfs_disk_key unaligned; 871 unsigned long offset; 872 char *map_token = NULL; 873 char *kaddr = NULL; 874 unsigned long map_start = 0; 875 unsigned long map_len = 0; 876 int err; 877 878 while (low < high) { 879 mid = (low + high) / 2; 880 offset = p + mid * item_size; 881 882 if (!map_token || offset < map_start || 883 (offset + sizeof(struct btrfs_disk_key)) > 884 map_start + map_len) { 885 if (map_token) { 886 unmap_extent_buffer(eb, map_token, KM_USER0); 887 map_token = NULL; 888 } 889 890 err = map_private_extent_buffer(eb, offset, 891 sizeof(struct btrfs_disk_key), 892 &map_token, &kaddr, 893 &map_start, &map_len, KM_USER0); 894 895 if (!err) { 896 tmp = (struct btrfs_disk_key *)(kaddr + offset - 897 map_start); 898 } else { 899 read_extent_buffer(eb, &unaligned, 900 offset, sizeof(unaligned)); 901 tmp = &unaligned; 902 } 903 904 } else { 905 tmp = (struct btrfs_disk_key *)(kaddr + offset - 906 map_start); 907 } 908 ret = comp_keys(tmp, key); 909 910 if (ret < 0) 911 low = mid + 1; 912 else if (ret > 0) 913 high = mid; 914 else { 915 *slot = mid; 916 if (map_token) 917 unmap_extent_buffer(eb, map_token, KM_USER0); 918 return 0; 919 } 920 } 921 *slot = low; 922 if (map_token) 923 unmap_extent_buffer(eb, map_token, KM_USER0); 924 return 1; 925 } 926 927 /* 928 * simple bin_search frontend that does the right thing for 929 * leaves vs nodes 930 */ 931 static int bin_search(struct extent_buffer *eb, struct btrfs_key *key, 932 int level, int *slot) 933 { 934 if (level == 0) { 935 return generic_bin_search(eb, 936 offsetof(struct btrfs_leaf, items), 937 sizeof(struct btrfs_item), 938 key, btrfs_header_nritems(eb), 939 slot); 940 } else { 941 return generic_bin_search(eb, 942 offsetof(struct btrfs_node, ptrs), 943 sizeof(struct btrfs_key_ptr), 944 key, btrfs_header_nritems(eb), 945 slot); 946 } 947 return -1; 948 } 949 950 int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, 951 int level, int *slot) 952 { 953 return bin_search(eb, key, level, slot); 954 } 955 956 static void root_add_used(struct btrfs_root *root, u32 size) 957 { 958 spin_lock(&root->accounting_lock); 959 btrfs_set_root_used(&root->root_item, 960 btrfs_root_used(&root->root_item) + size); 961 spin_unlock(&root->accounting_lock); 962 } 963 964 static void root_sub_used(struct btrfs_root *root, u32 size) 965 { 966 spin_lock(&root->accounting_lock); 967 btrfs_set_root_used(&root->root_item, 968 btrfs_root_used(&root->root_item) - size); 969 spin_unlock(&root->accounting_lock); 970 } 971 972 /* given a node and slot number, this reads the blocks it points to. The 973 * extent buffer is returned with a reference taken (but unlocked). 974 * NULL is returned on error. 975 */ 976 static noinline struct extent_buffer *read_node_slot(struct btrfs_root *root, 977 struct extent_buffer *parent, int slot) 978 { 979 int level = btrfs_header_level(parent); 980 if (slot < 0) 981 return NULL; 982 if (slot >= btrfs_header_nritems(parent)) 983 return NULL; 984 985 BUG_ON(level == 0); 986 987 return read_tree_block(root, btrfs_node_blockptr(parent, slot), 988 btrfs_level_size(root, level - 1), 989 btrfs_node_ptr_generation(parent, slot)); 990 } 991 992 /* 993 * node level balancing, used to make sure nodes are in proper order for 994 * item deletion. We balance from the top down, so we have to make sure 995 * that a deletion won't leave an node completely empty later on. 996 */ 997 static noinline int balance_level(struct btrfs_trans_handle *trans, 998 struct btrfs_root *root, 999 struct btrfs_path *path, int level) 1000 { 1001 struct extent_buffer *right = NULL; 1002 struct extent_buffer *mid; 1003 struct extent_buffer *left = NULL; 1004 struct extent_buffer *parent = NULL; 1005 int ret = 0; 1006 int wret; 1007 int pslot; 1008 int orig_slot = path->slots[level]; 1009 u64 orig_ptr; 1010 1011 if (level == 0) 1012 return 0; 1013 1014 mid = path->nodes[level]; 1015 1016 WARN_ON(!path->locks[level]); 1017 WARN_ON(btrfs_header_generation(mid) != trans->transid); 1018 1019 orig_ptr = btrfs_node_blockptr(mid, orig_slot); 1020 1021 if (level < BTRFS_MAX_LEVEL - 1) 1022 parent = path->nodes[level + 1]; 1023 pslot = path->slots[level + 1]; 1024 1025 /* 1026 * deal with the case where there is only one pointer in the root 1027 * by promoting the node below to a root 1028 */ 1029 if (!parent) { 1030 struct extent_buffer *child; 1031 1032 if (btrfs_header_nritems(mid) != 1) 1033 return 0; 1034 1035 /* promote the child to a root */ 1036 child = read_node_slot(root, mid, 0); 1037 BUG_ON(!child); 1038 btrfs_tree_lock(child); 1039 btrfs_set_lock_blocking(child); 1040 ret = btrfs_cow_block(trans, root, child, mid, 0, &child); 1041 if (ret) { 1042 btrfs_tree_unlock(child); 1043 free_extent_buffer(child); 1044 goto enospc; 1045 } 1046 1047 spin_lock(&root->node_lock); 1048 root->node = child; 1049 spin_unlock(&root->node_lock); 1050 1051 add_root_to_dirty_list(root); 1052 btrfs_tree_unlock(child); 1053 1054 path->locks[level] = 0; 1055 path->nodes[level] = NULL; 1056 clean_tree_block(trans, root, mid); 1057 btrfs_tree_unlock(mid); 1058 /* once for the path */ 1059 free_extent_buffer(mid); 1060 1061 root_sub_used(root, mid->len); 1062 btrfs_free_tree_block(trans, root, mid, 0, 1); 1063 /* once for the root ptr */ 1064 free_extent_buffer(mid); 1065 return 0; 1066 } 1067 if (btrfs_header_nritems(mid) > 1068 BTRFS_NODEPTRS_PER_BLOCK(root) / 4) 1069 return 0; 1070 1071 btrfs_header_nritems(mid); 1072 1073 left = read_node_slot(root, parent, pslot - 1); 1074 if (left) { 1075 btrfs_tree_lock(left); 1076 btrfs_set_lock_blocking(left); 1077 wret = btrfs_cow_block(trans, root, left, 1078 parent, pslot - 1, &left); 1079 if (wret) { 1080 ret = wret; 1081 goto enospc; 1082 } 1083 } 1084 right = read_node_slot(root, parent, pslot + 1); 1085 if (right) { 1086 btrfs_tree_lock(right); 1087 btrfs_set_lock_blocking(right); 1088 wret = btrfs_cow_block(trans, root, right, 1089 parent, pslot + 1, &right); 1090 if (wret) { 1091 ret = wret; 1092 goto enospc; 1093 } 1094 } 1095 1096 /* first, try to make some room in the middle buffer */ 1097 if (left) { 1098 orig_slot += btrfs_header_nritems(left); 1099 wret = push_node_left(trans, root, left, mid, 1); 1100 if (wret < 0) 1101 ret = wret; 1102 btrfs_header_nritems(mid); 1103 } 1104 1105 /* 1106 * then try to empty the right most buffer into the middle 1107 */ 1108 if (right) { 1109 wret = push_node_left(trans, root, mid, right, 1); 1110 if (wret < 0 && wret != -ENOSPC) 1111 ret = wret; 1112 if (btrfs_header_nritems(right) == 0) { 1113 clean_tree_block(trans, root, right); 1114 btrfs_tree_unlock(right); 1115 wret = del_ptr(trans, root, path, level + 1, pslot + 1116 1); 1117 if (wret) 1118 ret = wret; 1119 root_sub_used(root, right->len); 1120 btrfs_free_tree_block(trans, root, right, 0, 1); 1121 free_extent_buffer(right); 1122 right = NULL; 1123 } else { 1124 struct btrfs_disk_key right_key; 1125 btrfs_node_key(right, &right_key, 0); 1126 btrfs_set_node_key(parent, &right_key, pslot + 1); 1127 btrfs_mark_buffer_dirty(parent); 1128 } 1129 } 1130 if (btrfs_header_nritems(mid) == 1) { 1131 /* 1132 * we're not allowed to leave a node with one item in the 1133 * tree during a delete. A deletion from lower in the tree 1134 * could try to delete the only pointer in this node. 1135 * So, pull some keys from the left. 1136 * There has to be a left pointer at this point because 1137 * otherwise we would have pulled some pointers from the 1138 * right 1139 */ 1140 BUG_ON(!left); 1141 wret = balance_node_right(trans, root, mid, left); 1142 if (wret < 0) { 1143 ret = wret; 1144 goto enospc; 1145 } 1146 if (wret == 1) { 1147 wret = push_node_left(trans, root, left, mid, 1); 1148 if (wret < 0) 1149 ret = wret; 1150 } 1151 BUG_ON(wret == 1); 1152 } 1153 if (btrfs_header_nritems(mid) == 0) { 1154 clean_tree_block(trans, root, mid); 1155 btrfs_tree_unlock(mid); 1156 wret = del_ptr(trans, root, path, level + 1, pslot); 1157 if (wret) 1158 ret = wret; 1159 root_sub_used(root, mid->len); 1160 btrfs_free_tree_block(trans, root, mid, 0, 1); 1161 free_extent_buffer(mid); 1162 mid = NULL; 1163 } else { 1164 /* update the parent key to reflect our changes */ 1165 struct btrfs_disk_key mid_key; 1166 btrfs_node_key(mid, &mid_key, 0); 1167 btrfs_set_node_key(parent, &mid_key, pslot); 1168 btrfs_mark_buffer_dirty(parent); 1169 } 1170 1171 /* update the path */ 1172 if (left) { 1173 if (btrfs_header_nritems(left) > orig_slot) { 1174 extent_buffer_get(left); 1175 /* left was locked after cow */ 1176 path->nodes[level] = left; 1177 path->slots[level + 1] -= 1; 1178 path->slots[level] = orig_slot; 1179 if (mid) { 1180 btrfs_tree_unlock(mid); 1181 free_extent_buffer(mid); 1182 } 1183 } else { 1184 orig_slot -= btrfs_header_nritems(left); 1185 path->slots[level] = orig_slot; 1186 } 1187 } 1188 /* double check we haven't messed things up */ 1189 check_block(root, path, level); 1190 if (orig_ptr != 1191 btrfs_node_blockptr(path->nodes[level], path->slots[level])) 1192 BUG(); 1193 enospc: 1194 if (right) { 1195 btrfs_tree_unlock(right); 1196 free_extent_buffer(right); 1197 } 1198 if (left) { 1199 if (path->nodes[level] != left) 1200 btrfs_tree_unlock(left); 1201 free_extent_buffer(left); 1202 } 1203 return ret; 1204 } 1205 1206 /* Node balancing for insertion. Here we only split or push nodes around 1207 * when they are completely full. This is also done top down, so we 1208 * have to be pessimistic. 1209 */ 1210 static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans, 1211 struct btrfs_root *root, 1212 struct btrfs_path *path, int level) 1213 { 1214 struct extent_buffer *right = NULL; 1215 struct extent_buffer *mid; 1216 struct extent_buffer *left = NULL; 1217 struct extent_buffer *parent = NULL; 1218 int ret = 0; 1219 int wret; 1220 int pslot; 1221 int orig_slot = path->slots[level]; 1222 1223 if (level == 0) 1224 return 1; 1225 1226 mid = path->nodes[level]; 1227 WARN_ON(btrfs_header_generation(mid) != trans->transid); 1228 1229 if (level < BTRFS_MAX_LEVEL - 1) 1230 parent = path->nodes[level + 1]; 1231 pslot = path->slots[level + 1]; 1232 1233 if (!parent) 1234 return 1; 1235 1236 left = read_node_slot(root, parent, pslot - 1); 1237 1238 /* first, try to make some room in the middle buffer */ 1239 if (left) { 1240 u32 left_nr; 1241 1242 btrfs_tree_lock(left); 1243 btrfs_set_lock_blocking(left); 1244 1245 left_nr = btrfs_header_nritems(left); 1246 if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) { 1247 wret = 1; 1248 } else { 1249 ret = btrfs_cow_block(trans, root, left, parent, 1250 pslot - 1, &left); 1251 if (ret) 1252 wret = 1; 1253 else { 1254 wret = push_node_left(trans, root, 1255 left, mid, 0); 1256 } 1257 } 1258 if (wret < 0) 1259 ret = wret; 1260 if (wret == 0) { 1261 struct btrfs_disk_key disk_key; 1262 orig_slot += left_nr; 1263 btrfs_node_key(mid, &disk_key, 0); 1264 btrfs_set_node_key(parent, &disk_key, pslot); 1265 btrfs_mark_buffer_dirty(parent); 1266 if (btrfs_header_nritems(left) > orig_slot) { 1267 path->nodes[level] = left; 1268 path->slots[level + 1] -= 1; 1269 path->slots[level] = orig_slot; 1270 btrfs_tree_unlock(mid); 1271 free_extent_buffer(mid); 1272 } else { 1273 orig_slot -= 1274 btrfs_header_nritems(left); 1275 path->slots[level] = orig_slot; 1276 btrfs_tree_unlock(left); 1277 free_extent_buffer(left); 1278 } 1279 return 0; 1280 } 1281 btrfs_tree_unlock(left); 1282 free_extent_buffer(left); 1283 } 1284 right = read_node_slot(root, parent, pslot + 1); 1285 1286 /* 1287 * then try to empty the right most buffer into the middle 1288 */ 1289 if (right) { 1290 u32 right_nr; 1291 1292 btrfs_tree_lock(right); 1293 btrfs_set_lock_blocking(right); 1294 1295 right_nr = btrfs_header_nritems(right); 1296 if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) { 1297 wret = 1; 1298 } else { 1299 ret = btrfs_cow_block(trans, root, right, 1300 parent, pslot + 1, 1301 &right); 1302 if (ret) 1303 wret = 1; 1304 else { 1305 wret = balance_node_right(trans, root, 1306 right, mid); 1307 } 1308 } 1309 if (wret < 0) 1310 ret = wret; 1311 if (wret == 0) { 1312 struct btrfs_disk_key disk_key; 1313 1314 btrfs_node_key(right, &disk_key, 0); 1315 btrfs_set_node_key(parent, &disk_key, pslot + 1); 1316 btrfs_mark_buffer_dirty(parent); 1317 1318 if (btrfs_header_nritems(mid) <= orig_slot) { 1319 path->nodes[level] = right; 1320 path->slots[level + 1] += 1; 1321 path->slots[level] = orig_slot - 1322 btrfs_header_nritems(mid); 1323 btrfs_tree_unlock(mid); 1324 free_extent_buffer(mid); 1325 } else { 1326 btrfs_tree_unlock(right); 1327 free_extent_buffer(right); 1328 } 1329 return 0; 1330 } 1331 btrfs_tree_unlock(right); 1332 free_extent_buffer(right); 1333 } 1334 return 1; 1335 } 1336 1337 /* 1338 * readahead one full node of leaves, finding things that are close 1339 * to the block in 'slot', and triggering ra on them. 1340 */ 1341 static void reada_for_search(struct btrfs_root *root, 1342 struct btrfs_path *path, 1343 int level, int slot, u64 objectid) 1344 { 1345 struct extent_buffer *node; 1346 struct btrfs_disk_key disk_key; 1347 u32 nritems; 1348 u64 search; 1349 u64 target; 1350 u64 nread = 0; 1351 int direction = path->reada; 1352 struct extent_buffer *eb; 1353 u32 nr; 1354 u32 blocksize; 1355 u32 nscan = 0; 1356 1357 if (level != 1) 1358 return; 1359 1360 if (!path->nodes[level]) 1361 return; 1362 1363 node = path->nodes[level]; 1364 1365 search = btrfs_node_blockptr(node, slot); 1366 blocksize = btrfs_level_size(root, level - 1); 1367 eb = btrfs_find_tree_block(root, search, blocksize); 1368 if (eb) { 1369 free_extent_buffer(eb); 1370 return; 1371 } 1372 1373 target = search; 1374 1375 nritems = btrfs_header_nritems(node); 1376 nr = slot; 1377 while (1) { 1378 if (direction < 0) { 1379 if (nr == 0) 1380 break; 1381 nr--; 1382 } else if (direction > 0) { 1383 nr++; 1384 if (nr >= nritems) 1385 break; 1386 } 1387 if (path->reada < 0 && objectid) { 1388 btrfs_node_key(node, &disk_key, nr); 1389 if (btrfs_disk_key_objectid(&disk_key) != objectid) 1390 break; 1391 } 1392 search = btrfs_node_blockptr(node, nr); 1393 if ((search <= target && target - search <= 65536) || 1394 (search > target && search - target <= 65536)) { 1395 readahead_tree_block(root, search, blocksize, 1396 btrfs_node_ptr_generation(node, nr)); 1397 nread += blocksize; 1398 } 1399 nscan++; 1400 if ((nread > 65536 || nscan > 32)) 1401 break; 1402 } 1403 } 1404 1405 /* 1406 * returns -EAGAIN if it had to drop the path, or zero if everything was in 1407 * cache 1408 */ 1409 static noinline int reada_for_balance(struct btrfs_root *root, 1410 struct btrfs_path *path, int level) 1411 { 1412 int slot; 1413 int nritems; 1414 struct extent_buffer *parent; 1415 struct extent_buffer *eb; 1416 u64 gen; 1417 u64 block1 = 0; 1418 u64 block2 = 0; 1419 int ret = 0; 1420 int blocksize; 1421 1422 parent = path->nodes[level + 1]; 1423 if (!parent) 1424 return 0; 1425 1426 nritems = btrfs_header_nritems(parent); 1427 slot = path->slots[level + 1]; 1428 blocksize = btrfs_level_size(root, level); 1429 1430 if (slot > 0) { 1431 block1 = btrfs_node_blockptr(parent, slot - 1); 1432 gen = btrfs_node_ptr_generation(parent, slot - 1); 1433 eb = btrfs_find_tree_block(root, block1, blocksize); 1434 if (eb && btrfs_buffer_uptodate(eb, gen)) 1435 block1 = 0; 1436 free_extent_buffer(eb); 1437 } 1438 if (slot + 1 < nritems) { 1439 block2 = btrfs_node_blockptr(parent, slot + 1); 1440 gen = btrfs_node_ptr_generation(parent, slot + 1); 1441 eb = btrfs_find_tree_block(root, block2, blocksize); 1442 if (eb && btrfs_buffer_uptodate(eb, gen)) 1443 block2 = 0; 1444 free_extent_buffer(eb); 1445 } 1446 if (block1 || block2) { 1447 ret = -EAGAIN; 1448 1449 /* release the whole path */ 1450 btrfs_release_path(root, path); 1451 1452 /* read the blocks */ 1453 if (block1) 1454 readahead_tree_block(root, block1, blocksize, 0); 1455 if (block2) 1456 readahead_tree_block(root, block2, blocksize, 0); 1457 1458 if (block1) { 1459 eb = read_tree_block(root, block1, blocksize, 0); 1460 free_extent_buffer(eb); 1461 } 1462 if (block2) { 1463 eb = read_tree_block(root, block2, blocksize, 0); 1464 free_extent_buffer(eb); 1465 } 1466 } 1467 return ret; 1468 } 1469 1470 1471 /* 1472 * when we walk down the tree, it is usually safe to unlock the higher layers 1473 * in the tree. The exceptions are when our path goes through slot 0, because 1474 * operations on the tree might require changing key pointers higher up in the 1475 * tree. 1476 * 1477 * callers might also have set path->keep_locks, which tells this code to keep 1478 * the lock if the path points to the last slot in the block. This is part of 1479 * walking through the tree, and selecting the next slot in the higher block. 1480 * 1481 * lowest_unlock sets the lowest level in the tree we're allowed to unlock. so 1482 * if lowest_unlock is 1, level 0 won't be unlocked 1483 */ 1484 static noinline void unlock_up(struct btrfs_path *path, int level, 1485 int lowest_unlock) 1486 { 1487 int i; 1488 int skip_level = level; 1489 int no_skips = 0; 1490 struct extent_buffer *t; 1491 1492 for (i = level; i < BTRFS_MAX_LEVEL; i++) { 1493 if (!path->nodes[i]) 1494 break; 1495 if (!path->locks[i]) 1496 break; 1497 if (!no_skips && path->slots[i] == 0) { 1498 skip_level = i + 1; 1499 continue; 1500 } 1501 if (!no_skips && path->keep_locks) { 1502 u32 nritems; 1503 t = path->nodes[i]; 1504 nritems = btrfs_header_nritems(t); 1505 if (nritems < 1 || path->slots[i] >= nritems - 1) { 1506 skip_level = i + 1; 1507 continue; 1508 } 1509 } 1510 if (skip_level < i && i >= lowest_unlock) 1511 no_skips = 1; 1512 1513 t = path->nodes[i]; 1514 if (i >= lowest_unlock && i > skip_level && path->locks[i]) { 1515 btrfs_tree_unlock(t); 1516 path->locks[i] = 0; 1517 } 1518 } 1519 } 1520 1521 /* 1522 * This releases any locks held in the path starting at level and 1523 * going all the way up to the root. 1524 * 1525 * btrfs_search_slot will keep the lock held on higher nodes in a few 1526 * corner cases, such as COW of the block at slot zero in the node. This 1527 * ignores those rules, and it should only be called when there are no 1528 * more updates to be done higher up in the tree. 1529 */ 1530 noinline void btrfs_unlock_up_safe(struct btrfs_path *path, int level) 1531 { 1532 int i; 1533 1534 if (path->keep_locks) 1535 return; 1536 1537 for (i = level; i < BTRFS_MAX_LEVEL; i++) { 1538 if (!path->nodes[i]) 1539 continue; 1540 if (!path->locks[i]) 1541 continue; 1542 btrfs_tree_unlock(path->nodes[i]); 1543 path->locks[i] = 0; 1544 } 1545 } 1546 1547 /* 1548 * helper function for btrfs_search_slot. The goal is to find a block 1549 * in cache without setting the path to blocking. If we find the block 1550 * we return zero and the path is unchanged. 1551 * 1552 * If we can't find the block, we set the path blocking and do some 1553 * reada. -EAGAIN is returned and the search must be repeated. 1554 */ 1555 static int 1556 read_block_for_search(struct btrfs_trans_handle *trans, 1557 struct btrfs_root *root, struct btrfs_path *p, 1558 struct extent_buffer **eb_ret, int level, int slot, 1559 struct btrfs_key *key) 1560 { 1561 u64 blocknr; 1562 u64 gen; 1563 u32 blocksize; 1564 struct extent_buffer *b = *eb_ret; 1565 struct extent_buffer *tmp; 1566 int ret; 1567 1568 blocknr = btrfs_node_blockptr(b, slot); 1569 gen = btrfs_node_ptr_generation(b, slot); 1570 blocksize = btrfs_level_size(root, level - 1); 1571 1572 tmp = btrfs_find_tree_block(root, blocknr, blocksize); 1573 if (tmp) { 1574 if (btrfs_buffer_uptodate(tmp, 0)) { 1575 if (btrfs_buffer_uptodate(tmp, gen)) { 1576 /* 1577 * we found an up to date block without 1578 * sleeping, return 1579 * right away 1580 */ 1581 *eb_ret = tmp; 1582 return 0; 1583 } 1584 /* the pages were up to date, but we failed 1585 * the generation number check. Do a full 1586 * read for the generation number that is correct. 1587 * We must do this without dropping locks so 1588 * we can trust our generation number 1589 */ 1590 free_extent_buffer(tmp); 1591 tmp = read_tree_block(root, blocknr, blocksize, gen); 1592 if (tmp && btrfs_buffer_uptodate(tmp, gen)) { 1593 *eb_ret = tmp; 1594 return 0; 1595 } 1596 free_extent_buffer(tmp); 1597 btrfs_release_path(NULL, p); 1598 return -EIO; 1599 } 1600 } 1601 1602 /* 1603 * reduce lock contention at high levels 1604 * of the btree by dropping locks before 1605 * we read. Don't release the lock on the current 1606 * level because we need to walk this node to figure 1607 * out which blocks to read. 1608 */ 1609 btrfs_unlock_up_safe(p, level + 1); 1610 btrfs_set_path_blocking(p); 1611 1612 free_extent_buffer(tmp); 1613 if (p->reada) 1614 reada_for_search(root, p, level, slot, key->objectid); 1615 1616 btrfs_release_path(NULL, p); 1617 1618 ret = -EAGAIN; 1619 tmp = read_tree_block(root, blocknr, blocksize, 0); 1620 if (tmp) { 1621 /* 1622 * If the read above didn't mark this buffer up to date, 1623 * it will never end up being up to date. Set ret to EIO now 1624 * and give up so that our caller doesn't loop forever 1625 * on our EAGAINs. 1626 */ 1627 if (!btrfs_buffer_uptodate(tmp, 0)) 1628 ret = -EIO; 1629 free_extent_buffer(tmp); 1630 } 1631 return ret; 1632 } 1633 1634 /* 1635 * helper function for btrfs_search_slot. This does all of the checks 1636 * for node-level blocks and does any balancing required based on 1637 * the ins_len. 1638 * 1639 * If no extra work was required, zero is returned. If we had to 1640 * drop the path, -EAGAIN is returned and btrfs_search_slot must 1641 * start over 1642 */ 1643 static int 1644 setup_nodes_for_search(struct btrfs_trans_handle *trans, 1645 struct btrfs_root *root, struct btrfs_path *p, 1646 struct extent_buffer *b, int level, int ins_len) 1647 { 1648 int ret; 1649 if ((p->search_for_split || ins_len > 0) && btrfs_header_nritems(b) >= 1650 BTRFS_NODEPTRS_PER_BLOCK(root) - 3) { 1651 int sret; 1652 1653 sret = reada_for_balance(root, p, level); 1654 if (sret) 1655 goto again; 1656 1657 btrfs_set_path_blocking(p); 1658 sret = split_node(trans, root, p, level); 1659 btrfs_clear_path_blocking(p, NULL); 1660 1661 BUG_ON(sret > 0); 1662 if (sret) { 1663 ret = sret; 1664 goto done; 1665 } 1666 b = p->nodes[level]; 1667 } else if (ins_len < 0 && btrfs_header_nritems(b) < 1668 BTRFS_NODEPTRS_PER_BLOCK(root) / 2) { 1669 int sret; 1670 1671 sret = reada_for_balance(root, p, level); 1672 if (sret) 1673 goto again; 1674 1675 btrfs_set_path_blocking(p); 1676 sret = balance_level(trans, root, p, level); 1677 btrfs_clear_path_blocking(p, NULL); 1678 1679 if (sret) { 1680 ret = sret; 1681 goto done; 1682 } 1683 b = p->nodes[level]; 1684 if (!b) { 1685 btrfs_release_path(NULL, p); 1686 goto again; 1687 } 1688 BUG_ON(btrfs_header_nritems(b) == 1); 1689 } 1690 return 0; 1691 1692 again: 1693 ret = -EAGAIN; 1694 done: 1695 return ret; 1696 } 1697 1698 /* 1699 * look for key in the tree. path is filled in with nodes along the way 1700 * if key is found, we return zero and you can find the item in the leaf 1701 * level of the path (level 0) 1702 * 1703 * If the key isn't found, the path points to the slot where it should 1704 * be inserted, and 1 is returned. If there are other errors during the 1705 * search a negative error number is returned. 1706 * 1707 * if ins_len > 0, nodes and leaves will be split as we walk down the 1708 * tree. if ins_len < 0, nodes will be merged as we walk down the tree (if 1709 * possible) 1710 */ 1711 int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root 1712 *root, struct btrfs_key *key, struct btrfs_path *p, int 1713 ins_len, int cow) 1714 { 1715 struct extent_buffer *b; 1716 int slot; 1717 int ret; 1718 int err; 1719 int level; 1720 int lowest_unlock = 1; 1721 u8 lowest_level = 0; 1722 1723 lowest_level = p->lowest_level; 1724 WARN_ON(lowest_level && ins_len > 0); 1725 WARN_ON(p->nodes[0] != NULL); 1726 1727 if (ins_len < 0) 1728 lowest_unlock = 2; 1729 1730 again: 1731 if (p->search_commit_root) { 1732 b = root->commit_root; 1733 extent_buffer_get(b); 1734 if (!p->skip_locking) 1735 btrfs_tree_lock(b); 1736 } else { 1737 if (p->skip_locking) 1738 b = btrfs_root_node(root); 1739 else 1740 b = btrfs_lock_root_node(root); 1741 } 1742 1743 while (b) { 1744 level = btrfs_header_level(b); 1745 1746 /* 1747 * setup the path here so we can release it under lock 1748 * contention with the cow code 1749 */ 1750 p->nodes[level] = b; 1751 if (!p->skip_locking) 1752 p->locks[level] = 1; 1753 1754 if (cow) { 1755 /* 1756 * if we don't really need to cow this block 1757 * then we don't want to set the path blocking, 1758 * so we test it here 1759 */ 1760 if (!should_cow_block(trans, root, b)) 1761 goto cow_done; 1762 1763 btrfs_set_path_blocking(p); 1764 1765 err = btrfs_cow_block(trans, root, b, 1766 p->nodes[level + 1], 1767 p->slots[level + 1], &b); 1768 if (err) { 1769 ret = err; 1770 goto done; 1771 } 1772 } 1773 cow_done: 1774 BUG_ON(!cow && ins_len); 1775 if (level != btrfs_header_level(b)) 1776 WARN_ON(1); 1777 level = btrfs_header_level(b); 1778 1779 p->nodes[level] = b; 1780 if (!p->skip_locking) 1781 p->locks[level] = 1; 1782 1783 btrfs_clear_path_blocking(p, NULL); 1784 1785 /* 1786 * we have a lock on b and as long as we aren't changing 1787 * the tree, there is no way to for the items in b to change. 1788 * It is safe to drop the lock on our parent before we 1789 * go through the expensive btree search on b. 1790 * 1791 * If cow is true, then we might be changing slot zero, 1792 * which may require changing the parent. So, we can't 1793 * drop the lock until after we know which slot we're 1794 * operating on. 1795 */ 1796 if (!cow) 1797 btrfs_unlock_up_safe(p, level + 1); 1798 1799 ret = check_block(root, p, level); 1800 if (ret) { 1801 ret = -1; 1802 goto done; 1803 } 1804 1805 ret = bin_search(b, key, level, &slot); 1806 1807 if (level != 0) { 1808 int dec = 0; 1809 if (ret && slot > 0) { 1810 dec = 1; 1811 slot -= 1; 1812 } 1813 p->slots[level] = slot; 1814 err = setup_nodes_for_search(trans, root, p, b, level, 1815 ins_len); 1816 if (err == -EAGAIN) 1817 goto again; 1818 if (err) { 1819 ret = err; 1820 goto done; 1821 } 1822 b = p->nodes[level]; 1823 slot = p->slots[level]; 1824 1825 unlock_up(p, level, lowest_unlock); 1826 1827 if (level == lowest_level) { 1828 if (dec) 1829 p->slots[level]++; 1830 goto done; 1831 } 1832 1833 err = read_block_for_search(trans, root, p, 1834 &b, level, slot, key); 1835 if (err == -EAGAIN) 1836 goto again; 1837 if (err) { 1838 ret = err; 1839 goto done; 1840 } 1841 1842 if (!p->skip_locking) { 1843 btrfs_clear_path_blocking(p, NULL); 1844 err = btrfs_try_spin_lock(b); 1845 1846 if (!err) { 1847 btrfs_set_path_blocking(p); 1848 btrfs_tree_lock(b); 1849 btrfs_clear_path_blocking(p, b); 1850 } 1851 } 1852 } else { 1853 p->slots[level] = slot; 1854 if (ins_len > 0 && 1855 btrfs_leaf_free_space(root, b) < ins_len) { 1856 btrfs_set_path_blocking(p); 1857 err = split_leaf(trans, root, key, 1858 p, ins_len, ret == 0); 1859 btrfs_clear_path_blocking(p, NULL); 1860 1861 BUG_ON(err > 0); 1862 if (err) { 1863 ret = err; 1864 goto done; 1865 } 1866 } 1867 if (!p->search_for_split) 1868 unlock_up(p, level, lowest_unlock); 1869 goto done; 1870 } 1871 } 1872 ret = 1; 1873 done: 1874 /* 1875 * we don't really know what they plan on doing with the path 1876 * from here on, so for now just mark it as blocking 1877 */ 1878 if (!p->leave_spinning) 1879 btrfs_set_path_blocking(p); 1880 if (ret < 0) 1881 btrfs_release_path(root, p); 1882 return ret; 1883 } 1884 1885 /* 1886 * adjust the pointers going up the tree, starting at level 1887 * making sure the right key of each node is points to 'key'. 1888 * This is used after shifting pointers to the left, so it stops 1889 * fixing up pointers when a given leaf/node is not in slot 0 of the 1890 * higher levels 1891 * 1892 * If this fails to write a tree block, it returns -1, but continues 1893 * fixing up the blocks in ram so the tree is consistent. 1894 */ 1895 static int fixup_low_keys(struct btrfs_trans_handle *trans, 1896 struct btrfs_root *root, struct btrfs_path *path, 1897 struct btrfs_disk_key *key, int level) 1898 { 1899 int i; 1900 int ret = 0; 1901 struct extent_buffer *t; 1902 1903 for (i = level; i < BTRFS_MAX_LEVEL; i++) { 1904 int tslot = path->slots[i]; 1905 if (!path->nodes[i]) 1906 break; 1907 t = path->nodes[i]; 1908 btrfs_set_node_key(t, key, tslot); 1909 btrfs_mark_buffer_dirty(path->nodes[i]); 1910 if (tslot != 0) 1911 break; 1912 } 1913 return ret; 1914 } 1915 1916 /* 1917 * update item key. 1918 * 1919 * This function isn't completely safe. It's the caller's responsibility 1920 * that the new key won't break the order 1921 */ 1922 int btrfs_set_item_key_safe(struct btrfs_trans_handle *trans, 1923 struct btrfs_root *root, struct btrfs_path *path, 1924 struct btrfs_key *new_key) 1925 { 1926 struct btrfs_disk_key disk_key; 1927 struct extent_buffer *eb; 1928 int slot; 1929 1930 eb = path->nodes[0]; 1931 slot = path->slots[0]; 1932 if (slot > 0) { 1933 btrfs_item_key(eb, &disk_key, slot - 1); 1934 if (comp_keys(&disk_key, new_key) >= 0) 1935 return -1; 1936 } 1937 if (slot < btrfs_header_nritems(eb) - 1) { 1938 btrfs_item_key(eb, &disk_key, slot + 1); 1939 if (comp_keys(&disk_key, new_key) <= 0) 1940 return -1; 1941 } 1942 1943 btrfs_cpu_key_to_disk(&disk_key, new_key); 1944 btrfs_set_item_key(eb, &disk_key, slot); 1945 btrfs_mark_buffer_dirty(eb); 1946 if (slot == 0) 1947 fixup_low_keys(trans, root, path, &disk_key, 1); 1948 return 0; 1949 } 1950 1951 /* 1952 * try to push data from one node into the next node left in the 1953 * tree. 1954 * 1955 * returns 0 if some ptrs were pushed left, < 0 if there was some horrible 1956 * error, and > 0 if there was no room in the left hand block. 1957 */ 1958 static int push_node_left(struct btrfs_trans_handle *trans, 1959 struct btrfs_root *root, struct extent_buffer *dst, 1960 struct extent_buffer *src, int empty) 1961 { 1962 int push_items = 0; 1963 int src_nritems; 1964 int dst_nritems; 1965 int ret = 0; 1966 1967 src_nritems = btrfs_header_nritems(src); 1968 dst_nritems = btrfs_header_nritems(dst); 1969 push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems; 1970 WARN_ON(btrfs_header_generation(src) != trans->transid); 1971 WARN_ON(btrfs_header_generation(dst) != trans->transid); 1972 1973 if (!empty && src_nritems <= 8) 1974 return 1; 1975 1976 if (push_items <= 0) 1977 return 1; 1978 1979 if (empty) { 1980 push_items = min(src_nritems, push_items); 1981 if (push_items < src_nritems) { 1982 /* leave at least 8 pointers in the node if 1983 * we aren't going to empty it 1984 */ 1985 if (src_nritems - push_items < 8) { 1986 if (push_items <= 8) 1987 return 1; 1988 push_items -= 8; 1989 } 1990 } 1991 } else 1992 push_items = min(src_nritems - 8, push_items); 1993 1994 copy_extent_buffer(dst, src, 1995 btrfs_node_key_ptr_offset(dst_nritems), 1996 btrfs_node_key_ptr_offset(0), 1997 push_items * sizeof(struct btrfs_key_ptr)); 1998 1999 if (push_items < src_nritems) { 2000 memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0), 2001 btrfs_node_key_ptr_offset(push_items), 2002 (src_nritems - push_items) * 2003 sizeof(struct btrfs_key_ptr)); 2004 } 2005 btrfs_set_header_nritems(src, src_nritems - push_items); 2006 btrfs_set_header_nritems(dst, dst_nritems + push_items); 2007 btrfs_mark_buffer_dirty(src); 2008 btrfs_mark_buffer_dirty(dst); 2009 2010 return ret; 2011 } 2012 2013 /* 2014 * try to push data from one node into the next node right in the 2015 * tree. 2016 * 2017 * returns 0 if some ptrs were pushed, < 0 if there was some horrible 2018 * error, and > 0 if there was no room in the right hand block. 2019 * 2020 * this will only push up to 1/2 the contents of the left node over 2021 */ 2022 static int balance_node_right(struct btrfs_trans_handle *trans, 2023 struct btrfs_root *root, 2024 struct extent_buffer *dst, 2025 struct extent_buffer *src) 2026 { 2027 int push_items = 0; 2028 int max_push; 2029 int src_nritems; 2030 int dst_nritems; 2031 int ret = 0; 2032 2033 WARN_ON(btrfs_header_generation(src) != trans->transid); 2034 WARN_ON(btrfs_header_generation(dst) != trans->transid); 2035 2036 src_nritems = btrfs_header_nritems(src); 2037 dst_nritems = btrfs_header_nritems(dst); 2038 push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems; 2039 if (push_items <= 0) 2040 return 1; 2041 2042 if (src_nritems < 4) 2043 return 1; 2044 2045 max_push = src_nritems / 2 + 1; 2046 /* don't try to empty the node */ 2047 if (max_push >= src_nritems) 2048 return 1; 2049 2050 if (max_push < push_items) 2051 push_items = max_push; 2052 2053 memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items), 2054 btrfs_node_key_ptr_offset(0), 2055 (dst_nritems) * 2056 sizeof(struct btrfs_key_ptr)); 2057 2058 copy_extent_buffer(dst, src, 2059 btrfs_node_key_ptr_offset(0), 2060 btrfs_node_key_ptr_offset(src_nritems - push_items), 2061 push_items * sizeof(struct btrfs_key_ptr)); 2062 2063 btrfs_set_header_nritems(src, src_nritems - push_items); 2064 btrfs_set_header_nritems(dst, dst_nritems + push_items); 2065 2066 btrfs_mark_buffer_dirty(src); 2067 btrfs_mark_buffer_dirty(dst); 2068 2069 return ret; 2070 } 2071 2072 /* 2073 * helper function to insert a new root level in the tree. 2074 * A new node is allocated, and a single item is inserted to 2075 * point to the existing root 2076 * 2077 * returns zero on success or < 0 on failure. 2078 */ 2079 static noinline int insert_new_root(struct btrfs_trans_handle *trans, 2080 struct btrfs_root *root, 2081 struct btrfs_path *path, int level) 2082 { 2083 u64 lower_gen; 2084 struct extent_buffer *lower; 2085 struct extent_buffer *c; 2086 struct extent_buffer *old; 2087 struct btrfs_disk_key lower_key; 2088 2089 BUG_ON(path->nodes[level]); 2090 BUG_ON(path->nodes[level-1] != root->node); 2091 2092 lower = path->nodes[level-1]; 2093 if (level == 1) 2094 btrfs_item_key(lower, &lower_key, 0); 2095 else 2096 btrfs_node_key(lower, &lower_key, 0); 2097 2098 c = btrfs_alloc_free_block(trans, root, root->nodesize, 0, 2099 root->root_key.objectid, &lower_key, 2100 level, root->node->start, 0); 2101 if (IS_ERR(c)) 2102 return PTR_ERR(c); 2103 2104 root_add_used(root, root->nodesize); 2105 2106 memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header)); 2107 btrfs_set_header_nritems(c, 1); 2108 btrfs_set_header_level(c, level); 2109 btrfs_set_header_bytenr(c, c->start); 2110 btrfs_set_header_generation(c, trans->transid); 2111 btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV); 2112 btrfs_set_header_owner(c, root->root_key.objectid); 2113 2114 write_extent_buffer(c, root->fs_info->fsid, 2115 (unsigned long)btrfs_header_fsid(c), 2116 BTRFS_FSID_SIZE); 2117 2118 write_extent_buffer(c, root->fs_info->chunk_tree_uuid, 2119 (unsigned long)btrfs_header_chunk_tree_uuid(c), 2120 BTRFS_UUID_SIZE); 2121 2122 btrfs_set_node_key(c, &lower_key, 0); 2123 btrfs_set_node_blockptr(c, 0, lower->start); 2124 lower_gen = btrfs_header_generation(lower); 2125 WARN_ON(lower_gen != trans->transid); 2126 2127 btrfs_set_node_ptr_generation(c, 0, lower_gen); 2128 2129 btrfs_mark_buffer_dirty(c); 2130 2131 spin_lock(&root->node_lock); 2132 old = root->node; 2133 root->node = c; 2134 spin_unlock(&root->node_lock); 2135 2136 /* the super has an extra ref to root->node */ 2137 free_extent_buffer(old); 2138 2139 add_root_to_dirty_list(root); 2140 extent_buffer_get(c); 2141 path->nodes[level] = c; 2142 path->locks[level] = 1; 2143 path->slots[level] = 0; 2144 return 0; 2145 } 2146 2147 /* 2148 * worker function to insert a single pointer in a node. 2149 * the node should have enough room for the pointer already 2150 * 2151 * slot and level indicate where you want the key to go, and 2152 * blocknr is the block the key points to. 2153 * 2154 * returns zero on success and < 0 on any error 2155 */ 2156 static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root 2157 *root, struct btrfs_path *path, struct btrfs_disk_key 2158 *key, u64 bytenr, int slot, int level) 2159 { 2160 struct extent_buffer *lower; 2161 int nritems; 2162 2163 BUG_ON(!path->nodes[level]); 2164 btrfs_assert_tree_locked(path->nodes[level]); 2165 lower = path->nodes[level]; 2166 nritems = btrfs_header_nritems(lower); 2167 BUG_ON(slot > nritems); 2168 if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root)) 2169 BUG(); 2170 if (slot != nritems) { 2171 memmove_extent_buffer(lower, 2172 btrfs_node_key_ptr_offset(slot + 1), 2173 btrfs_node_key_ptr_offset(slot), 2174 (nritems - slot) * sizeof(struct btrfs_key_ptr)); 2175 } 2176 btrfs_set_node_key(lower, key, slot); 2177 btrfs_set_node_blockptr(lower, slot, bytenr); 2178 WARN_ON(trans->transid == 0); 2179 btrfs_set_node_ptr_generation(lower, slot, trans->transid); 2180 btrfs_set_header_nritems(lower, nritems + 1); 2181 btrfs_mark_buffer_dirty(lower); 2182 return 0; 2183 } 2184 2185 /* 2186 * split the node at the specified level in path in two. 2187 * The path is corrected to point to the appropriate node after the split 2188 * 2189 * Before splitting this tries to make some room in the node by pushing 2190 * left and right, if either one works, it returns right away. 2191 * 2192 * returns 0 on success and < 0 on failure 2193 */ 2194 static noinline int split_node(struct btrfs_trans_handle *trans, 2195 struct btrfs_root *root, 2196 struct btrfs_path *path, int level) 2197 { 2198 struct extent_buffer *c; 2199 struct extent_buffer *split; 2200 struct btrfs_disk_key disk_key; 2201 int mid; 2202 int ret; 2203 int wret; 2204 u32 c_nritems; 2205 2206 c = path->nodes[level]; 2207 WARN_ON(btrfs_header_generation(c) != trans->transid); 2208 if (c == root->node) { 2209 /* trying to split the root, lets make a new one */ 2210 ret = insert_new_root(trans, root, path, level + 1); 2211 if (ret) 2212 return ret; 2213 } else { 2214 ret = push_nodes_for_insert(trans, root, path, level); 2215 c = path->nodes[level]; 2216 if (!ret && btrfs_header_nritems(c) < 2217 BTRFS_NODEPTRS_PER_BLOCK(root) - 3) 2218 return 0; 2219 if (ret < 0) 2220 return ret; 2221 } 2222 2223 c_nritems = btrfs_header_nritems(c); 2224 mid = (c_nritems + 1) / 2; 2225 btrfs_node_key(c, &disk_key, mid); 2226 2227 split = btrfs_alloc_free_block(trans, root, root->nodesize, 0, 2228 root->root_key.objectid, 2229 &disk_key, level, c->start, 0); 2230 if (IS_ERR(split)) 2231 return PTR_ERR(split); 2232 2233 root_add_used(root, root->nodesize); 2234 2235 memset_extent_buffer(split, 0, 0, sizeof(struct btrfs_header)); 2236 btrfs_set_header_level(split, btrfs_header_level(c)); 2237 btrfs_set_header_bytenr(split, split->start); 2238 btrfs_set_header_generation(split, trans->transid); 2239 btrfs_set_header_backref_rev(split, BTRFS_MIXED_BACKREF_REV); 2240 btrfs_set_header_owner(split, root->root_key.objectid); 2241 write_extent_buffer(split, root->fs_info->fsid, 2242 (unsigned long)btrfs_header_fsid(split), 2243 BTRFS_FSID_SIZE); 2244 write_extent_buffer(split, root->fs_info->chunk_tree_uuid, 2245 (unsigned long)btrfs_header_chunk_tree_uuid(split), 2246 BTRFS_UUID_SIZE); 2247 2248 2249 copy_extent_buffer(split, c, 2250 btrfs_node_key_ptr_offset(0), 2251 btrfs_node_key_ptr_offset(mid), 2252 (c_nritems - mid) * sizeof(struct btrfs_key_ptr)); 2253 btrfs_set_header_nritems(split, c_nritems - mid); 2254 btrfs_set_header_nritems(c, mid); 2255 ret = 0; 2256 2257 btrfs_mark_buffer_dirty(c); 2258 btrfs_mark_buffer_dirty(split); 2259 2260 wret = insert_ptr(trans, root, path, &disk_key, split->start, 2261 path->slots[level + 1] + 1, 2262 level + 1); 2263 if (wret) 2264 ret = wret; 2265 2266 if (path->slots[level] >= mid) { 2267 path->slots[level] -= mid; 2268 btrfs_tree_unlock(c); 2269 free_extent_buffer(c); 2270 path->nodes[level] = split; 2271 path->slots[level + 1] += 1; 2272 } else { 2273 btrfs_tree_unlock(split); 2274 free_extent_buffer(split); 2275 } 2276 return ret; 2277 } 2278 2279 /* 2280 * how many bytes are required to store the items in a leaf. start 2281 * and nr indicate which items in the leaf to check. This totals up the 2282 * space used both by the item structs and the item data 2283 */ 2284 static int leaf_space_used(struct extent_buffer *l, int start, int nr) 2285 { 2286 int data_len; 2287 int nritems = btrfs_header_nritems(l); 2288 int end = min(nritems, start + nr) - 1; 2289 2290 if (!nr) 2291 return 0; 2292 data_len = btrfs_item_end_nr(l, start); 2293 data_len = data_len - btrfs_item_offset_nr(l, end); 2294 data_len += sizeof(struct btrfs_item) * nr; 2295 WARN_ON(data_len < 0); 2296 return data_len; 2297 } 2298 2299 /* 2300 * The space between the end of the leaf items and 2301 * the start of the leaf data. IOW, how much room 2302 * the leaf has left for both items and data 2303 */ 2304 noinline int btrfs_leaf_free_space(struct btrfs_root *root, 2305 struct extent_buffer *leaf) 2306 { 2307 int nritems = btrfs_header_nritems(leaf); 2308 int ret; 2309 ret = BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems); 2310 if (ret < 0) { 2311 printk(KERN_CRIT "leaf free space ret %d, leaf data size %lu, " 2312 "used %d nritems %d\n", 2313 ret, (unsigned long) BTRFS_LEAF_DATA_SIZE(root), 2314 leaf_space_used(leaf, 0, nritems), nritems); 2315 } 2316 return ret; 2317 } 2318 2319 /* 2320 * min slot controls the lowest index we're willing to push to the 2321 * right. We'll push up to and including min_slot, but no lower 2322 */ 2323 static noinline int __push_leaf_right(struct btrfs_trans_handle *trans, 2324 struct btrfs_root *root, 2325 struct btrfs_path *path, 2326 int data_size, int empty, 2327 struct extent_buffer *right, 2328 int free_space, u32 left_nritems, 2329 u32 min_slot) 2330 { 2331 struct extent_buffer *left = path->nodes[0]; 2332 struct extent_buffer *upper = path->nodes[1]; 2333 struct btrfs_disk_key disk_key; 2334 int slot; 2335 u32 i; 2336 int push_space = 0; 2337 int push_items = 0; 2338 struct btrfs_item *item; 2339 u32 nr; 2340 u32 right_nritems; 2341 u32 data_end; 2342 u32 this_item_size; 2343 2344 if (empty) 2345 nr = 0; 2346 else 2347 nr = max_t(u32, 1, min_slot); 2348 2349 if (path->slots[0] >= left_nritems) 2350 push_space += data_size; 2351 2352 slot = path->slots[1]; 2353 i = left_nritems - 1; 2354 while (i >= nr) { 2355 item = btrfs_item_nr(left, i); 2356 2357 if (!empty && push_items > 0) { 2358 if (path->slots[0] > i) 2359 break; 2360 if (path->slots[0] == i) { 2361 int space = btrfs_leaf_free_space(root, left); 2362 if (space + push_space * 2 > free_space) 2363 break; 2364 } 2365 } 2366 2367 if (path->slots[0] == i) 2368 push_space += data_size; 2369 2370 if (!left->map_token) { 2371 map_extent_buffer(left, (unsigned long)item, 2372 sizeof(struct btrfs_item), 2373 &left->map_token, &left->kaddr, 2374 &left->map_start, &left->map_len, 2375 KM_USER1); 2376 } 2377 2378 this_item_size = btrfs_item_size(left, item); 2379 if (this_item_size + sizeof(*item) + push_space > free_space) 2380 break; 2381 2382 push_items++; 2383 push_space += this_item_size + sizeof(*item); 2384 if (i == 0) 2385 break; 2386 i--; 2387 } 2388 if (left->map_token) { 2389 unmap_extent_buffer(left, left->map_token, KM_USER1); 2390 left->map_token = NULL; 2391 } 2392 2393 if (push_items == 0) 2394 goto out_unlock; 2395 2396 if (!empty && push_items == left_nritems) 2397 WARN_ON(1); 2398 2399 /* push left to right */ 2400 right_nritems = btrfs_header_nritems(right); 2401 2402 push_space = btrfs_item_end_nr(left, left_nritems - push_items); 2403 push_space -= leaf_data_end(root, left); 2404 2405 /* make room in the right data area */ 2406 data_end = leaf_data_end(root, right); 2407 memmove_extent_buffer(right, 2408 btrfs_leaf_data(right) + data_end - push_space, 2409 btrfs_leaf_data(right) + data_end, 2410 BTRFS_LEAF_DATA_SIZE(root) - data_end); 2411 2412 /* copy from the left data area */ 2413 copy_extent_buffer(right, left, btrfs_leaf_data(right) + 2414 BTRFS_LEAF_DATA_SIZE(root) - push_space, 2415 btrfs_leaf_data(left) + leaf_data_end(root, left), 2416 push_space); 2417 2418 memmove_extent_buffer(right, btrfs_item_nr_offset(push_items), 2419 btrfs_item_nr_offset(0), 2420 right_nritems * sizeof(struct btrfs_item)); 2421 2422 /* copy the items from left to right */ 2423 copy_extent_buffer(right, left, btrfs_item_nr_offset(0), 2424 btrfs_item_nr_offset(left_nritems - push_items), 2425 push_items * sizeof(struct btrfs_item)); 2426 2427 /* update the item pointers */ 2428 right_nritems += push_items; 2429 btrfs_set_header_nritems(right, right_nritems); 2430 push_space = BTRFS_LEAF_DATA_SIZE(root); 2431 for (i = 0; i < right_nritems; i++) { 2432 item = btrfs_item_nr(right, i); 2433 if (!right->map_token) { 2434 map_extent_buffer(right, (unsigned long)item, 2435 sizeof(struct btrfs_item), 2436 &right->map_token, &right->kaddr, 2437 &right->map_start, &right->map_len, 2438 KM_USER1); 2439 } 2440 push_space -= btrfs_item_size(right, item); 2441 btrfs_set_item_offset(right, item, push_space); 2442 } 2443 2444 if (right->map_token) { 2445 unmap_extent_buffer(right, right->map_token, KM_USER1); 2446 right->map_token = NULL; 2447 } 2448 left_nritems -= push_items; 2449 btrfs_set_header_nritems(left, left_nritems); 2450 2451 if (left_nritems) 2452 btrfs_mark_buffer_dirty(left); 2453 else 2454 clean_tree_block(trans, root, left); 2455 2456 btrfs_mark_buffer_dirty(right); 2457 2458 btrfs_item_key(right, &disk_key, 0); 2459 btrfs_set_node_key(upper, &disk_key, slot + 1); 2460 btrfs_mark_buffer_dirty(upper); 2461 2462 /* then fixup the leaf pointer in the path */ 2463 if (path->slots[0] >= left_nritems) { 2464 path->slots[0] -= left_nritems; 2465 if (btrfs_header_nritems(path->nodes[0]) == 0) 2466 clean_tree_block(trans, root, path->nodes[0]); 2467 btrfs_tree_unlock(path->nodes[0]); 2468 free_extent_buffer(path->nodes[0]); 2469 path->nodes[0] = right; 2470 path->slots[1] += 1; 2471 } else { 2472 btrfs_tree_unlock(right); 2473 free_extent_buffer(right); 2474 } 2475 return 0; 2476 2477 out_unlock: 2478 btrfs_tree_unlock(right); 2479 free_extent_buffer(right); 2480 return 1; 2481 } 2482 2483 /* 2484 * push some data in the path leaf to the right, trying to free up at 2485 * least data_size bytes. returns zero if the push worked, nonzero otherwise 2486 * 2487 * returns 1 if the push failed because the other node didn't have enough 2488 * room, 0 if everything worked out and < 0 if there were major errors. 2489 * 2490 * this will push starting from min_slot to the end of the leaf. It won't 2491 * push any slot lower than min_slot 2492 */ 2493 static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root 2494 *root, struct btrfs_path *path, 2495 int min_data_size, int data_size, 2496 int empty, u32 min_slot) 2497 { 2498 struct extent_buffer *left = path->nodes[0]; 2499 struct extent_buffer *right; 2500 struct extent_buffer *upper; 2501 int slot; 2502 int free_space; 2503 u32 left_nritems; 2504 int ret; 2505 2506 if (!path->nodes[1]) 2507 return 1; 2508 2509 slot = path->slots[1]; 2510 upper = path->nodes[1]; 2511 if (slot >= btrfs_header_nritems(upper) - 1) 2512 return 1; 2513 2514 btrfs_assert_tree_locked(path->nodes[1]); 2515 2516 right = read_node_slot(root, upper, slot + 1); 2517 btrfs_tree_lock(right); 2518 btrfs_set_lock_blocking(right); 2519 2520 free_space = btrfs_leaf_free_space(root, right); 2521 if (free_space < data_size) 2522 goto out_unlock; 2523 2524 /* cow and double check */ 2525 ret = btrfs_cow_block(trans, root, right, upper, 2526 slot + 1, &right); 2527 if (ret) 2528 goto out_unlock; 2529 2530 free_space = btrfs_leaf_free_space(root, right); 2531 if (free_space < data_size) 2532 goto out_unlock; 2533 2534 left_nritems = btrfs_header_nritems(left); 2535 if (left_nritems == 0) 2536 goto out_unlock; 2537 2538 return __push_leaf_right(trans, root, path, min_data_size, empty, 2539 right, free_space, left_nritems, min_slot); 2540 out_unlock: 2541 btrfs_tree_unlock(right); 2542 free_extent_buffer(right); 2543 return 1; 2544 } 2545 2546 /* 2547 * push some data in the path leaf to the left, trying to free up at 2548 * least data_size bytes. returns zero if the push worked, nonzero otherwise 2549 * 2550 * max_slot can put a limit on how far into the leaf we'll push items. The 2551 * item at 'max_slot' won't be touched. Use (u32)-1 to make us do all the 2552 * items 2553 */ 2554 static noinline int __push_leaf_left(struct btrfs_trans_handle *trans, 2555 struct btrfs_root *root, 2556 struct btrfs_path *path, int data_size, 2557 int empty, struct extent_buffer *left, 2558 int free_space, u32 right_nritems, 2559 u32 max_slot) 2560 { 2561 struct btrfs_disk_key disk_key; 2562 struct extent_buffer *right = path->nodes[0]; 2563 int i; 2564 int push_space = 0; 2565 int push_items = 0; 2566 struct btrfs_item *item; 2567 u32 old_left_nritems; 2568 u32 nr; 2569 int ret = 0; 2570 int wret; 2571 u32 this_item_size; 2572 u32 old_left_item_size; 2573 2574 if (empty) 2575 nr = min(right_nritems, max_slot); 2576 else 2577 nr = min(right_nritems - 1, max_slot); 2578 2579 for (i = 0; i < nr; i++) { 2580 item = btrfs_item_nr(right, i); 2581 if (!right->map_token) { 2582 map_extent_buffer(right, (unsigned long)item, 2583 sizeof(struct btrfs_item), 2584 &right->map_token, &right->kaddr, 2585 &right->map_start, &right->map_len, 2586 KM_USER1); 2587 } 2588 2589 if (!empty && push_items > 0) { 2590 if (path->slots[0] < i) 2591 break; 2592 if (path->slots[0] == i) { 2593 int space = btrfs_leaf_free_space(root, right); 2594 if (space + push_space * 2 > free_space) 2595 break; 2596 } 2597 } 2598 2599 if (path->slots[0] == i) 2600 push_space += data_size; 2601 2602 this_item_size = btrfs_item_size(right, item); 2603 if (this_item_size + sizeof(*item) + push_space > free_space) 2604 break; 2605 2606 push_items++; 2607 push_space += this_item_size + sizeof(*item); 2608 } 2609 2610 if (right->map_token) { 2611 unmap_extent_buffer(right, right->map_token, KM_USER1); 2612 right->map_token = NULL; 2613 } 2614 2615 if (push_items == 0) { 2616 ret = 1; 2617 goto out; 2618 } 2619 if (!empty && push_items == btrfs_header_nritems(right)) 2620 WARN_ON(1); 2621 2622 /* push data from right to left */ 2623 copy_extent_buffer(left, right, 2624 btrfs_item_nr_offset(btrfs_header_nritems(left)), 2625 btrfs_item_nr_offset(0), 2626 push_items * sizeof(struct btrfs_item)); 2627 2628 push_space = BTRFS_LEAF_DATA_SIZE(root) - 2629 btrfs_item_offset_nr(right, push_items - 1); 2630 2631 copy_extent_buffer(left, right, btrfs_leaf_data(left) + 2632 leaf_data_end(root, left) - push_space, 2633 btrfs_leaf_data(right) + 2634 btrfs_item_offset_nr(right, push_items - 1), 2635 push_space); 2636 old_left_nritems = btrfs_header_nritems(left); 2637 BUG_ON(old_left_nritems <= 0); 2638 2639 old_left_item_size = btrfs_item_offset_nr(left, old_left_nritems - 1); 2640 for (i = old_left_nritems; i < old_left_nritems + push_items; i++) { 2641 u32 ioff; 2642 2643 item = btrfs_item_nr(left, i); 2644 if (!left->map_token) { 2645 map_extent_buffer(left, (unsigned long)item, 2646 sizeof(struct btrfs_item), 2647 &left->map_token, &left->kaddr, 2648 &left->map_start, &left->map_len, 2649 KM_USER1); 2650 } 2651 2652 ioff = btrfs_item_offset(left, item); 2653 btrfs_set_item_offset(left, item, 2654 ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size)); 2655 } 2656 btrfs_set_header_nritems(left, old_left_nritems + push_items); 2657 if (left->map_token) { 2658 unmap_extent_buffer(left, left->map_token, KM_USER1); 2659 left->map_token = NULL; 2660 } 2661 2662 /* fixup right node */ 2663 if (push_items > right_nritems) { 2664 printk(KERN_CRIT "push items %d nr %u\n", push_items, 2665 right_nritems); 2666 WARN_ON(1); 2667 } 2668 2669 if (push_items < right_nritems) { 2670 push_space = btrfs_item_offset_nr(right, push_items - 1) - 2671 leaf_data_end(root, right); 2672 memmove_extent_buffer(right, btrfs_leaf_data(right) + 2673 BTRFS_LEAF_DATA_SIZE(root) - push_space, 2674 btrfs_leaf_data(right) + 2675 leaf_data_end(root, right), push_space); 2676 2677 memmove_extent_buffer(right, btrfs_item_nr_offset(0), 2678 btrfs_item_nr_offset(push_items), 2679 (btrfs_header_nritems(right) - push_items) * 2680 sizeof(struct btrfs_item)); 2681 } 2682 right_nritems -= push_items; 2683 btrfs_set_header_nritems(right, right_nritems); 2684 push_space = BTRFS_LEAF_DATA_SIZE(root); 2685 for (i = 0; i < right_nritems; i++) { 2686 item = btrfs_item_nr(right, i); 2687 2688 if (!right->map_token) { 2689 map_extent_buffer(right, (unsigned long)item, 2690 sizeof(struct btrfs_item), 2691 &right->map_token, &right->kaddr, 2692 &right->map_start, &right->map_len, 2693 KM_USER1); 2694 } 2695 2696 push_space = push_space - btrfs_item_size(right, item); 2697 btrfs_set_item_offset(right, item, push_space); 2698 } 2699 if (right->map_token) { 2700 unmap_extent_buffer(right, right->map_token, KM_USER1); 2701 right->map_token = NULL; 2702 } 2703 2704 btrfs_mark_buffer_dirty(left); 2705 if (right_nritems) 2706 btrfs_mark_buffer_dirty(right); 2707 else 2708 clean_tree_block(trans, root, right); 2709 2710 btrfs_item_key(right, &disk_key, 0); 2711 wret = fixup_low_keys(trans, root, path, &disk_key, 1); 2712 if (wret) 2713 ret = wret; 2714 2715 /* then fixup the leaf pointer in the path */ 2716 if (path->slots[0] < push_items) { 2717 path->slots[0] += old_left_nritems; 2718 btrfs_tree_unlock(path->nodes[0]); 2719 free_extent_buffer(path->nodes[0]); 2720 path->nodes[0] = left; 2721 path->slots[1] -= 1; 2722 } else { 2723 btrfs_tree_unlock(left); 2724 free_extent_buffer(left); 2725 path->slots[0] -= push_items; 2726 } 2727 BUG_ON(path->slots[0] < 0); 2728 return ret; 2729 out: 2730 btrfs_tree_unlock(left); 2731 free_extent_buffer(left); 2732 return ret; 2733 } 2734 2735 /* 2736 * push some data in the path leaf to the left, trying to free up at 2737 * least data_size bytes. returns zero if the push worked, nonzero otherwise 2738 * 2739 * max_slot can put a limit on how far into the leaf we'll push items. The 2740 * item at 'max_slot' won't be touched. Use (u32)-1 to make us push all the 2741 * items 2742 */ 2743 static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root 2744 *root, struct btrfs_path *path, int min_data_size, 2745 int data_size, int empty, u32 max_slot) 2746 { 2747 struct extent_buffer *right = path->nodes[0]; 2748 struct extent_buffer *left; 2749 int slot; 2750 int free_space; 2751 u32 right_nritems; 2752 int ret = 0; 2753 2754 slot = path->slots[1]; 2755 if (slot == 0) 2756 return 1; 2757 if (!path->nodes[1]) 2758 return 1; 2759 2760 right_nritems = btrfs_header_nritems(right); 2761 if (right_nritems == 0) 2762 return 1; 2763 2764 btrfs_assert_tree_locked(path->nodes[1]); 2765 2766 left = read_node_slot(root, path->nodes[1], slot - 1); 2767 btrfs_tree_lock(left); 2768 btrfs_set_lock_blocking(left); 2769 2770 free_space = btrfs_leaf_free_space(root, left); 2771 if (free_space < data_size) { 2772 ret = 1; 2773 goto out; 2774 } 2775 2776 /* cow and double check */ 2777 ret = btrfs_cow_block(trans, root, left, 2778 path->nodes[1], slot - 1, &left); 2779 if (ret) { 2780 /* we hit -ENOSPC, but it isn't fatal here */ 2781 ret = 1; 2782 goto out; 2783 } 2784 2785 free_space = btrfs_leaf_free_space(root, left); 2786 if (free_space < data_size) { 2787 ret = 1; 2788 goto out; 2789 } 2790 2791 return __push_leaf_left(trans, root, path, min_data_size, 2792 empty, left, free_space, right_nritems, 2793 max_slot); 2794 out: 2795 btrfs_tree_unlock(left); 2796 free_extent_buffer(left); 2797 return ret; 2798 } 2799 2800 /* 2801 * split the path's leaf in two, making sure there is at least data_size 2802 * available for the resulting leaf level of the path. 2803 * 2804 * returns 0 if all went well and < 0 on failure. 2805 */ 2806 static noinline int copy_for_split(struct btrfs_trans_handle *trans, 2807 struct btrfs_root *root, 2808 struct btrfs_path *path, 2809 struct extent_buffer *l, 2810 struct extent_buffer *right, 2811 int slot, int mid, int nritems) 2812 { 2813 int data_copy_size; 2814 int rt_data_off; 2815 int i; 2816 int ret = 0; 2817 int wret; 2818 struct btrfs_disk_key disk_key; 2819 2820 nritems = nritems - mid; 2821 btrfs_set_header_nritems(right, nritems); 2822 data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l); 2823 2824 copy_extent_buffer(right, l, btrfs_item_nr_offset(0), 2825 btrfs_item_nr_offset(mid), 2826 nritems * sizeof(struct btrfs_item)); 2827 2828 copy_extent_buffer(right, l, 2829 btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) - 2830 data_copy_size, btrfs_leaf_data(l) + 2831 leaf_data_end(root, l), data_copy_size); 2832 2833 rt_data_off = BTRFS_LEAF_DATA_SIZE(root) - 2834 btrfs_item_end_nr(l, mid); 2835 2836 for (i = 0; i < nritems; i++) { 2837 struct btrfs_item *item = btrfs_item_nr(right, i); 2838 u32 ioff; 2839 2840 if (!right->map_token) { 2841 map_extent_buffer(right, (unsigned long)item, 2842 sizeof(struct btrfs_item), 2843 &right->map_token, &right->kaddr, 2844 &right->map_start, &right->map_len, 2845 KM_USER1); 2846 } 2847 2848 ioff = btrfs_item_offset(right, item); 2849 btrfs_set_item_offset(right, item, ioff + rt_data_off); 2850 } 2851 2852 if (right->map_token) { 2853 unmap_extent_buffer(right, right->map_token, KM_USER1); 2854 right->map_token = NULL; 2855 } 2856 2857 btrfs_set_header_nritems(l, mid); 2858 ret = 0; 2859 btrfs_item_key(right, &disk_key, 0); 2860 wret = insert_ptr(trans, root, path, &disk_key, right->start, 2861 path->slots[1] + 1, 1); 2862 if (wret) 2863 ret = wret; 2864 2865 btrfs_mark_buffer_dirty(right); 2866 btrfs_mark_buffer_dirty(l); 2867 BUG_ON(path->slots[0] != slot); 2868 2869 if (mid <= slot) { 2870 btrfs_tree_unlock(path->nodes[0]); 2871 free_extent_buffer(path->nodes[0]); 2872 path->nodes[0] = right; 2873 path->slots[0] -= mid; 2874 path->slots[1] += 1; 2875 } else { 2876 btrfs_tree_unlock(right); 2877 free_extent_buffer(right); 2878 } 2879 2880 BUG_ON(path->slots[0] < 0); 2881 2882 return ret; 2883 } 2884 2885 /* 2886 * double splits happen when we need to insert a big item in the middle 2887 * of a leaf. A double split can leave us with 3 mostly empty leaves: 2888 * leaf: [ slots 0 - N] [ our target ] [ N + 1 - total in leaf ] 2889 * A B C 2890 * 2891 * We avoid this by trying to push the items on either side of our target 2892 * into the adjacent leaves. If all goes well we can avoid the double split 2893 * completely. 2894 */ 2895 static noinline int push_for_double_split(struct btrfs_trans_handle *trans, 2896 struct btrfs_root *root, 2897 struct btrfs_path *path, 2898 int data_size) 2899 { 2900 int ret; 2901 int progress = 0; 2902 int slot; 2903 u32 nritems; 2904 2905 slot = path->slots[0]; 2906 2907 /* 2908 * try to push all the items after our slot into the 2909 * right leaf 2910 */ 2911 ret = push_leaf_right(trans, root, path, 1, data_size, 0, slot); 2912 if (ret < 0) 2913 return ret; 2914 2915 if (ret == 0) 2916 progress++; 2917 2918 nritems = btrfs_header_nritems(path->nodes[0]); 2919 /* 2920 * our goal is to get our slot at the start or end of a leaf. If 2921 * we've done so we're done 2922 */ 2923 if (path->slots[0] == 0 || path->slots[0] == nritems) 2924 return 0; 2925 2926 if (btrfs_leaf_free_space(root, path->nodes[0]) >= data_size) 2927 return 0; 2928 2929 /* try to push all the items before our slot into the next leaf */ 2930 slot = path->slots[0]; 2931 ret = push_leaf_left(trans, root, path, 1, data_size, 0, slot); 2932 if (ret < 0) 2933 return ret; 2934 2935 if (ret == 0) 2936 progress++; 2937 2938 if (progress) 2939 return 0; 2940 return 1; 2941 } 2942 2943 /* 2944 * split the path's leaf in two, making sure there is at least data_size 2945 * available for the resulting leaf level of the path. 2946 * 2947 * returns 0 if all went well and < 0 on failure. 2948 */ 2949 static noinline int split_leaf(struct btrfs_trans_handle *trans, 2950 struct btrfs_root *root, 2951 struct btrfs_key *ins_key, 2952 struct btrfs_path *path, int data_size, 2953 int extend) 2954 { 2955 struct btrfs_disk_key disk_key; 2956 struct extent_buffer *l; 2957 u32 nritems; 2958 int mid; 2959 int slot; 2960 struct extent_buffer *right; 2961 int ret = 0; 2962 int wret; 2963 int split; 2964 int num_doubles = 0; 2965 int tried_avoid_double = 0; 2966 2967 l = path->nodes[0]; 2968 slot = path->slots[0]; 2969 if (extend && data_size + btrfs_item_size_nr(l, slot) + 2970 sizeof(struct btrfs_item) > BTRFS_LEAF_DATA_SIZE(root)) 2971 return -EOVERFLOW; 2972 2973 /* first try to make some room by pushing left and right */ 2974 if (data_size) { 2975 wret = push_leaf_right(trans, root, path, data_size, 2976 data_size, 0, 0); 2977 if (wret < 0) 2978 return wret; 2979 if (wret) { 2980 wret = push_leaf_left(trans, root, path, data_size, 2981 data_size, 0, (u32)-1); 2982 if (wret < 0) 2983 return wret; 2984 } 2985 l = path->nodes[0]; 2986 2987 /* did the pushes work? */ 2988 if (btrfs_leaf_free_space(root, l) >= data_size) 2989 return 0; 2990 } 2991 2992 if (!path->nodes[1]) { 2993 ret = insert_new_root(trans, root, path, 1); 2994 if (ret) 2995 return ret; 2996 } 2997 again: 2998 split = 1; 2999 l = path->nodes[0]; 3000 slot = path->slots[0]; 3001 nritems = btrfs_header_nritems(l); 3002 mid = (nritems + 1) / 2; 3003 3004 if (mid <= slot) { 3005 if (nritems == 1 || 3006 leaf_space_used(l, mid, nritems - mid) + data_size > 3007 BTRFS_LEAF_DATA_SIZE(root)) { 3008 if (slot >= nritems) { 3009 split = 0; 3010 } else { 3011 mid = slot; 3012 if (mid != nritems && 3013 leaf_space_used(l, mid, nritems - mid) + 3014 data_size > BTRFS_LEAF_DATA_SIZE(root)) { 3015 if (data_size && !tried_avoid_double) 3016 goto push_for_double; 3017 split = 2; 3018 } 3019 } 3020 } 3021 } else { 3022 if (leaf_space_used(l, 0, mid) + data_size > 3023 BTRFS_LEAF_DATA_SIZE(root)) { 3024 if (!extend && data_size && slot == 0) { 3025 split = 0; 3026 } else if ((extend || !data_size) && slot == 0) { 3027 mid = 1; 3028 } else { 3029 mid = slot; 3030 if (mid != nritems && 3031 leaf_space_used(l, mid, nritems - mid) + 3032 data_size > BTRFS_LEAF_DATA_SIZE(root)) { 3033 if (data_size && !tried_avoid_double) 3034 goto push_for_double; 3035 split = 2 ; 3036 } 3037 } 3038 } 3039 } 3040 3041 if (split == 0) 3042 btrfs_cpu_key_to_disk(&disk_key, ins_key); 3043 else 3044 btrfs_item_key(l, &disk_key, mid); 3045 3046 right = btrfs_alloc_free_block(trans, root, root->leafsize, 0, 3047 root->root_key.objectid, 3048 &disk_key, 0, l->start, 0); 3049 if (IS_ERR(right)) 3050 return PTR_ERR(right); 3051 3052 root_add_used(root, root->leafsize); 3053 3054 memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header)); 3055 btrfs_set_header_bytenr(right, right->start); 3056 btrfs_set_header_generation(right, trans->transid); 3057 btrfs_set_header_backref_rev(right, BTRFS_MIXED_BACKREF_REV); 3058 btrfs_set_header_owner(right, root->root_key.objectid); 3059 btrfs_set_header_level(right, 0); 3060 write_extent_buffer(right, root->fs_info->fsid, 3061 (unsigned long)btrfs_header_fsid(right), 3062 BTRFS_FSID_SIZE); 3063 3064 write_extent_buffer(right, root->fs_info->chunk_tree_uuid, 3065 (unsigned long)btrfs_header_chunk_tree_uuid(right), 3066 BTRFS_UUID_SIZE); 3067 3068 if (split == 0) { 3069 if (mid <= slot) { 3070 btrfs_set_header_nritems(right, 0); 3071 wret = insert_ptr(trans, root, path, 3072 &disk_key, right->start, 3073 path->slots[1] + 1, 1); 3074 if (wret) 3075 ret = wret; 3076 3077 btrfs_tree_unlock(path->nodes[0]); 3078 free_extent_buffer(path->nodes[0]); 3079 path->nodes[0] = right; 3080 path->slots[0] = 0; 3081 path->slots[1] += 1; 3082 } else { 3083 btrfs_set_header_nritems(right, 0); 3084 wret = insert_ptr(trans, root, path, 3085 &disk_key, 3086 right->start, 3087 path->slots[1], 1); 3088 if (wret) 3089 ret = wret; 3090 btrfs_tree_unlock(path->nodes[0]); 3091 free_extent_buffer(path->nodes[0]); 3092 path->nodes[0] = right; 3093 path->slots[0] = 0; 3094 if (path->slots[1] == 0) { 3095 wret = fixup_low_keys(trans, root, 3096 path, &disk_key, 1); 3097 if (wret) 3098 ret = wret; 3099 } 3100 } 3101 btrfs_mark_buffer_dirty(right); 3102 return ret; 3103 } 3104 3105 ret = copy_for_split(trans, root, path, l, right, slot, mid, nritems); 3106 BUG_ON(ret); 3107 3108 if (split == 2) { 3109 BUG_ON(num_doubles != 0); 3110 num_doubles++; 3111 goto again; 3112 } 3113 3114 return ret; 3115 3116 push_for_double: 3117 push_for_double_split(trans, root, path, data_size); 3118 tried_avoid_double = 1; 3119 if (btrfs_leaf_free_space(root, path->nodes[0]) >= data_size) 3120 return 0; 3121 goto again; 3122 } 3123 3124 static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans, 3125 struct btrfs_root *root, 3126 struct btrfs_path *path, int ins_len) 3127 { 3128 struct btrfs_key key; 3129 struct extent_buffer *leaf; 3130 struct btrfs_file_extent_item *fi; 3131 u64 extent_len = 0; 3132 u32 item_size; 3133 int ret; 3134 3135 leaf = path->nodes[0]; 3136 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 3137 3138 BUG_ON(key.type != BTRFS_EXTENT_DATA_KEY && 3139 key.type != BTRFS_EXTENT_CSUM_KEY); 3140 3141 if (btrfs_leaf_free_space(root, leaf) >= ins_len) 3142 return 0; 3143 3144 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 3145 if (key.type == BTRFS_EXTENT_DATA_KEY) { 3146 fi = btrfs_item_ptr(leaf, path->slots[0], 3147 struct btrfs_file_extent_item); 3148 extent_len = btrfs_file_extent_num_bytes(leaf, fi); 3149 } 3150 btrfs_release_path(root, path); 3151 3152 path->keep_locks = 1; 3153 path->search_for_split = 1; 3154 ret = btrfs_search_slot(trans, root, &key, path, 0, 1); 3155 path->search_for_split = 0; 3156 if (ret < 0) 3157 goto err; 3158 3159 ret = -EAGAIN; 3160 leaf = path->nodes[0]; 3161 /* if our item isn't there or got smaller, return now */ 3162 if (ret > 0 || item_size != btrfs_item_size_nr(leaf, path->slots[0])) 3163 goto err; 3164 3165 /* the leaf has changed, it now has room. return now */ 3166 if (btrfs_leaf_free_space(root, path->nodes[0]) >= ins_len) 3167 goto err; 3168 3169 if (key.type == BTRFS_EXTENT_DATA_KEY) { 3170 fi = btrfs_item_ptr(leaf, path->slots[0], 3171 struct btrfs_file_extent_item); 3172 if (extent_len != btrfs_file_extent_num_bytes(leaf, fi)) 3173 goto err; 3174 } 3175 3176 btrfs_set_path_blocking(path); 3177 ret = split_leaf(trans, root, &key, path, ins_len, 1); 3178 if (ret) 3179 goto err; 3180 3181 path->keep_locks = 0; 3182 btrfs_unlock_up_safe(path, 1); 3183 return 0; 3184 err: 3185 path->keep_locks = 0; 3186 return ret; 3187 } 3188 3189 static noinline int split_item(struct btrfs_trans_handle *trans, 3190 struct btrfs_root *root, 3191 struct btrfs_path *path, 3192 struct btrfs_key *new_key, 3193 unsigned long split_offset) 3194 { 3195 struct extent_buffer *leaf; 3196 struct btrfs_item *item; 3197 struct btrfs_item *new_item; 3198 int slot; 3199 char *buf; 3200 u32 nritems; 3201 u32 item_size; 3202 u32 orig_offset; 3203 struct btrfs_disk_key disk_key; 3204 3205 leaf = path->nodes[0]; 3206 BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item)); 3207 3208 btrfs_set_path_blocking(path); 3209 3210 item = btrfs_item_nr(leaf, path->slots[0]); 3211 orig_offset = btrfs_item_offset(leaf, item); 3212 item_size = btrfs_item_size(leaf, item); 3213 3214 buf = kmalloc(item_size, GFP_NOFS); 3215 if (!buf) 3216 return -ENOMEM; 3217 3218 read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf, 3219 path->slots[0]), item_size); 3220 3221 slot = path->slots[0] + 1; 3222 nritems = btrfs_header_nritems(leaf); 3223 if (slot != nritems) { 3224 /* shift the items */ 3225 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1), 3226 btrfs_item_nr_offset(slot), 3227 (nritems - slot) * sizeof(struct btrfs_item)); 3228 } 3229 3230 btrfs_cpu_key_to_disk(&disk_key, new_key); 3231 btrfs_set_item_key(leaf, &disk_key, slot); 3232 3233 new_item = btrfs_item_nr(leaf, slot); 3234 3235 btrfs_set_item_offset(leaf, new_item, orig_offset); 3236 btrfs_set_item_size(leaf, new_item, item_size - split_offset); 3237 3238 btrfs_set_item_offset(leaf, item, 3239 orig_offset + item_size - split_offset); 3240 btrfs_set_item_size(leaf, item, split_offset); 3241 3242 btrfs_set_header_nritems(leaf, nritems + 1); 3243 3244 /* write the data for the start of the original item */ 3245 write_extent_buffer(leaf, buf, 3246 btrfs_item_ptr_offset(leaf, path->slots[0]), 3247 split_offset); 3248 3249 /* write the data for the new item */ 3250 write_extent_buffer(leaf, buf + split_offset, 3251 btrfs_item_ptr_offset(leaf, slot), 3252 item_size - split_offset); 3253 btrfs_mark_buffer_dirty(leaf); 3254 3255 BUG_ON(btrfs_leaf_free_space(root, leaf) < 0); 3256 kfree(buf); 3257 return 0; 3258 } 3259 3260 /* 3261 * This function splits a single item into two items, 3262 * giving 'new_key' to the new item and splitting the 3263 * old one at split_offset (from the start of the item). 3264 * 3265 * The path may be released by this operation. After 3266 * the split, the path is pointing to the old item. The 3267 * new item is going to be in the same node as the old one. 3268 * 3269 * Note, the item being split must be smaller enough to live alone on 3270 * a tree block with room for one extra struct btrfs_item 3271 * 3272 * This allows us to split the item in place, keeping a lock on the 3273 * leaf the entire time. 3274 */ 3275 int btrfs_split_item(struct btrfs_trans_handle *trans, 3276 struct btrfs_root *root, 3277 struct btrfs_path *path, 3278 struct btrfs_key *new_key, 3279 unsigned long split_offset) 3280 { 3281 int ret; 3282 ret = setup_leaf_for_split(trans, root, path, 3283 sizeof(struct btrfs_item)); 3284 if (ret) 3285 return ret; 3286 3287 ret = split_item(trans, root, path, new_key, split_offset); 3288 return ret; 3289 } 3290 3291 /* 3292 * This function duplicate a item, giving 'new_key' to the new item. 3293 * It guarantees both items live in the same tree leaf and the new item 3294 * is contiguous with the original item. 3295 * 3296 * This allows us to split file extent in place, keeping a lock on the 3297 * leaf the entire time. 3298 */ 3299 int btrfs_duplicate_item(struct btrfs_trans_handle *trans, 3300 struct btrfs_root *root, 3301 struct btrfs_path *path, 3302 struct btrfs_key *new_key) 3303 { 3304 struct extent_buffer *leaf; 3305 int ret; 3306 u32 item_size; 3307 3308 leaf = path->nodes[0]; 3309 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 3310 ret = setup_leaf_for_split(trans, root, path, 3311 item_size + sizeof(struct btrfs_item)); 3312 if (ret) 3313 return ret; 3314 3315 path->slots[0]++; 3316 ret = setup_items_for_insert(trans, root, path, new_key, &item_size, 3317 item_size, item_size + 3318 sizeof(struct btrfs_item), 1); 3319 BUG_ON(ret); 3320 3321 leaf = path->nodes[0]; 3322 memcpy_extent_buffer(leaf, 3323 btrfs_item_ptr_offset(leaf, path->slots[0]), 3324 btrfs_item_ptr_offset(leaf, path->slots[0] - 1), 3325 item_size); 3326 return 0; 3327 } 3328 3329 /* 3330 * make the item pointed to by the path smaller. new_size indicates 3331 * how small to make it, and from_end tells us if we just chop bytes 3332 * off the end of the item or if we shift the item to chop bytes off 3333 * the front. 3334 */ 3335 int btrfs_truncate_item(struct btrfs_trans_handle *trans, 3336 struct btrfs_root *root, 3337 struct btrfs_path *path, 3338 u32 new_size, int from_end) 3339 { 3340 int ret = 0; 3341 int slot; 3342 struct extent_buffer *leaf; 3343 struct btrfs_item *item; 3344 u32 nritems; 3345 unsigned int data_end; 3346 unsigned int old_data_start; 3347 unsigned int old_size; 3348 unsigned int size_diff; 3349 int i; 3350 3351 leaf = path->nodes[0]; 3352 slot = path->slots[0]; 3353 3354 old_size = btrfs_item_size_nr(leaf, slot); 3355 if (old_size == new_size) 3356 return 0; 3357 3358 nritems = btrfs_header_nritems(leaf); 3359 data_end = leaf_data_end(root, leaf); 3360 3361 old_data_start = btrfs_item_offset_nr(leaf, slot); 3362 3363 size_diff = old_size - new_size; 3364 3365 BUG_ON(slot < 0); 3366 BUG_ON(slot >= nritems); 3367 3368 /* 3369 * item0..itemN ... dataN.offset..dataN.size .. data0.size 3370 */ 3371 /* first correct the data pointers */ 3372 for (i = slot; i < nritems; i++) { 3373 u32 ioff; 3374 item = btrfs_item_nr(leaf, i); 3375 3376 if (!leaf->map_token) { 3377 map_extent_buffer(leaf, (unsigned long)item, 3378 sizeof(struct btrfs_item), 3379 &leaf->map_token, &leaf->kaddr, 3380 &leaf->map_start, &leaf->map_len, 3381 KM_USER1); 3382 } 3383 3384 ioff = btrfs_item_offset(leaf, item); 3385 btrfs_set_item_offset(leaf, item, ioff + size_diff); 3386 } 3387 3388 if (leaf->map_token) { 3389 unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); 3390 leaf->map_token = NULL; 3391 } 3392 3393 /* shift the data */ 3394 if (from_end) { 3395 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + 3396 data_end + size_diff, btrfs_leaf_data(leaf) + 3397 data_end, old_data_start + new_size - data_end); 3398 } else { 3399 struct btrfs_disk_key disk_key; 3400 u64 offset; 3401 3402 btrfs_item_key(leaf, &disk_key, slot); 3403 3404 if (btrfs_disk_key_type(&disk_key) == BTRFS_EXTENT_DATA_KEY) { 3405 unsigned long ptr; 3406 struct btrfs_file_extent_item *fi; 3407 3408 fi = btrfs_item_ptr(leaf, slot, 3409 struct btrfs_file_extent_item); 3410 fi = (struct btrfs_file_extent_item *)( 3411 (unsigned long)fi - size_diff); 3412 3413 if (btrfs_file_extent_type(leaf, fi) == 3414 BTRFS_FILE_EXTENT_INLINE) { 3415 ptr = btrfs_item_ptr_offset(leaf, slot); 3416 memmove_extent_buffer(leaf, ptr, 3417 (unsigned long)fi, 3418 offsetof(struct btrfs_file_extent_item, 3419 disk_bytenr)); 3420 } 3421 } 3422 3423 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + 3424 data_end + size_diff, btrfs_leaf_data(leaf) + 3425 data_end, old_data_start - data_end); 3426 3427 offset = btrfs_disk_key_offset(&disk_key); 3428 btrfs_set_disk_key_offset(&disk_key, offset + size_diff); 3429 btrfs_set_item_key(leaf, &disk_key, slot); 3430 if (slot == 0) 3431 fixup_low_keys(trans, root, path, &disk_key, 1); 3432 } 3433 3434 item = btrfs_item_nr(leaf, slot); 3435 btrfs_set_item_size(leaf, item, new_size); 3436 btrfs_mark_buffer_dirty(leaf); 3437 3438 ret = 0; 3439 if (btrfs_leaf_free_space(root, leaf) < 0) { 3440 btrfs_print_leaf(root, leaf); 3441 BUG(); 3442 } 3443 return ret; 3444 } 3445 3446 /* 3447 * make the item pointed to by the path bigger, data_size is the new size. 3448 */ 3449 int btrfs_extend_item(struct btrfs_trans_handle *trans, 3450 struct btrfs_root *root, struct btrfs_path *path, 3451 u32 data_size) 3452 { 3453 int ret = 0; 3454 int slot; 3455 struct extent_buffer *leaf; 3456 struct btrfs_item *item; 3457 u32 nritems; 3458 unsigned int data_end; 3459 unsigned int old_data; 3460 unsigned int old_size; 3461 int i; 3462 3463 leaf = path->nodes[0]; 3464 3465 nritems = btrfs_header_nritems(leaf); 3466 data_end = leaf_data_end(root, leaf); 3467 3468 if (btrfs_leaf_free_space(root, leaf) < data_size) { 3469 btrfs_print_leaf(root, leaf); 3470 BUG(); 3471 } 3472 slot = path->slots[0]; 3473 old_data = btrfs_item_end_nr(leaf, slot); 3474 3475 BUG_ON(slot < 0); 3476 if (slot >= nritems) { 3477 btrfs_print_leaf(root, leaf); 3478 printk(KERN_CRIT "slot %d too large, nritems %d\n", 3479 slot, nritems); 3480 BUG_ON(1); 3481 } 3482 3483 /* 3484 * item0..itemN ... dataN.offset..dataN.size .. data0.size 3485 */ 3486 /* first correct the data pointers */ 3487 for (i = slot; i < nritems; i++) { 3488 u32 ioff; 3489 item = btrfs_item_nr(leaf, i); 3490 3491 if (!leaf->map_token) { 3492 map_extent_buffer(leaf, (unsigned long)item, 3493 sizeof(struct btrfs_item), 3494 &leaf->map_token, &leaf->kaddr, 3495 &leaf->map_start, &leaf->map_len, 3496 KM_USER1); 3497 } 3498 ioff = btrfs_item_offset(leaf, item); 3499 btrfs_set_item_offset(leaf, item, ioff - data_size); 3500 } 3501 3502 if (leaf->map_token) { 3503 unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); 3504 leaf->map_token = NULL; 3505 } 3506 3507 /* shift the data */ 3508 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + 3509 data_end - data_size, btrfs_leaf_data(leaf) + 3510 data_end, old_data - data_end); 3511 3512 data_end = old_data; 3513 old_size = btrfs_item_size_nr(leaf, slot); 3514 item = btrfs_item_nr(leaf, slot); 3515 btrfs_set_item_size(leaf, item, old_size + data_size); 3516 btrfs_mark_buffer_dirty(leaf); 3517 3518 ret = 0; 3519 if (btrfs_leaf_free_space(root, leaf) < 0) { 3520 btrfs_print_leaf(root, leaf); 3521 BUG(); 3522 } 3523 return ret; 3524 } 3525 3526 /* 3527 * Given a key and some data, insert items into the tree. 3528 * This does all the path init required, making room in the tree if needed. 3529 * Returns the number of keys that were inserted. 3530 */ 3531 int btrfs_insert_some_items(struct btrfs_trans_handle *trans, 3532 struct btrfs_root *root, 3533 struct btrfs_path *path, 3534 struct btrfs_key *cpu_key, u32 *data_size, 3535 int nr) 3536 { 3537 struct extent_buffer *leaf; 3538 struct btrfs_item *item; 3539 int ret = 0; 3540 int slot; 3541 int i; 3542 u32 nritems; 3543 u32 total_data = 0; 3544 u32 total_size = 0; 3545 unsigned int data_end; 3546 struct btrfs_disk_key disk_key; 3547 struct btrfs_key found_key; 3548 3549 for (i = 0; i < nr; i++) { 3550 if (total_size + data_size[i] + sizeof(struct btrfs_item) > 3551 BTRFS_LEAF_DATA_SIZE(root)) { 3552 break; 3553 nr = i; 3554 } 3555 total_data += data_size[i]; 3556 total_size += data_size[i] + sizeof(struct btrfs_item); 3557 } 3558 BUG_ON(nr == 0); 3559 3560 ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1); 3561 if (ret == 0) 3562 return -EEXIST; 3563 if (ret < 0) 3564 goto out; 3565 3566 leaf = path->nodes[0]; 3567 3568 nritems = btrfs_header_nritems(leaf); 3569 data_end = leaf_data_end(root, leaf); 3570 3571 if (btrfs_leaf_free_space(root, leaf) < total_size) { 3572 for (i = nr; i >= 0; i--) { 3573 total_data -= data_size[i]; 3574 total_size -= data_size[i] + sizeof(struct btrfs_item); 3575 if (total_size < btrfs_leaf_free_space(root, leaf)) 3576 break; 3577 } 3578 nr = i; 3579 } 3580 3581 slot = path->slots[0]; 3582 BUG_ON(slot < 0); 3583 3584 if (slot != nritems) { 3585 unsigned int old_data = btrfs_item_end_nr(leaf, slot); 3586 3587 item = btrfs_item_nr(leaf, slot); 3588 btrfs_item_key_to_cpu(leaf, &found_key, slot); 3589 3590 /* figure out how many keys we can insert in here */ 3591 total_data = data_size[0]; 3592 for (i = 1; i < nr; i++) { 3593 if (btrfs_comp_cpu_keys(&found_key, cpu_key + i) <= 0) 3594 break; 3595 total_data += data_size[i]; 3596 } 3597 nr = i; 3598 3599 if (old_data < data_end) { 3600 btrfs_print_leaf(root, leaf); 3601 printk(KERN_CRIT "slot %d old_data %d data_end %d\n", 3602 slot, old_data, data_end); 3603 BUG_ON(1); 3604 } 3605 /* 3606 * item0..itemN ... dataN.offset..dataN.size .. data0.size 3607 */ 3608 /* first correct the data pointers */ 3609 WARN_ON(leaf->map_token); 3610 for (i = slot; i < nritems; i++) { 3611 u32 ioff; 3612 3613 item = btrfs_item_nr(leaf, i); 3614 if (!leaf->map_token) { 3615 map_extent_buffer(leaf, (unsigned long)item, 3616 sizeof(struct btrfs_item), 3617 &leaf->map_token, &leaf->kaddr, 3618 &leaf->map_start, &leaf->map_len, 3619 KM_USER1); 3620 } 3621 3622 ioff = btrfs_item_offset(leaf, item); 3623 btrfs_set_item_offset(leaf, item, ioff - total_data); 3624 } 3625 if (leaf->map_token) { 3626 unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); 3627 leaf->map_token = NULL; 3628 } 3629 3630 /* shift the items */ 3631 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr), 3632 btrfs_item_nr_offset(slot), 3633 (nritems - slot) * sizeof(struct btrfs_item)); 3634 3635 /* shift the data */ 3636 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + 3637 data_end - total_data, btrfs_leaf_data(leaf) + 3638 data_end, old_data - data_end); 3639 data_end = old_data; 3640 } else { 3641 /* 3642 * this sucks but it has to be done, if we are inserting at 3643 * the end of the leaf only insert 1 of the items, since we 3644 * have no way of knowing whats on the next leaf and we'd have 3645 * to drop our current locks to figure it out 3646 */ 3647 nr = 1; 3648 } 3649 3650 /* setup the item for the new data */ 3651 for (i = 0; i < nr; i++) { 3652 btrfs_cpu_key_to_disk(&disk_key, cpu_key + i); 3653 btrfs_set_item_key(leaf, &disk_key, slot + i); 3654 item = btrfs_item_nr(leaf, slot + i); 3655 btrfs_set_item_offset(leaf, item, data_end - data_size[i]); 3656 data_end -= data_size[i]; 3657 btrfs_set_item_size(leaf, item, data_size[i]); 3658 } 3659 btrfs_set_header_nritems(leaf, nritems + nr); 3660 btrfs_mark_buffer_dirty(leaf); 3661 3662 ret = 0; 3663 if (slot == 0) { 3664 btrfs_cpu_key_to_disk(&disk_key, cpu_key); 3665 ret = fixup_low_keys(trans, root, path, &disk_key, 1); 3666 } 3667 3668 if (btrfs_leaf_free_space(root, leaf) < 0) { 3669 btrfs_print_leaf(root, leaf); 3670 BUG(); 3671 } 3672 out: 3673 if (!ret) 3674 ret = nr; 3675 return ret; 3676 } 3677 3678 /* 3679 * this is a helper for btrfs_insert_empty_items, the main goal here is 3680 * to save stack depth by doing the bulk of the work in a function 3681 * that doesn't call btrfs_search_slot 3682 */ 3683 static noinline_for_stack int 3684 setup_items_for_insert(struct btrfs_trans_handle *trans, 3685 struct btrfs_root *root, struct btrfs_path *path, 3686 struct btrfs_key *cpu_key, u32 *data_size, 3687 u32 total_data, u32 total_size, int nr) 3688 { 3689 struct btrfs_item *item; 3690 int i; 3691 u32 nritems; 3692 unsigned int data_end; 3693 struct btrfs_disk_key disk_key; 3694 int ret; 3695 struct extent_buffer *leaf; 3696 int slot; 3697 3698 leaf = path->nodes[0]; 3699 slot = path->slots[0]; 3700 3701 nritems = btrfs_header_nritems(leaf); 3702 data_end = leaf_data_end(root, leaf); 3703 3704 if (btrfs_leaf_free_space(root, leaf) < total_size) { 3705 btrfs_print_leaf(root, leaf); 3706 printk(KERN_CRIT "not enough freespace need %u have %d\n", 3707 total_size, btrfs_leaf_free_space(root, leaf)); 3708 BUG(); 3709 } 3710 3711 if (slot != nritems) { 3712 unsigned int old_data = btrfs_item_end_nr(leaf, slot); 3713 3714 if (old_data < data_end) { 3715 btrfs_print_leaf(root, leaf); 3716 printk(KERN_CRIT "slot %d old_data %d data_end %d\n", 3717 slot, old_data, data_end); 3718 BUG_ON(1); 3719 } 3720 /* 3721 * item0..itemN ... dataN.offset..dataN.size .. data0.size 3722 */ 3723 /* first correct the data pointers */ 3724 WARN_ON(leaf->map_token); 3725 for (i = slot; i < nritems; i++) { 3726 u32 ioff; 3727 3728 item = btrfs_item_nr(leaf, i); 3729 if (!leaf->map_token) { 3730 map_extent_buffer(leaf, (unsigned long)item, 3731 sizeof(struct btrfs_item), 3732 &leaf->map_token, &leaf->kaddr, 3733 &leaf->map_start, &leaf->map_len, 3734 KM_USER1); 3735 } 3736 3737 ioff = btrfs_item_offset(leaf, item); 3738 btrfs_set_item_offset(leaf, item, ioff - total_data); 3739 } 3740 if (leaf->map_token) { 3741 unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); 3742 leaf->map_token = NULL; 3743 } 3744 3745 /* shift the items */ 3746 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr), 3747 btrfs_item_nr_offset(slot), 3748 (nritems - slot) * sizeof(struct btrfs_item)); 3749 3750 /* shift the data */ 3751 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + 3752 data_end - total_data, btrfs_leaf_data(leaf) + 3753 data_end, old_data - data_end); 3754 data_end = old_data; 3755 } 3756 3757 /* setup the item for the new data */ 3758 for (i = 0; i < nr; i++) { 3759 btrfs_cpu_key_to_disk(&disk_key, cpu_key + i); 3760 btrfs_set_item_key(leaf, &disk_key, slot + i); 3761 item = btrfs_item_nr(leaf, slot + i); 3762 btrfs_set_item_offset(leaf, item, data_end - data_size[i]); 3763 data_end -= data_size[i]; 3764 btrfs_set_item_size(leaf, item, data_size[i]); 3765 } 3766 3767 btrfs_set_header_nritems(leaf, nritems + nr); 3768 3769 ret = 0; 3770 if (slot == 0) { 3771 struct btrfs_disk_key disk_key; 3772 btrfs_cpu_key_to_disk(&disk_key, cpu_key); 3773 ret = fixup_low_keys(trans, root, path, &disk_key, 1); 3774 } 3775 btrfs_unlock_up_safe(path, 1); 3776 btrfs_mark_buffer_dirty(leaf); 3777 3778 if (btrfs_leaf_free_space(root, leaf) < 0) { 3779 btrfs_print_leaf(root, leaf); 3780 BUG(); 3781 } 3782 return ret; 3783 } 3784 3785 /* 3786 * Given a key and some data, insert items into the tree. 3787 * This does all the path init required, making room in the tree if needed. 3788 */ 3789 int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, 3790 struct btrfs_root *root, 3791 struct btrfs_path *path, 3792 struct btrfs_key *cpu_key, u32 *data_size, 3793 int nr) 3794 { 3795 int ret = 0; 3796 int slot; 3797 int i; 3798 u32 total_size = 0; 3799 u32 total_data = 0; 3800 3801 for (i = 0; i < nr; i++) 3802 total_data += data_size[i]; 3803 3804 total_size = total_data + (nr * sizeof(struct btrfs_item)); 3805 ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1); 3806 if (ret == 0) 3807 return -EEXIST; 3808 if (ret < 0) 3809 goto out; 3810 3811 slot = path->slots[0]; 3812 BUG_ON(slot < 0); 3813 3814 ret = setup_items_for_insert(trans, root, path, cpu_key, data_size, 3815 total_data, total_size, nr); 3816 3817 out: 3818 return ret; 3819 } 3820 3821 /* 3822 * Given a key and some data, insert an item into the tree. 3823 * This does all the path init required, making room in the tree if needed. 3824 */ 3825 int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root 3826 *root, struct btrfs_key *cpu_key, void *data, u32 3827 data_size) 3828 { 3829 int ret = 0; 3830 struct btrfs_path *path; 3831 struct extent_buffer *leaf; 3832 unsigned long ptr; 3833 3834 path = btrfs_alloc_path(); 3835 BUG_ON(!path); 3836 ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size); 3837 if (!ret) { 3838 leaf = path->nodes[0]; 3839 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); 3840 write_extent_buffer(leaf, data, ptr, data_size); 3841 btrfs_mark_buffer_dirty(leaf); 3842 } 3843 btrfs_free_path(path); 3844 return ret; 3845 } 3846 3847 /* 3848 * delete the pointer from a given node. 3849 * 3850 * the tree should have been previously balanced so the deletion does not 3851 * empty a node. 3852 */ 3853 static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, 3854 struct btrfs_path *path, int level, int slot) 3855 { 3856 struct extent_buffer *parent = path->nodes[level]; 3857 u32 nritems; 3858 int ret = 0; 3859 int wret; 3860 3861 nritems = btrfs_header_nritems(parent); 3862 if (slot != nritems - 1) { 3863 memmove_extent_buffer(parent, 3864 btrfs_node_key_ptr_offset(slot), 3865 btrfs_node_key_ptr_offset(slot + 1), 3866 sizeof(struct btrfs_key_ptr) * 3867 (nritems - slot - 1)); 3868 } 3869 nritems--; 3870 btrfs_set_header_nritems(parent, nritems); 3871 if (nritems == 0 && parent == root->node) { 3872 BUG_ON(btrfs_header_level(root->node) != 1); 3873 /* just turn the root into a leaf and break */ 3874 btrfs_set_header_level(root->node, 0); 3875 } else if (slot == 0) { 3876 struct btrfs_disk_key disk_key; 3877 3878 btrfs_node_key(parent, &disk_key, 0); 3879 wret = fixup_low_keys(trans, root, path, &disk_key, level + 1); 3880 if (wret) 3881 ret = wret; 3882 } 3883 btrfs_mark_buffer_dirty(parent); 3884 return ret; 3885 } 3886 3887 /* 3888 * a helper function to delete the leaf pointed to by path->slots[1] and 3889 * path->nodes[1]. 3890 * 3891 * This deletes the pointer in path->nodes[1] and frees the leaf 3892 * block extent. zero is returned if it all worked out, < 0 otherwise. 3893 * 3894 * The path must have already been setup for deleting the leaf, including 3895 * all the proper balancing. path->nodes[1] must be locked. 3896 */ 3897 static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans, 3898 struct btrfs_root *root, 3899 struct btrfs_path *path, 3900 struct extent_buffer *leaf) 3901 { 3902 int ret; 3903 3904 WARN_ON(btrfs_header_generation(leaf) != trans->transid); 3905 ret = del_ptr(trans, root, path, 1, path->slots[1]); 3906 if (ret) 3907 return ret; 3908 3909 /* 3910 * btrfs_free_extent is expensive, we want to make sure we 3911 * aren't holding any locks when we call it 3912 */ 3913 btrfs_unlock_up_safe(path, 0); 3914 3915 root_sub_used(root, leaf->len); 3916 3917 btrfs_free_tree_block(trans, root, leaf, 0, 1); 3918 return 0; 3919 } 3920 /* 3921 * delete the item at the leaf level in path. If that empties 3922 * the leaf, remove it from the tree 3923 */ 3924 int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, 3925 struct btrfs_path *path, int slot, int nr) 3926 { 3927 struct extent_buffer *leaf; 3928 struct btrfs_item *item; 3929 int last_off; 3930 int dsize = 0; 3931 int ret = 0; 3932 int wret; 3933 int i; 3934 u32 nritems; 3935 3936 leaf = path->nodes[0]; 3937 last_off = btrfs_item_offset_nr(leaf, slot + nr - 1); 3938 3939 for (i = 0; i < nr; i++) 3940 dsize += btrfs_item_size_nr(leaf, slot + i); 3941 3942 nritems = btrfs_header_nritems(leaf); 3943 3944 if (slot + nr != nritems) { 3945 int data_end = leaf_data_end(root, leaf); 3946 3947 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + 3948 data_end + dsize, 3949 btrfs_leaf_data(leaf) + data_end, 3950 last_off - data_end); 3951 3952 for (i = slot + nr; i < nritems; i++) { 3953 u32 ioff; 3954 3955 item = btrfs_item_nr(leaf, i); 3956 if (!leaf->map_token) { 3957 map_extent_buffer(leaf, (unsigned long)item, 3958 sizeof(struct btrfs_item), 3959 &leaf->map_token, &leaf->kaddr, 3960 &leaf->map_start, &leaf->map_len, 3961 KM_USER1); 3962 } 3963 ioff = btrfs_item_offset(leaf, item); 3964 btrfs_set_item_offset(leaf, item, ioff + dsize); 3965 } 3966 3967 if (leaf->map_token) { 3968 unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); 3969 leaf->map_token = NULL; 3970 } 3971 3972 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot), 3973 btrfs_item_nr_offset(slot + nr), 3974 sizeof(struct btrfs_item) * 3975 (nritems - slot - nr)); 3976 } 3977 btrfs_set_header_nritems(leaf, nritems - nr); 3978 nritems -= nr; 3979 3980 /* delete the leaf if we've emptied it */ 3981 if (nritems == 0) { 3982 if (leaf == root->node) { 3983 btrfs_set_header_level(leaf, 0); 3984 } else { 3985 btrfs_set_path_blocking(path); 3986 clean_tree_block(trans, root, leaf); 3987 ret = btrfs_del_leaf(trans, root, path, leaf); 3988 BUG_ON(ret); 3989 } 3990 } else { 3991 int used = leaf_space_used(leaf, 0, nritems); 3992 if (slot == 0) { 3993 struct btrfs_disk_key disk_key; 3994 3995 btrfs_item_key(leaf, &disk_key, 0); 3996 wret = fixup_low_keys(trans, root, path, 3997 &disk_key, 1); 3998 if (wret) 3999 ret = wret; 4000 } 4001 4002 /* delete the leaf if it is mostly empty */ 4003 if (used < BTRFS_LEAF_DATA_SIZE(root) / 3) { 4004 /* push_leaf_left fixes the path. 4005 * make sure the path still points to our leaf 4006 * for possible call to del_ptr below 4007 */ 4008 slot = path->slots[1]; 4009 extent_buffer_get(leaf); 4010 4011 btrfs_set_path_blocking(path); 4012 wret = push_leaf_left(trans, root, path, 1, 1, 4013 1, (u32)-1); 4014 if (wret < 0 && wret != -ENOSPC) 4015 ret = wret; 4016 4017 if (path->nodes[0] == leaf && 4018 btrfs_header_nritems(leaf)) { 4019 wret = push_leaf_right(trans, root, path, 1, 4020 1, 1, 0); 4021 if (wret < 0 && wret != -ENOSPC) 4022 ret = wret; 4023 } 4024 4025 if (btrfs_header_nritems(leaf) == 0) { 4026 path->slots[1] = slot; 4027 ret = btrfs_del_leaf(trans, root, path, leaf); 4028 BUG_ON(ret); 4029 free_extent_buffer(leaf); 4030 } else { 4031 /* if we're still in the path, make sure 4032 * we're dirty. Otherwise, one of the 4033 * push_leaf functions must have already 4034 * dirtied this buffer 4035 */ 4036 if (path->nodes[0] == leaf) 4037 btrfs_mark_buffer_dirty(leaf); 4038 free_extent_buffer(leaf); 4039 } 4040 } else { 4041 btrfs_mark_buffer_dirty(leaf); 4042 } 4043 } 4044 return ret; 4045 } 4046 4047 /* 4048 * search the tree again to find a leaf with lesser keys 4049 * returns 0 if it found something or 1 if there are no lesser leaves. 4050 * returns < 0 on io errors. 4051 * 4052 * This may release the path, and so you may lose any locks held at the 4053 * time you call it. 4054 */ 4055 int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) 4056 { 4057 struct btrfs_key key; 4058 struct btrfs_disk_key found_key; 4059 int ret; 4060 4061 btrfs_item_key_to_cpu(path->nodes[0], &key, 0); 4062 4063 if (key.offset > 0) 4064 key.offset--; 4065 else if (key.type > 0) 4066 key.type--; 4067 else if (key.objectid > 0) 4068 key.objectid--; 4069 else 4070 return 1; 4071 4072 btrfs_release_path(root, path); 4073 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 4074 if (ret < 0) 4075 return ret; 4076 btrfs_item_key(path->nodes[0], &found_key, 0); 4077 ret = comp_keys(&found_key, &key); 4078 if (ret < 0) 4079 return 0; 4080 return 1; 4081 } 4082 4083 /* 4084 * A helper function to walk down the tree starting at min_key, and looking 4085 * for nodes or leaves that are either in cache or have a minimum 4086 * transaction id. This is used by the btree defrag code, and tree logging 4087 * 4088 * This does not cow, but it does stuff the starting key it finds back 4089 * into min_key, so you can call btrfs_search_slot with cow=1 on the 4090 * key and get a writable path. 4091 * 4092 * This does lock as it descends, and path->keep_locks should be set 4093 * to 1 by the caller. 4094 * 4095 * This honors path->lowest_level to prevent descent past a given level 4096 * of the tree. 4097 * 4098 * min_trans indicates the oldest transaction that you are interested 4099 * in walking through. Any nodes or leaves older than min_trans are 4100 * skipped over (without reading them). 4101 * 4102 * returns zero if something useful was found, < 0 on error and 1 if there 4103 * was nothing in the tree that matched the search criteria. 4104 */ 4105 int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key, 4106 struct btrfs_key *max_key, 4107 struct btrfs_path *path, int cache_only, 4108 u64 min_trans) 4109 { 4110 struct extent_buffer *cur; 4111 struct btrfs_key found_key; 4112 int slot; 4113 int sret; 4114 u32 nritems; 4115 int level; 4116 int ret = 1; 4117 4118 WARN_ON(!path->keep_locks); 4119 again: 4120 cur = btrfs_lock_root_node(root); 4121 level = btrfs_header_level(cur); 4122 WARN_ON(path->nodes[level]); 4123 path->nodes[level] = cur; 4124 path->locks[level] = 1; 4125 4126 if (btrfs_header_generation(cur) < min_trans) { 4127 ret = 1; 4128 goto out; 4129 } 4130 while (1) { 4131 nritems = btrfs_header_nritems(cur); 4132 level = btrfs_header_level(cur); 4133 sret = bin_search(cur, min_key, level, &slot); 4134 4135 /* at the lowest level, we're done, setup the path and exit */ 4136 if (level == path->lowest_level) { 4137 if (slot >= nritems) 4138 goto find_next_key; 4139 ret = 0; 4140 path->slots[level] = slot; 4141 btrfs_item_key_to_cpu(cur, &found_key, slot); 4142 goto out; 4143 } 4144 if (sret && slot > 0) 4145 slot--; 4146 /* 4147 * check this node pointer against the cache_only and 4148 * min_trans parameters. If it isn't in cache or is too 4149 * old, skip to the next one. 4150 */ 4151 while (slot < nritems) { 4152 u64 blockptr; 4153 u64 gen; 4154 struct extent_buffer *tmp; 4155 struct btrfs_disk_key disk_key; 4156 4157 blockptr = btrfs_node_blockptr(cur, slot); 4158 gen = btrfs_node_ptr_generation(cur, slot); 4159 if (gen < min_trans) { 4160 slot++; 4161 continue; 4162 } 4163 if (!cache_only) 4164 break; 4165 4166 if (max_key) { 4167 btrfs_node_key(cur, &disk_key, slot); 4168 if (comp_keys(&disk_key, max_key) >= 0) { 4169 ret = 1; 4170 goto out; 4171 } 4172 } 4173 4174 tmp = btrfs_find_tree_block(root, blockptr, 4175 btrfs_level_size(root, level - 1)); 4176 4177 if (tmp && btrfs_buffer_uptodate(tmp, gen)) { 4178 free_extent_buffer(tmp); 4179 break; 4180 } 4181 if (tmp) 4182 free_extent_buffer(tmp); 4183 slot++; 4184 } 4185 find_next_key: 4186 /* 4187 * we didn't find a candidate key in this node, walk forward 4188 * and find another one 4189 */ 4190 if (slot >= nritems) { 4191 path->slots[level] = slot; 4192 btrfs_set_path_blocking(path); 4193 sret = btrfs_find_next_key(root, path, min_key, level, 4194 cache_only, min_trans); 4195 if (sret == 0) { 4196 btrfs_release_path(root, path); 4197 goto again; 4198 } else { 4199 goto out; 4200 } 4201 } 4202 /* save our key for returning back */ 4203 btrfs_node_key_to_cpu(cur, &found_key, slot); 4204 path->slots[level] = slot; 4205 if (level == path->lowest_level) { 4206 ret = 0; 4207 unlock_up(path, level, 1); 4208 goto out; 4209 } 4210 btrfs_set_path_blocking(path); 4211 cur = read_node_slot(root, cur, slot); 4212 4213 btrfs_tree_lock(cur); 4214 4215 path->locks[level - 1] = 1; 4216 path->nodes[level - 1] = cur; 4217 unlock_up(path, level, 1); 4218 btrfs_clear_path_blocking(path, NULL); 4219 } 4220 out: 4221 if (ret == 0) 4222 memcpy(min_key, &found_key, sizeof(found_key)); 4223 btrfs_set_path_blocking(path); 4224 return ret; 4225 } 4226 4227 /* 4228 * this is similar to btrfs_next_leaf, but does not try to preserve 4229 * and fixup the path. It looks for and returns the next key in the 4230 * tree based on the current path and the cache_only and min_trans 4231 * parameters. 4232 * 4233 * 0 is returned if another key is found, < 0 if there are any errors 4234 * and 1 is returned if there are no higher keys in the tree 4235 * 4236 * path->keep_locks should be set to 1 on the search made before 4237 * calling this function. 4238 */ 4239 int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path, 4240 struct btrfs_key *key, int level, 4241 int cache_only, u64 min_trans) 4242 { 4243 int slot; 4244 struct extent_buffer *c; 4245 4246 WARN_ON(!path->keep_locks); 4247 while (level < BTRFS_MAX_LEVEL) { 4248 if (!path->nodes[level]) 4249 return 1; 4250 4251 slot = path->slots[level] + 1; 4252 c = path->nodes[level]; 4253 next: 4254 if (slot >= btrfs_header_nritems(c)) { 4255 int ret; 4256 int orig_lowest; 4257 struct btrfs_key cur_key; 4258 if (level + 1 >= BTRFS_MAX_LEVEL || 4259 !path->nodes[level + 1]) 4260 return 1; 4261 4262 if (path->locks[level + 1]) { 4263 level++; 4264 continue; 4265 } 4266 4267 slot = btrfs_header_nritems(c) - 1; 4268 if (level == 0) 4269 btrfs_item_key_to_cpu(c, &cur_key, slot); 4270 else 4271 btrfs_node_key_to_cpu(c, &cur_key, slot); 4272 4273 orig_lowest = path->lowest_level; 4274 btrfs_release_path(root, path); 4275 path->lowest_level = level; 4276 ret = btrfs_search_slot(NULL, root, &cur_key, path, 4277 0, 0); 4278 path->lowest_level = orig_lowest; 4279 if (ret < 0) 4280 return ret; 4281 4282 c = path->nodes[level]; 4283 slot = path->slots[level]; 4284 if (ret == 0) 4285 slot++; 4286 goto next; 4287 } 4288 4289 if (level == 0) 4290 btrfs_item_key_to_cpu(c, key, slot); 4291 else { 4292 u64 blockptr = btrfs_node_blockptr(c, slot); 4293 u64 gen = btrfs_node_ptr_generation(c, slot); 4294 4295 if (cache_only) { 4296 struct extent_buffer *cur; 4297 cur = btrfs_find_tree_block(root, blockptr, 4298 btrfs_level_size(root, level - 1)); 4299 if (!cur || !btrfs_buffer_uptodate(cur, gen)) { 4300 slot++; 4301 if (cur) 4302 free_extent_buffer(cur); 4303 goto next; 4304 } 4305 free_extent_buffer(cur); 4306 } 4307 if (gen < min_trans) { 4308 slot++; 4309 goto next; 4310 } 4311 btrfs_node_key_to_cpu(c, key, slot); 4312 } 4313 return 0; 4314 } 4315 return 1; 4316 } 4317 4318 /* 4319 * search the tree again to find a leaf with greater keys 4320 * returns 0 if it found something or 1 if there are no greater leaves. 4321 * returns < 0 on io errors. 4322 */ 4323 int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) 4324 { 4325 int slot; 4326 int level; 4327 struct extent_buffer *c; 4328 struct extent_buffer *next; 4329 struct btrfs_key key; 4330 u32 nritems; 4331 int ret; 4332 int old_spinning = path->leave_spinning; 4333 int force_blocking = 0; 4334 4335 nritems = btrfs_header_nritems(path->nodes[0]); 4336 if (nritems == 0) 4337 return 1; 4338 4339 /* 4340 * we take the blocks in an order that upsets lockdep. Using 4341 * blocking mode is the only way around it. 4342 */ 4343 #ifdef CONFIG_DEBUG_LOCK_ALLOC 4344 force_blocking = 1; 4345 #endif 4346 4347 btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1); 4348 again: 4349 level = 1; 4350 next = NULL; 4351 btrfs_release_path(root, path); 4352 4353 path->keep_locks = 1; 4354 4355 if (!force_blocking) 4356 path->leave_spinning = 1; 4357 4358 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 4359 path->keep_locks = 0; 4360 4361 if (ret < 0) 4362 return ret; 4363 4364 nritems = btrfs_header_nritems(path->nodes[0]); 4365 /* 4366 * by releasing the path above we dropped all our locks. A balance 4367 * could have added more items next to the key that used to be 4368 * at the very end of the block. So, check again here and 4369 * advance the path if there are now more items available. 4370 */ 4371 if (nritems > 0 && path->slots[0] < nritems - 1) { 4372 if (ret == 0) 4373 path->slots[0]++; 4374 ret = 0; 4375 goto done; 4376 } 4377 4378 while (level < BTRFS_MAX_LEVEL) { 4379 if (!path->nodes[level]) { 4380 ret = 1; 4381 goto done; 4382 } 4383 4384 slot = path->slots[level] + 1; 4385 c = path->nodes[level]; 4386 if (slot >= btrfs_header_nritems(c)) { 4387 level++; 4388 if (level == BTRFS_MAX_LEVEL) { 4389 ret = 1; 4390 goto done; 4391 } 4392 continue; 4393 } 4394 4395 if (next) { 4396 btrfs_tree_unlock(next); 4397 free_extent_buffer(next); 4398 } 4399 4400 next = c; 4401 ret = read_block_for_search(NULL, root, path, &next, level, 4402 slot, &key); 4403 if (ret == -EAGAIN) 4404 goto again; 4405 4406 if (ret < 0) { 4407 btrfs_release_path(root, path); 4408 goto done; 4409 } 4410 4411 if (!path->skip_locking) { 4412 ret = btrfs_try_spin_lock(next); 4413 if (!ret) { 4414 btrfs_set_path_blocking(path); 4415 btrfs_tree_lock(next); 4416 if (!force_blocking) 4417 btrfs_clear_path_blocking(path, next); 4418 } 4419 if (force_blocking) 4420 btrfs_set_lock_blocking(next); 4421 } 4422 break; 4423 } 4424 path->slots[level] = slot; 4425 while (1) { 4426 level--; 4427 c = path->nodes[level]; 4428 if (path->locks[level]) 4429 btrfs_tree_unlock(c); 4430 4431 free_extent_buffer(c); 4432 path->nodes[level] = next; 4433 path->slots[level] = 0; 4434 if (!path->skip_locking) 4435 path->locks[level] = 1; 4436 4437 if (!level) 4438 break; 4439 4440 ret = read_block_for_search(NULL, root, path, &next, level, 4441 0, &key); 4442 if (ret == -EAGAIN) 4443 goto again; 4444 4445 if (ret < 0) { 4446 btrfs_release_path(root, path); 4447 goto done; 4448 } 4449 4450 if (!path->skip_locking) { 4451 btrfs_assert_tree_locked(path->nodes[level]); 4452 ret = btrfs_try_spin_lock(next); 4453 if (!ret) { 4454 btrfs_set_path_blocking(path); 4455 btrfs_tree_lock(next); 4456 if (!force_blocking) 4457 btrfs_clear_path_blocking(path, next); 4458 } 4459 if (force_blocking) 4460 btrfs_set_lock_blocking(next); 4461 } 4462 } 4463 ret = 0; 4464 done: 4465 unlock_up(path, 0, 1); 4466 path->leave_spinning = old_spinning; 4467 if (!old_spinning) 4468 btrfs_set_path_blocking(path); 4469 4470 return ret; 4471 } 4472 4473 /* 4474 * this uses btrfs_prev_leaf to walk backwards in the tree, and keeps 4475 * searching until it gets past min_objectid or finds an item of 'type' 4476 * 4477 * returns 0 if something is found, 1 if nothing was found and < 0 on error 4478 */ 4479 int btrfs_previous_item(struct btrfs_root *root, 4480 struct btrfs_path *path, u64 min_objectid, 4481 int type) 4482 { 4483 struct btrfs_key found_key; 4484 struct extent_buffer *leaf; 4485 u32 nritems; 4486 int ret; 4487 4488 while (1) { 4489 if (path->slots[0] == 0) { 4490 btrfs_set_path_blocking(path); 4491 ret = btrfs_prev_leaf(root, path); 4492 if (ret != 0) 4493 return ret; 4494 } else { 4495 path->slots[0]--; 4496 } 4497 leaf = path->nodes[0]; 4498 nritems = btrfs_header_nritems(leaf); 4499 if (nritems == 0) 4500 return 1; 4501 if (path->slots[0] == nritems) 4502 path->slots[0]--; 4503 4504 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 4505 if (found_key.objectid < min_objectid) 4506 break; 4507 if (found_key.type == type) 4508 return 0; 4509 if (found_key.objectid == min_objectid && 4510 found_key.type < type) 4511 break; 4512 } 4513 return 1; 4514 } 4515