/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include <linux/sched.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "print-tree.h"
#include "locking.h"

static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
		      *root, struct btrfs_path *path, int level);
static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
		      *root, struct btrfs_key *ins_key,
		      struct btrfs_path *path, int data_size, int extend);
static int push_node_left(struct btrfs_trans_handle *trans,
			  struct btrfs_root *root, struct extent_buffer *dst,
			  struct extent_buffer *src, int empty);
static int balance_node_right(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root,
			      struct extent_buffer *dst_buf,
			      struct extent_buffer *src_buf);
static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		   struct btrfs_path *path, int level, int slot);

inline void btrfs_init_path(struct btrfs_path *p)
{
	memset(p, 0, sizeof(*p));
}

struct btrfs_path *btrfs_alloc_path(void)
{
	struct btrfs_path *path;
	path = kmem_cache_alloc(btrfs_path_cachep, GFP_NOFS);
	if (path) {
		btrfs_init_path(path);
		path->reada = 1;
	}
	return path;
}

void btrfs_free_path(struct btrfs_path *p)
{
	btrfs_release_path(NULL, p);
	kmem_cache_free(btrfs_path_cachep, p);
}

void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
{
	int i;

	for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
		p->slots[i] = 0;
		if (!p->nodes[i])
			continue;
		if (p->locks[i]) {
			btrfs_tree_unlock(p->nodes[i]);
			p->locks[i] = 0;
		}
		free_extent_buffer(p->nodes[i]);
		p->nodes[i] = NULL;
	}
}

struct extent_buffer *btrfs_root_node(struct btrfs_root *root)
{
	struct extent_buffer *eb;
	spin_lock(&root->node_lock);
	eb = root->node;
	extent_buffer_get(eb);
	spin_unlock(&root->node_lock);
	return eb;
}

struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
{
	struct extent_buffer *eb;

	while (1) {
		eb = btrfs_root_node(root);
		btrfs_tree_lock(eb);

		spin_lock(&root->node_lock);
		if (eb == root->node) {
			spin_unlock(&root->node_lock);
			break;
		}
		spin_unlock(&root->node_lock);

		btrfs_tree_unlock(eb);
		free_extent_buffer(eb);
	}
	return eb;
}

static void add_root_to_dirty_list(struct btrfs_root *root)
{
	if (root->track_dirty && list_empty(&root->dirty_list)) {
		list_add(&root->dirty_list,
			 &root->fs_info->dirty_cowonly_roots);
	}
}

int btrfs_copy_root(struct btrfs_trans_handle *trans,
		    struct btrfs_root *root,
		    struct extent_buffer *buf,
		    struct extent_buffer **cow_ret, u64 new_root_objectid)
{
	struct extent_buffer *cow;
	u32 nritems;
	int ret = 0;
	int level;
	struct btrfs_key first_key;
	struct btrfs_root *new_root;

	new_root = kmalloc(sizeof(*new_root), GFP_NOFS);
	if (!new_root)
		return -ENOMEM;

	memcpy(new_root, root, sizeof(*new_root));
	new_root->root_key.objectid = new_root_objectid;

	WARN_ON(root->ref_cows && trans->transid !=
		root->fs_info->running_transaction->transid);
	WARN_ON(root->ref_cows && trans->transid != root->last_trans);

	level = btrfs_header_level(buf);
	nritems = btrfs_header_nritems(buf);
	if (nritems) {
		if (level == 0)
			btrfs_item_key_to_cpu(buf, &first_key, 0);
		else
			btrfs_node_key_to_cpu(buf, &first_key, 0);
	} else {
		first_key.objectid = 0;
	}
	cow = btrfs_alloc_free_block(trans, new_root, buf->len,
				     new_root_objectid,
				     trans->transid, first_key.objectid,
				     level, buf->start, 0);
	if (IS_ERR(cow)) {
		kfree(new_root);
		return PTR_ERR(cow);
	}

	copy_extent_buffer(cow, buf, 0, 0, cow->len);
	btrfs_set_header_bytenr(cow, cow->start);
	btrfs_set_header_generation(cow, trans->transid);
	btrfs_set_header_owner(cow, new_root_objectid);
	btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN);

	WARN_ON(btrfs_header_generation(buf) > trans->transid);
	ret = btrfs_inc_ref(trans, new_root, buf);
	kfree(new_root);

	if (ret)
		return ret;

	btrfs_mark_buffer_dirty(cow);
	*cow_ret = cow;
	return 0;
}

int __btrfs_cow_block(struct btrfs_trans_handle *trans,
		      struct btrfs_root *root,
		      struct extent_buffer *buf,
		      struct extent_buffer *parent, int parent_slot,
		      struct extent_buffer **cow_ret,
		      u64 search_start, u64 empty_size)
{
	u64 root_gen;
	struct extent_buffer *cow;
	u32 nritems;
	int ret = 0;
	int different_trans = 0;
	int level;
	int unlock_orig = 0;
	struct btrfs_key first_key;

	if (*cow_ret == buf)
		unlock_orig = 1;

	WARN_ON(!btrfs_tree_locked(buf));

	if (root->ref_cows) {
		root_gen = trans->transid;
	} else {
		root_gen = 0;
	}
	WARN_ON(root->ref_cows && trans->transid !=
		root->fs_info->running_transaction->transid);
	WARN_ON(root->ref_cows && trans->transid != root->last_trans);

	level = btrfs_header_level(buf);
	nritems = btrfs_header_nritems(buf);
	if (nritems) {
		if (level == 0)
			btrfs_item_key_to_cpu(buf, &first_key, 0);
		else
			btrfs_node_key_to_cpu(buf, &first_key, 0);
	} else {
		first_key.objectid = 0;
	}
	cow = btrfs_alloc_free_block(trans, root, buf->len,
				     root->root_key.objectid,
				     root_gen, first_key.objectid, level,
				     search_start, empty_size);
	if (IS_ERR(cow))
		return PTR_ERR(cow);

	copy_extent_buffer(cow, buf, 0, 0, cow->len);
	btrfs_set_header_bytenr(cow, cow->start);
	btrfs_set_header_generation(cow, trans->transid);
	btrfs_set_header_owner(cow, root->root_key.objectid);
	btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN);

	WARN_ON(btrfs_header_generation(buf) > trans->transid);
	if (btrfs_header_generation(buf) != trans->transid) {
		different_trans = 1;
		ret = btrfs_inc_ref(trans, root, buf);
		if (ret)
			return ret;
	} else {
		clean_tree_block(trans, root, buf);
	}

	if (buf == root->node) {
		WARN_ON(parent && parent != buf);
		root_gen = btrfs_header_generation(buf);

		spin_lock(&root->node_lock);
		root->node = cow;
		extent_buffer_get(cow);
		spin_unlock(&root->node_lock);

		if (buf != root->commit_root) {
			btrfs_free_extent(trans, root, buf->start,
					  buf->len, root->root_key.objectid,
					  root_gen, 0, 0, 1);
		}
		free_extent_buffer(buf);
		add_root_to_dirty_list(root);
	} else {
		root_gen = btrfs_header_generation(parent);
		btrfs_set_node_blockptr(parent, parent_slot,
					cow->start);
		WARN_ON(trans->transid == 0);
		btrfs_set_node_ptr_generation(parent, parent_slot,
					      trans->transid);
		btrfs_mark_buffer_dirty(parent);
		WARN_ON(btrfs_header_generation(parent) != trans->transid);
		btrfs_free_extent(trans, root, buf->start, buf->len,
				  btrfs_header_owner(parent), root_gen,
				  0, 0, 1);
	}
	if (unlock_orig)
		btrfs_tree_unlock(buf);
	free_extent_buffer(buf);
	btrfs_mark_buffer_dirty(cow);
	*cow_ret = cow;
	return 0;
}

int btrfs_cow_block(struct btrfs_trans_handle *trans,
		    struct btrfs_root *root, struct extent_buffer *buf,
		    struct extent_buffer *parent, int parent_slot,
		    struct extent_buffer **cow_ret)
{
	u64 search_start;
	u64 header_trans;
	int ret;

	if (trans->transaction != root->fs_info->running_transaction) {
		printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
		       root->fs_info->running_transaction->transid);
		WARN_ON(1);
	}
	if (trans->transid != root->fs_info->generation) {
		printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
		       root->fs_info->generation);
		WARN_ON(1);
	}

	header_trans = btrfs_header_generation(buf);
	spin_lock(&root->fs_info->hash_lock);
	if (header_trans == trans->transid &&
	    !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
		*cow_ret = buf;
		spin_unlock(&root->fs_info->hash_lock);
		return 0;
	}
	spin_unlock(&root->fs_info->hash_lock);
	search_start = buf->start & ~((u64)(1024 * 1024 * 1024) - 1);
	ret = __btrfs_cow_block(trans, root, buf, parent,
				parent_slot, cow_ret, search_start, 0);
	return ret;
}

static int close_blocks(u64 blocknr, u64 other, u32 blocksize)
{
	if (blocknr < other && other - (blocknr + blocksize) < 32768)
		return 1;
	if (blocknr > other && blocknr - (other + blocksize) < 32768)
		return 1;
	return 0;
}

/*
 * compare two keys in a memcmp fashion
 */
static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
{
	struct btrfs_key k1;

	btrfs_disk_key_to_cpu(&k1, disk);

	if (k1.objectid > k2->objectid)
		return 1;
	if (k1.objectid < k2->objectid)
		return -1;
	if (k1.type > k2->type)
		return 1;
	if (k1.type < k2->type)
		return -1;
	if (k1.offset > k2->offset)
		return 1;
	if (k1.offset < k2->offset)
		return -1;
	return 0;
}

int btrfs_realloc_node(struct btrfs_trans_handle *trans,
		       struct btrfs_root *root, struct extent_buffer *parent,
		       int start_slot, int cache_only, u64 *last_ret,
		       struct btrfs_key *progress)
{
	struct extent_buffer *cur;
	u64 blocknr;
	u64 gen;
	u64 search_start = *last_ret;
	u64 last_block = 0;
	u64 other;
	u32 parent_nritems;
	int end_slot;
	int i;
	int err = 0;
	int parent_level;
	int uptodate;
	u32 blocksize;
	int progress_passed = 0;
	struct btrfs_disk_key disk_key;

	parent_level = btrfs_header_level(parent);
	if (cache_only && parent_level != 1)
		return 0;

	if (trans->transaction != root->fs_info->running_transaction) {
		printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
		       root->fs_info->running_transaction->transid);
		WARN_ON(1);
	}
	if (trans->transid != root->fs_info->generation) {
		printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
		       root->fs_info->generation);
		WARN_ON(1);
	}

	parent_nritems = btrfs_header_nritems(parent);
	blocksize = btrfs_level_size(root, parent_level - 1);
	end_slot = parent_nritems;

	if (parent_nritems == 1)
		return 0;

	for (i = start_slot; i < end_slot; i++) {
		int close = 1;

		if (!parent->map_token) {
			map_extent_buffer(parent,
					  btrfs_node_key_ptr_offset(i),
					  sizeof(struct btrfs_key_ptr),
					  &parent->map_token, &parent->kaddr,
					  &parent->map_start, &parent->map_len,
					  KM_USER1);
		}
		btrfs_node_key(parent, &disk_key, i);
		if (!progress_passed && comp_keys(&disk_key, progress) < 0)
			continue;

		progress_passed = 1;
		blocknr = btrfs_node_blockptr(parent, i);
		gen = btrfs_node_ptr_generation(parent, i);
		if (last_block == 0)
			last_block = blocknr;

		if (i > 0) {
			other = btrfs_node_blockptr(parent, i - 1);
			close = close_blocks(blocknr, other, blocksize);
		}
		if (!close && i < end_slot - 2) {
			other = btrfs_node_blockptr(parent, i + 1);
			close = close_blocks(blocknr, other, blocksize);
		}
		if (close) {
			last_block = blocknr;
			continue;
		}
		if (parent->map_token) {
			unmap_extent_buffer(parent, parent->map_token,
					    KM_USER1);
			parent->map_token = NULL;
		}

		cur = btrfs_find_tree_block(root, blocknr, blocksize);
		if (cur)
			uptodate = btrfs_buffer_uptodate(cur, gen);
		else
			uptodate = 0;
		if (!cur || !uptodate) {
			if (cache_only) {
				free_extent_buffer(cur);
				continue;
			}
			if (!cur) {
				cur = read_tree_block(root, blocknr,
						      blocksize, gen);
			} else if (!uptodate) {
				btrfs_read_buffer(cur, gen);
			}
		}
		if (search_start == 0)
			search_start = last_block;

		btrfs_tree_lock(cur);
		err = __btrfs_cow_block(trans, root, cur, parent, i,
					&cur, search_start,
					min(16 * blocksize,
					    (end_slot - i) * blocksize));
		if (err) {
			btrfs_tree_unlock(cur);
			free_extent_buffer(cur);
			break;
		}
		search_start = cur->start;
		last_block = cur->start;
		*last_ret = search_start;
		btrfs_tree_unlock(cur);
		free_extent_buffer(cur);
	}
	if (parent->map_token) {
		unmap_extent_buffer(parent, parent->map_token,
				    KM_USER1);
		parent->map_token = NULL;
	}
	return err;
}

/*
 * The leaf data grows from end-to-front in the node.
 * this returns the address of the start of the last item,
 * which is the stop of the leaf data stack
 */
static inline unsigned int leaf_data_end(struct btrfs_root *root,
					 struct extent_buffer *leaf)
{
	u32 nr = btrfs_header_nritems(leaf);
	if (nr == 0)
		return BTRFS_LEAF_DATA_SIZE(root);
	return btrfs_item_offset_nr(leaf, nr - 1);
}

static int check_node(struct btrfs_root *root, struct btrfs_path *path,
		      int level)
{
	struct extent_buffer *parent = NULL;
	struct extent_buffer *node = path->nodes[level];
	struct btrfs_disk_key parent_key;
	struct btrfs_disk_key node_key;
	int parent_slot;
	int slot;
	struct btrfs_key cpukey;
	u32 nritems = btrfs_header_nritems(node);

	if (path->nodes[level + 1])
		parent = path->nodes[level + 1];

	slot = path->slots[level];
	BUG_ON(nritems == 0);
	if (parent) {
		parent_slot = path->slots[level + 1];
		btrfs_node_key(parent, &parent_key, parent_slot);
		btrfs_node_key(node, &node_key, 0);
		BUG_ON(memcmp(&parent_key, &node_key,
			      sizeof(struct btrfs_disk_key)));
		BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
		       btrfs_header_bytenr(node));
	}
	BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root));
	if (slot != 0) {
		btrfs_node_key_to_cpu(node, &cpukey, slot - 1);
		btrfs_node_key(node, &node_key, slot);
		BUG_ON(comp_keys(&node_key, &cpukey) <= 0);
	}
	if (slot < nritems - 1) {
		btrfs_node_key_to_cpu(node, &cpukey, slot + 1);
		btrfs_node_key(node, &node_key, slot);
		BUG_ON(comp_keys(&node_key, &cpukey) >= 0);
	}
	return 0;
}

static int check_leaf(struct btrfs_root *root, struct btrfs_path *path,
		      int level)
{
	struct extent_buffer *leaf = path->nodes[level];
	struct extent_buffer *parent = NULL;
	int parent_slot;
	struct btrfs_key cpukey;
	struct btrfs_disk_key parent_key;
	struct btrfs_disk_key leaf_key;
	int slot = path->slots[0];

	u32 nritems = btrfs_header_nritems(leaf);

	if (path->nodes[level + 1])
		parent = path->nodes[level + 1];

	if (nritems == 0)
		return 0;

	if (parent) {
		parent_slot = path->slots[level + 1];
		btrfs_node_key(parent, &parent_key, parent_slot);
		btrfs_item_key(leaf, &leaf_key, 0);

		BUG_ON(memcmp(&parent_key, &leaf_key,
			      sizeof(struct btrfs_disk_key)));
		BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
		       btrfs_header_bytenr(leaf));
	}
#if 0
	for (i = 0; nritems > 1 && i < nritems - 2; i++) {
		btrfs_item_key_to_cpu(leaf, &cpukey, i + 1);
		btrfs_item_key(leaf, &leaf_key, i);
		if (comp_keys(&leaf_key, &cpukey) >= 0) {
			btrfs_print_leaf(root, leaf);
			printk("slot %d offset bad key\n", i);
			BUG_ON(1);
		}
		if (btrfs_item_offset_nr(leaf, i) !=
		    btrfs_item_end_nr(leaf, i + 1)) {
			btrfs_print_leaf(root, leaf);
			printk("slot %d offset bad\n", i);
			BUG_ON(1);
		}
		if (i == 0) {
			if (btrfs_item_offset_nr(leaf, i) +
			    btrfs_item_size_nr(leaf, i) !=
			    BTRFS_LEAF_DATA_SIZE(root)) {
				btrfs_print_leaf(root, leaf);
				printk("slot %d first offset bad\n", i);
				BUG_ON(1);
			}
		}
	}
	if (nritems > 0) {
		if (btrfs_item_size_nr(leaf, nritems - 1) > 4096) {
			btrfs_print_leaf(root, leaf);
			printk("slot %d bad size \n", nritems - 1);
			BUG_ON(1);
		}
	}
#endif
	if (slot != 0 && slot < nritems - 1) {
		btrfs_item_key(leaf, &leaf_key, slot);
		btrfs_item_key_to_cpu(leaf, &cpukey, slot - 1);
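		/*
		 * the key at this slot must sort after the previous key,
		 * and, since item data is packed end-to-front, the previous
		 * item's data must start exactly where this item's data ends
		 */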
		if (comp_keys(&leaf_key, &cpukey) <= 0) {
			btrfs_print_leaf(root, leaf);
			printk("slot %d offset bad key\n", slot);
			BUG_ON(1);
		}
		if (btrfs_item_offset_nr(leaf, slot - 1) !=
		    btrfs_item_end_nr(leaf, slot)) {
			btrfs_print_leaf(root, leaf);
			printk("slot %d offset bad\n", slot);
			BUG_ON(1);
		}
	}
	if (slot < nritems - 1) {
		btrfs_item_key(leaf, &leaf_key, slot);
		btrfs_item_key_to_cpu(leaf, &cpukey, slot + 1);
		BUG_ON(comp_keys(&leaf_key, &cpukey) >= 0);
		if (btrfs_item_offset_nr(leaf, slot) !=
		    btrfs_item_end_nr(leaf, slot + 1)) {
			btrfs_print_leaf(root, leaf);
			printk("slot %d offset bad\n", slot);
			BUG_ON(1);
		}
	}
	BUG_ON(btrfs_item_offset_nr(leaf, 0) +
	       btrfs_item_size_nr(leaf, 0) != BTRFS_LEAF_DATA_SIZE(root));
	return 0;
}

static int noinline check_block(struct btrfs_root *root,
				struct btrfs_path *path, int level)
{
	u64 found_start;
	return 0;
	if (btrfs_header_level(path->nodes[level]) != level)
		printk("warning: bad level %Lu wanted %d found %d\n",
		       path->nodes[level]->start, level,
		       btrfs_header_level(path->nodes[level]));
	found_start = btrfs_header_bytenr(path->nodes[level]);
	if (found_start != path->nodes[level]->start) {
		printk("warning: bad bytenr %Lu found %Lu\n",
		       path->nodes[level]->start, found_start);
	}
#if 0
	struct extent_buffer *buf = path->nodes[level];

	if (memcmp_extent_buffer(buf, root->fs_info->fsid,
				 (unsigned long)btrfs_header_fsid(buf),
				 BTRFS_FSID_SIZE)) {
		printk("warning bad block %Lu\n", buf->start);
		return 1;
	}
#endif
	if (level == 0)
		return check_leaf(root, path, level);
	return check_node(root, path, level);
}

/*
 * search for key in the extent_buffer.  The items start at offset p,
 * and they are item_size apart.  There are 'max' items in p.
 *
 * the slot in the array is returned via slot, and it points to
 * the place where you would insert key if it is not found in
 * the array.
 *
 * slot may point to max if the key is bigger than all of the keys
 */
static int generic_bin_search(struct extent_buffer *eb, unsigned long p,
			      int item_size, struct btrfs_key *key,
			      int max, int *slot)
{
	int low = 0;
	int high = max;
	int mid;
	int ret;
	struct btrfs_disk_key *tmp = NULL;
	struct btrfs_disk_key unaligned;
	unsigned long offset;
	char *map_token = NULL;
	char *kaddr = NULL;
	unsigned long map_start = 0;
	unsigned long map_len = 0;
	int err;

	while (low < high) {
		mid = (low + high) / 2;
		offset = p + mid * item_size;

		if (!map_token || offset < map_start ||
		    (offset + sizeof(struct btrfs_disk_key)) >
		    map_start + map_len) {
			if (map_token) {
				unmap_extent_buffer(eb, map_token, KM_USER0);
				map_token = NULL;
			}
			err = map_extent_buffer(eb, offset,
						sizeof(struct btrfs_disk_key),
						&map_token, &kaddr,
						&map_start, &map_len, KM_USER0);

			if (!err) {
				tmp = (struct btrfs_disk_key *)(kaddr + offset -
								map_start);
			} else {
				read_extent_buffer(eb, &unaligned,
						   offset, sizeof(unaligned));
				tmp = &unaligned;
			}

		} else {
			tmp = (struct btrfs_disk_key *)(kaddr + offset -
							map_start);
		}
		ret = comp_keys(tmp, key);

		if (ret < 0)
			low = mid + 1;
		else if (ret > 0)
			high = mid;
		else {
			*slot = mid;
			if (map_token)
				unmap_extent_buffer(eb, map_token, KM_USER0);
			return 0;
		}
	}
	*slot = low;
	if (map_token)
		unmap_extent_buffer(eb, map_token, KM_USER0);
	return 1;
}

/*
 * simple bin_search frontend that does the right thing for
 * leaves vs nodes
 */
static int bin_search(struct extent_buffer *eb, struct btrfs_key *key,
		      int level, int *slot)
{
	if (level == 0) {
		return generic_bin_search(eb,
					  offsetof(struct btrfs_leaf, items),
					  sizeof(struct btrfs_item),
					  key, btrfs_header_nritems(eb),
					  slot);
	} else {
		return generic_bin_search(eb,
					  offsetof(struct btrfs_node, ptrs),
					  sizeof(struct btrfs_key_ptr),
					  key, btrfs_header_nritems(eb),
					  slot);
	}
	return -1;
}

static struct extent_buffer *read_node_slot(struct btrfs_root *root,
					    struct extent_buffer *parent,
					    int slot)
{
	int level = btrfs_header_level(parent);
	if (slot < 0)
		return NULL;
	if (slot >= btrfs_header_nritems(parent))
		return NULL;

	BUG_ON(level == 0);

	return read_tree_block(root, btrfs_node_blockptr(parent, slot),
			       btrfs_level_size(root, level - 1),
			       btrfs_node_ptr_generation(parent, slot));
}

static int balance_level(struct btrfs_trans_handle *trans,
			 struct btrfs_root *root,
			 struct btrfs_path *path, int level)
{
	struct extent_buffer *right = NULL;
	struct extent_buffer *mid;
	struct extent_buffer *left = NULL;
	struct extent_buffer *parent = NULL;
	int ret = 0;
	int wret;
	int pslot;
	int orig_slot = path->slots[level];
	int err_on_enospc = 0;
	u64 orig_ptr;

	if (level == 0)
		return 0;

	mid = path->nodes[level];
	WARN_ON(!path->locks[level]);
	WARN_ON(btrfs_header_generation(mid) != trans->transid);

	orig_ptr = btrfs_node_blockptr(mid, orig_slot);

	if (level < BTRFS_MAX_LEVEL - 1)
		parent = path->nodes[level + 1];
	pslot = path->slots[level + 1];

	/*
	 * deal with the case where there is only one pointer in the root
	 * by promoting the node below to a root
	 */
	if (!parent) {
		struct extent_buffer *child;

		if (btrfs_header_nritems(mid) != 1)
			return 0;

		/* promote the child to a root */
		child = read_node_slot(root, mid, 0);
		BUG_ON(!child);
		btrfs_tree_lock(child);
		ret = btrfs_cow_block(trans, root, child, mid, 0, &child);
		BUG_ON(ret);

		spin_lock(&root->node_lock);
		root->node = child;
		spin_unlock(&root->node_lock);

		add_root_to_dirty_list(root);
		btrfs_tree_unlock(child);
		path->locks[level] = 0;
		path->nodes[level] = NULL;
		clean_tree_block(trans, root, mid);
		btrfs_tree_unlock(mid);
		/* once for the path */
		free_extent_buffer(mid);
		ret = btrfs_free_extent(trans, root, mid->start, mid->len,
					root->root_key.objectid,
					btrfs_header_generation(mid), 0, 0, 1);
		/* once for the root ptr */
		free_extent_buffer(mid);
		return ret;
	}
	if (btrfs_header_nritems(mid) >
	    BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
		return 0;

	if (btrfs_header_nritems(mid) < 2)
		err_on_enospc = 1;

	left = read_node_slot(root, parent, pslot - 1);
	if (left) {
		btrfs_tree_lock(left);
		wret = btrfs_cow_block(trans, root, left,
				       parent, pslot - 1, &left);
		if (wret) {
			ret = wret;
			goto enospc;
		}
	}
	right = read_node_slot(root, parent, pslot + 1);
	if (right) {
		btrfs_tree_lock(right);
		wret = btrfs_cow_block(trans, root, right,
				       parent, pslot + 1, &right);
		if (wret) {
			ret = wret;
			goto enospc;
		}
	}

	/* first, try to make some room in the middle buffer */
	if (left) {
		orig_slot += btrfs_header_nritems(left);
		wret = push_node_left(trans, root, left, mid, 1);
		if (wret < 0)
			ret = wret;
		if (btrfs_header_nritems(mid) < 2)
			err_on_enospc = 1;
	}

	/*
	 * then try to empty the right most buffer into the middle
	 */
	if (right) {
		wret = push_node_left(trans, root, mid, right, 1);
		if (wret < 0 && wret != -ENOSPC)
			ret = wret;
		if (btrfs_header_nritems(right) == 0) {
			u64 bytenr = right->start;
			u64 generation = btrfs_header_generation(parent);
			u32 blocksize = right->len;

			clean_tree_block(trans, root, right);
			btrfs_tree_unlock(right);
			free_extent_buffer(right);
			right = NULL;
			wret = del_ptr(trans, root, path, level + 1,
				       pslot + 1);
			if (wret)
				ret = wret;
			wret = btrfs_free_extent(trans, root, bytenr,
						 blocksize,
						 btrfs_header_owner(parent),
						 generation, 0, 0, 1);
			if (wret)
				ret = wret;
		} else {
			struct btrfs_disk_key right_key;
			btrfs_node_key(right, &right_key, 0);
			btrfs_set_node_key(parent, &right_key, pslot + 1);
			btrfs_mark_buffer_dirty(parent);
		}
	}
	if (btrfs_header_nritems(mid) == 1) {
		/*
		 * we're not allowed to leave a node with one item in the
		 * tree during a delete.  A deletion from lower in the tree
		 * could try to delete the only pointer in this node.
		 * So, pull some keys from the left.
		 * There has to be a left pointer at this point because
		 * otherwise we would have pulled some pointers from the
		 * right
		 */
		BUG_ON(!left);
		wret = balance_node_right(trans, root, mid, left);
		if (wret < 0) {
			ret = wret;
			goto enospc;
		}
		if (wret == 1) {
			wret = push_node_left(trans, root, left, mid, 1);
			if (wret < 0)
				ret = wret;
		}
		BUG_ON(wret == 1);
	}
	if (btrfs_header_nritems(mid) == 0) {
		/* we've managed to empty the middle node, drop it */
		u64 root_gen = btrfs_header_generation(parent);
		u64 bytenr = mid->start;
		u32 blocksize = mid->len;

		clean_tree_block(trans, root, mid);
		btrfs_tree_unlock(mid);
		free_extent_buffer(mid);
		mid = NULL;
		wret = del_ptr(trans, root, path, level + 1, pslot);
		if (wret)
			ret = wret;
		wret = btrfs_free_extent(trans, root, bytenr, blocksize,
					 btrfs_header_owner(parent),
					 root_gen, 0, 0, 1);
		if (wret)
			ret = wret;
	} else {
		/* update the parent key to reflect our changes */
		struct btrfs_disk_key mid_key;
		btrfs_node_key(mid, &mid_key, 0);
		btrfs_set_node_key(parent, &mid_key, pslot);
		btrfs_mark_buffer_dirty(parent);
	}

	/* update the path */
	if (left) {
		if (btrfs_header_nritems(left) > orig_slot) {
			extent_buffer_get(left);
			/* left was locked after cow */
			path->nodes[level] = left;
			path->slots[level + 1] -= 1;
			path->slots[level] = orig_slot;
			if (mid) {
				btrfs_tree_unlock(mid);
				free_extent_buffer(mid);
			}
		} else {
			orig_slot -= btrfs_header_nritems(left);
			path->slots[level] = orig_slot;
		}
	}
	/* double check we haven't messed things up */
	check_block(root, path, level);
	if (orig_ptr !=
	    btrfs_node_blockptr(path->nodes[level], path->slots[level]))
		BUG();
enospc:
	if (right) {
		btrfs_tree_unlock(right);
		free_extent_buffer(right);
	}
	if (left) {
		if (path->nodes[level] != left)
			btrfs_tree_unlock(left);
		free_extent_buffer(left);
	}
	return ret;
}

/* returns zero if the push worked, non-zero otherwise */
static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans,
					  struct btrfs_root *root,
					  struct btrfs_path *path, int level)
{
	struct extent_buffer *right = NULL;
	struct extent_buffer *mid;
	struct extent_buffer *left = NULL;
	struct extent_buffer *parent = NULL;
	int ret = 0;
	int wret;
	int pslot;
	int orig_slot = path->slots[level];
	u64 orig_ptr;

	if (level == 0)
		return 1;

	mid = path->nodes[level];
	WARN_ON(btrfs_header_generation(mid) != trans->transid);
	orig_ptr = btrfs_node_blockptr(mid, orig_slot);

	if (level < BTRFS_MAX_LEVEL - 1)
		parent = path->nodes[level + 1];
	pslot = path->slots[level + 1];

	if (!parent)
		return 1;

	left = read_node_slot(root, parent, pslot - 1);

	/* first, try to make some room in the middle buffer */
	if (left) {
		u32 left_nr;

		btrfs_tree_lock(left);
		left_nr = btrfs_header_nritems(left);
		if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
			wret = 1;
		} else {
			ret = btrfs_cow_block(trans, root, left, parent,
					      pslot - 1, &left);
			if (ret)
				wret = 1;
			else {
				wret = push_node_left(trans, root,
						      left, mid, 0);
			}
		}
		if (wret < 0)
			ret = wret;
		if (wret == 0) {
			struct btrfs_disk_key disk_key;
			orig_slot += left_nr;
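			/*
			 * items were pushed into the left neighbor, so the
			 * first key of mid changed; update the parent's key
			 * for this slot to match
			 */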
			btrfs_node_key(mid, &disk_key, 0);
			btrfs_set_node_key(parent, &disk_key, pslot);
			btrfs_mark_buffer_dirty(parent);
			if (btrfs_header_nritems(left) > orig_slot) {
				path->nodes[level] = left;
				path->slots[level + 1] -= 1;
				path->slots[level] = orig_slot;
				btrfs_tree_unlock(mid);
				free_extent_buffer(mid);
			} else {
				orig_slot -=
					btrfs_header_nritems(left);
				path->slots[level] = orig_slot;
				btrfs_tree_unlock(left);
				free_extent_buffer(left);
			}
			return 0;
		}
		btrfs_tree_unlock(left);
		free_extent_buffer(left);
	}
	right = read_node_slot(root, parent, pslot + 1);

	/*
	 * then try to empty the right most buffer into the middle
	 */
	if (right) {
		u32 right_nr;
		btrfs_tree_lock(right);
		right_nr = btrfs_header_nritems(right);
		if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
			wret = 1;
		} else {
			ret = btrfs_cow_block(trans, root, right,
					      parent, pslot + 1,
					      &right);
			if (ret)
				wret = 1;
			else {
				wret = balance_node_right(trans, root,
							  right, mid);
			}
		}
		if (wret < 0)
			ret = wret;
		if (wret == 0) {
			struct btrfs_disk_key disk_key;

			btrfs_node_key(right, &disk_key, 0);
			btrfs_set_node_key(parent, &disk_key, pslot + 1);
			btrfs_mark_buffer_dirty(parent);

			if (btrfs_header_nritems(mid) <= orig_slot) {
				path->nodes[level] = right;
				path->slots[level + 1] += 1;
				path->slots[level] = orig_slot -
					btrfs_header_nritems(mid);
				btrfs_tree_unlock(mid);
				free_extent_buffer(mid);
			} else {
				btrfs_tree_unlock(right);
				free_extent_buffer(right);
			}
			return 0;
		}
		btrfs_tree_unlock(right);
		free_extent_buffer(right);
	}
	return 1;
}

/*
 * readahead one full node of leaves
 */
static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path,
			     int level, int slot, u64 objectid)
{
	struct extent_buffer *node;
	struct btrfs_disk_key disk_key;
	u32 nritems;
	u64 search;
	u64 lowest_read;
	u64 highest_read;
	u64 nread = 0;
	int direction = path->reada;
	struct extent_buffer *eb;
	u32 nr;
	u32 blocksize;
	u32 nscan = 0;

	if (level != 1)
		return;

	if (!path->nodes[level])
		return;

	node = path->nodes[level];

	search = btrfs_node_blockptr(node, slot);
	blocksize = btrfs_level_size(root, level - 1);
	eb = btrfs_find_tree_block(root, search, blocksize);
	if (eb) {
		free_extent_buffer(eb);
		return;
	}

	highest_read = search;
	lowest_read = search;

	nritems = btrfs_header_nritems(node);
	nr = slot;
	while (1) {
		if (direction < 0) {
			if (nr == 0)
				break;
			nr--;
		} else if (direction > 0) {
			nr++;
			if (nr >= nritems)
				break;
		}
		if (path->reada < 0 && objectid) {
			btrfs_node_key(node, &disk_key, nr);
			if (btrfs_disk_key_objectid(&disk_key) != objectid)
				break;
		}
		search = btrfs_node_blockptr(node, nr);
		if ((search >= lowest_read && search <= highest_read) ||
		    (search < lowest_read && lowest_read - search <= 32768) ||
		    (search > highest_read && search - highest_read <= 32768)) {
			readahead_tree_block(root, search, blocksize,
				     btrfs_node_ptr_generation(node, nr));
			nread += blocksize;
		}
		nscan++;
		if (path->reada < 2 && (nread > (256 * 1024) || nscan > 32))
			break;
		if (nread > (1024 * 1024) || nscan > 128)
			break;

		if (search < lowest_read)
			lowest_read = search;
		if (search > highest_read)
			highest_read = search;
	}
}

static void unlock_up(struct btrfs_path *path, int level, int lowest_unlock)
{
	int i;
	int skip_level = level;
	int no_skips = 0;
	struct extent_buffer *t;

	for (i = level; i < BTRFS_MAX_LEVEL; i++) {
		if (!path->nodes[i])
			break;
		if (!path->locks[i])
			break;
		if (!no_skips && path->slots[i] == 0) {
			skip_level = i + 1;
			continue;
		}
		if (!no_skips && path->keep_locks) {
			u32 nritems;
			t = path->nodes[i];
			nritems = btrfs_header_nritems(t);
			if (nritems < 1 || path->slots[i] >= nritems - 1) {
				skip_level = i + 1;
				continue;
			}
		}
		if (skip_level < i && i >= lowest_unlock)
			no_skips = 1;

		t = path->nodes[i];
		if (i >= lowest_unlock && i > skip_level && path->locks[i]) {
			btrfs_tree_unlock(t);
			path->locks[i] = 0;
		}
	}
}

/*
 * look for key in the tree.  path is filled in with nodes along the way
 * if key is found, we return zero and you can find the item in the leaf
 * level of the path (level 0)
 *
 * If the key isn't found, the path points to the slot where it should
 * be inserted, and 1 is returned.  If there are other errors during the
 * search a negative error number is returned.
 *
 * if ins_len > 0, nodes and leaves will be split as we walk down the
 * tree.  if ins_len < 0, nodes will be merged as we walk down the tree (if
 * possible)
 */
int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
		      *root, struct btrfs_key *key, struct btrfs_path *p, int
		      ins_len, int cow)
{
	struct extent_buffer *b;
	struct extent_buffer *tmp;
	int slot;
	int ret;
	int level;
	int should_reada = p->reada;
	int lowest_unlock = 1;
	int blocksize;
	u8 lowest_level = 0;
	u64 blocknr;
	u64 gen;

	lowest_level = p->lowest_level;
	WARN_ON(lowest_level && ins_len);
	WARN_ON(p->nodes[0] != NULL);
	WARN_ON(cow && root == root->fs_info->extent_root &&
		!mutex_is_locked(&root->fs_info->alloc_mutex));
	if (ins_len < 0)
		lowest_unlock = 2;
again:
	if (p->skip_locking)
		b = btrfs_root_node(root);
	else
		b = btrfs_lock_root_node(root);

	while (b) {
		level = btrfs_header_level(b);
		if (cow) {
			int wret;
			wret = btrfs_cow_block(trans, root, b,
					       p->nodes[level + 1],
					       p->slots[level + 1],
					       &b);
			if (wret) {
				free_extent_buffer(b);
				return wret;
			}
		}
		BUG_ON(!cow && ins_len);
		if (level != btrfs_header_level(b))
			WARN_ON(1);
		level = btrfs_header_level(b);
		p->nodes[level] = b;
		if (!p->skip_locking)
			p->locks[level] = 1;
		ret = check_block(root, p, level);
		if (ret)
			return -1;

		ret = bin_search(b, key, level, &slot);
		if (level != 0) {
			if (ret && slot > 0)
				slot -= 1;
			p->slots[level] = slot;
			if (ins_len > 0 && btrfs_header_nritems(b) >=
			    BTRFS_NODEPTRS_PER_BLOCK(root) - 3) {
				int sret = split_node(trans, root, p, level);
				BUG_ON(sret > 0);
				if (sret)
					return sret;
				b = p->nodes[level];
				slot = p->slots[level];
			} else if (ins_len < 0) {
				int sret = balance_level(trans, root, p,
							 level);
				if (sret)
					return sret;
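				/*
				 * balancing may have replaced or freed the
				 * node at this level, so reload it from the
				 * path
				 */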
				b = p->nodes[level];
				if (!b) {
					btrfs_release_path(NULL, p);
					goto again;
				}
				slot = p->slots[level];
				BUG_ON(btrfs_header_nritems(b) == 1);
			}
			unlock_up(p, level, lowest_unlock);

			/* this is only true while dropping a snapshot */
			if (level == lowest_level) {
				break;
			}

			blocknr = btrfs_node_blockptr(b, slot);
			gen = btrfs_node_ptr_generation(b, slot);
			blocksize = btrfs_level_size(root, level - 1);

			tmp = btrfs_find_tree_block(root, blocknr, blocksize);
			if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
				b = tmp;
			} else {
				/*
				 * reduce lock contention at high levels
				 * of the btree by dropping locks before
				 * we read.
				 */
				if (level > 1) {
					btrfs_release_path(NULL, p);
					if (tmp)
						free_extent_buffer(tmp);
					if (should_reada)
						reada_for_search(root, p,
								 level, slot,
								 key->objectid);

					tmp = read_tree_block(root, blocknr,
							      blocksize, gen);
					if (tmp)
						free_extent_buffer(tmp);
					goto again;
				} else {
					if (tmp)
						free_extent_buffer(tmp);
					if (should_reada)
						reada_for_search(root, p,
								 level, slot,
								 key->objectid);
					b = read_node_slot(root, b, slot);
				}
			}
			if (!p->skip_locking)
				btrfs_tree_lock(b);
		} else {
			p->slots[level] = slot;
			if (ins_len > 0 && btrfs_leaf_free_space(root, b) <
			    sizeof(struct btrfs_item) + ins_len) {
				int sret = split_leaf(trans, root, key,
						      p, ins_len, ret == 0);
				BUG_ON(sret > 0);
				if (sret)
					return sret;
			}
			unlock_up(p, level, lowest_unlock);
			return ret;
		}
	}
	return 1;
}

/*
 * adjust the pointers going up the tree, starting at level
 * making sure the right key of each node points to 'key'.
 * This is used after shifting pointers to the left, so it stops
 * fixing up pointers when a given leaf/node is not in slot 0 of the
 * higher levels
 *
 * If this fails to write a tree block, it returns -1, but continues
 * fixing up the blocks in ram so the tree is consistent.
 */
static int fixup_low_keys(struct btrfs_trans_handle *trans,
			  struct btrfs_root *root, struct btrfs_path *path,
			  struct btrfs_disk_key *key, int level)
{
	int i;
	int ret = 0;
	struct extent_buffer *t;

	for (i = level; i < BTRFS_MAX_LEVEL; i++) {
		int tslot = path->slots[i];
		if (!path->nodes[i])
			break;
		t = path->nodes[i];
		btrfs_set_node_key(t, key, tslot);
		btrfs_mark_buffer_dirty(path->nodes[i]);
		if (tslot != 0)
			break;
	}
	return ret;
}

/*
 * try to push data from one node into the next node left in the
 * tree.
 *
 * returns 0 if some ptrs were pushed left, < 0 if there was some horrible
 * error, and > 0 if there was no room in the left hand block.
 */
static int push_node_left(struct btrfs_trans_handle *trans,
			  struct btrfs_root *root, struct extent_buffer *dst,
			  struct extent_buffer *src, int empty)
{
	int push_items = 0;
	int src_nritems;
	int dst_nritems;
	int ret = 0;

	src_nritems = btrfs_header_nritems(src);
	dst_nritems = btrfs_header_nritems(dst);
	push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
	WARN_ON(btrfs_header_generation(src) != trans->transid);
	WARN_ON(btrfs_header_generation(dst) != trans->transid);

	if (!empty && src_nritems <= 8)
		return 1;

	if (push_items <= 0) {
		return 1;
	}

	if (empty) {
		push_items = min(src_nritems, push_items);
		if (push_items < src_nritems) {
			/* leave at least 8 pointers in the node if
			 * we aren't going to empty it
			 */
			if (src_nritems - push_items < 8) {
				if (push_items <= 8)
					return 1;
				push_items -= 8;
			}
		}
	} else
		push_items = min(src_nritems - 8, push_items);

	copy_extent_buffer(dst, src,
			   btrfs_node_key_ptr_offset(dst_nritems),
			   btrfs_node_key_ptr_offset(0),
			   push_items * sizeof(struct btrfs_key_ptr));

	if (push_items < src_nritems) {
		memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0),
				      btrfs_node_key_ptr_offset(push_items),
				      (src_nritems - push_items) *
				      sizeof(struct btrfs_key_ptr));
	}
	btrfs_set_header_nritems(src, src_nritems - push_items);
	btrfs_set_header_nritems(dst, dst_nritems + push_items);
	btrfs_mark_buffer_dirty(src);
	btrfs_mark_buffer_dirty(dst);
	return ret;
}

/*
 * try to push data from one node into the next node right in the
 * tree.
 *
 * returns 0 if some ptrs were pushed, < 0 if there was some horrible
 * error, and > 0 if there was no room in the right hand block.
 *
 * this will only push up to 1/2 the contents of the left node over
 */
static int balance_node_right(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root,
			      struct extent_buffer *dst,
			      struct extent_buffer *src)
{
	int push_items = 0;
	int max_push;
	int src_nritems;
	int dst_nritems;
	int ret = 0;

	WARN_ON(btrfs_header_generation(src) != trans->transid);
	WARN_ON(btrfs_header_generation(dst) != trans->transid);

	src_nritems = btrfs_header_nritems(src);
	dst_nritems = btrfs_header_nritems(dst);
	push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
	if (push_items <= 0) {
		return 1;
	}

	if (src_nritems < 4) {
		return 1;
	}

	max_push = src_nritems / 2 + 1;
	/* don't try to empty the node */
	if (max_push >= src_nritems) {
		return 1;
	}

	if (max_push < push_items)
		push_items = max_push;

	memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items),
			      btrfs_node_key_ptr_offset(0),
			      (dst_nritems) *
			      sizeof(struct btrfs_key_ptr));

	copy_extent_buffer(dst, src,
			   btrfs_node_key_ptr_offset(0),
			   btrfs_node_key_ptr_offset(src_nritems - push_items),
			   push_items * sizeof(struct btrfs_key_ptr));

	btrfs_set_header_nritems(src, src_nritems - push_items);
	btrfs_set_header_nritems(dst, dst_nritems + push_items);

	btrfs_mark_buffer_dirty(src);
	btrfs_mark_buffer_dirty(dst);
	return ret;
}

/*
 * helper function to insert a new root level in the tree.
 * A new node is allocated, and a single item is inserted to
 * point to the existing root
 *
 * returns zero on success or < 0 on failure.
 */
static int noinline insert_new_root(struct btrfs_trans_handle *trans,
				    struct btrfs_root *root,
				    struct btrfs_path *path, int level)
{
	u64 root_gen;
	u64 lower_gen;
	struct extent_buffer *lower;
	struct extent_buffer *c;
	struct extent_buffer *old;
	struct btrfs_disk_key lower_key;

	BUG_ON(path->nodes[level]);
	BUG_ON(path->nodes[level-1] != root->node);

	if (root->ref_cows)
		root_gen = trans->transid;
	else
		root_gen = 0;

	lower = path->nodes[level-1];
	if (level == 1)
		btrfs_item_key(lower, &lower_key, 0);
	else
		btrfs_node_key(lower, &lower_key, 0);

	c = btrfs_alloc_free_block(trans, root, root->nodesize,
				   root->root_key.objectid,
				   root_gen, lower_key.objectid, level,
				   root->node->start, 0);
	if (IS_ERR(c))
		return PTR_ERR(c);

	memset_extent_buffer(c, 0, 0, root->nodesize);
	btrfs_set_header_nritems(c, 1);
	btrfs_set_header_level(c, level);
	btrfs_set_header_bytenr(c, c->start);
	btrfs_set_header_generation(c, trans->transid);
	btrfs_set_header_owner(c, root->root_key.objectid);

	write_extent_buffer(c, root->fs_info->fsid,
			    (unsigned long)btrfs_header_fsid(c),
			    BTRFS_FSID_SIZE);

	write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
			    (unsigned long)btrfs_header_chunk_tree_uuid(c),
			    BTRFS_UUID_SIZE);

	btrfs_set_node_key(c, &lower_key, 0);
	btrfs_set_node_blockptr(c, 0, lower->start);
	lower_gen = btrfs_header_generation(lower);
	WARN_ON(lower_gen == 0);

	btrfs_set_node_ptr_generation(c, 0, lower_gen);

	btrfs_mark_buffer_dirty(c);

	spin_lock(&root->node_lock);
	old = root->node;
	root->node = c;
	spin_unlock(&root->node_lock);

	/* the super has an extra ref to root->node */
	free_extent_buffer(old);

	add_root_to_dirty_list(root);
	extent_buffer_get(c);
	path->nodes[level] = c;
	path->locks[level] = 1;
	path->slots[level] = 0;

	if (root->ref_cows && lower_gen != trans->transid) {
		struct btrfs_path *back_path = btrfs_alloc_path();
		int ret;
		mutex_lock(&root->fs_info->alloc_mutex);
		ret = btrfs_insert_extent_backref(trans,
						  root->fs_info->extent_root,
						  path, lower->start,
						  root->root_key.objectid,
						  trans->transid, 0, 0);
		BUG_ON(ret);
		mutex_unlock(&root->fs_info->alloc_mutex);
		btrfs_free_path(back_path);
	}
	return 0;
}

/*
 * worker function to insert a single pointer in a node.
 * the node should have enough room for the pointer already
 *
 * slot and level indicate where you want the key to go, and
 * blocknr is the block the key points to.
 *
 * returns zero on success and < 0 on any error
 */
static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
		      *root, struct btrfs_path *path, struct btrfs_disk_key
		      *key, u64 bytenr, int slot, int level)
{
	struct extent_buffer *lower;
	int nritems;

	BUG_ON(!path->nodes[level]);
	lower = path->nodes[level];
	nritems = btrfs_header_nritems(lower);
	if (slot > nritems)
		BUG();
	if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root))
		BUG();
	if (slot != nritems) {
		memmove_extent_buffer(lower,
			      btrfs_node_key_ptr_offset(slot + 1),
			      btrfs_node_key_ptr_offset(slot),
			      (nritems - slot) * sizeof(struct btrfs_key_ptr));
	}
	btrfs_set_node_key(lower, key, slot);
	btrfs_set_node_blockptr(lower, slot, bytenr);
	WARN_ON(trans->transid == 0);
	btrfs_set_node_ptr_generation(lower, slot, trans->transid);
	btrfs_set_header_nritems(lower, nritems + 1);
	btrfs_mark_buffer_dirty(lower);
	return 0;
}

/*
 * split the node at the specified level in path in two.
 * The path is corrected to point to the appropriate node after the split
 *
 * Before splitting this tries to make some room in the node by pushing
 * left and right, if either one works, it returns right away.
 *
 * returns 0 on success and < 0 on failure
 */
static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
		      *root, struct btrfs_path *path, int level)
{
	u64 root_gen;
	struct extent_buffer *c;
	struct extent_buffer *split;
	struct btrfs_disk_key disk_key;
	int mid;
	int ret;
	int wret;
	u32 c_nritems;

	c = path->nodes[level];
	WARN_ON(btrfs_header_generation(c) != trans->transid);
	if (c == root->node) {
		/* trying to split the root, lets make a new one */
		ret = insert_new_root(trans, root, path, level + 1);
		if (ret)
			return ret;
	} else {
		ret = push_nodes_for_insert(trans, root, path, level);
		c = path->nodes[level];
		if (!ret && btrfs_header_nritems(c) <
		    BTRFS_NODEPTRS_PER_BLOCK(root) - 3)
			return 0;
		if (ret < 0)
			return ret;
	}

	c_nritems = btrfs_header_nritems(c);
	if (root->ref_cows)
		root_gen = trans->transid;
	else
		root_gen = 0;

	btrfs_node_key(c, &disk_key, 0);
	split = btrfs_alloc_free_block(trans, root, root->nodesize,
				       root->root_key.objectid,
				       root_gen,
				       btrfs_disk_key_objectid(&disk_key),
				       level, c->start, 0);
	if (IS_ERR(split))
		return PTR_ERR(split);

	btrfs_set_header_flags(split, btrfs_header_flags(c));
	btrfs_set_header_level(split, btrfs_header_level(c));
	btrfs_set_header_bytenr(split, split->start);
	btrfs_set_header_generation(split, trans->transid);
	btrfs_set_header_owner(split, root->root_key.objectid);
	btrfs_set_header_flags(split, 0);
	write_extent_buffer(split, root->fs_info->fsid,
			    (unsigned long)btrfs_header_fsid(split),
			    BTRFS_FSID_SIZE);
	write_extent_buffer(split, root->fs_info->chunk_tree_uuid,
			    (unsigned long)btrfs_header_chunk_tree_uuid(split),
			    BTRFS_UUID_SIZE);

	mid = (c_nritems + 1) / 2;

	copy_extent_buffer(split, c,
			   btrfs_node_key_ptr_offset(0),
			   btrfs_node_key_ptr_offset(mid),
			   (c_nritems - mid) * sizeof(struct btrfs_key_ptr));
	btrfs_set_header_nritems(split, c_nritems - mid);
	btrfs_set_header_nritems(c, mid);
	ret = 0;

	btrfs_mark_buffer_dirty(c);
	btrfs_mark_buffer_dirty(split);

	btrfs_node_key(split, &disk_key, 0);
	wret = insert_ptr(trans, root, path, &disk_key, split->start,
			  path->slots[level + 1] + 1,
			  level + 1);
	if (wret)
		ret = wret;

	if (path->slots[level] >= mid) {
		path->slots[level] -= mid;
		btrfs_tree_unlock(c);
		free_extent_buffer(c);
		path->nodes[level] = split;
		path->slots[level + 1] += 1;
	} else {
		btrfs_tree_unlock(split);
		free_extent_buffer(split);
	}
	return ret;
}

/*
 * how many bytes are required to store the items in a leaf.  start
 * and nr indicate which items in the leaf to check.  This totals up the
 * space used both by the item structs and the item data
 */
static int leaf_space_used(struct extent_buffer *l, int start, int nr)
{
	int data_len;
	int nritems = btrfs_header_nritems(l);
	int end = min(nritems, start + nr) - 1;

	if (!nr)
		return 0;
	data_len = btrfs_item_end_nr(l, start);
	data_len = data_len - btrfs_item_offset_nr(l, end);
	data_len += sizeof(struct btrfs_item) * nr;
	WARN_ON(data_len < 0);
	return data_len;
}

/*
 * The space between the end of the leaf items and
 * the start of the leaf data.  IOW, how much room
 * the leaf has left for both items and data
 */
int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf)
{
	int nritems = btrfs_header_nritems(leaf);
	int ret;
	ret = BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems);
	if (ret < 0) {
		printk("leaf free space ret %d, leaf data size %lu, used %d nritems %d\n",
		       ret, (unsigned long) BTRFS_LEAF_DATA_SIZE(root),
		       leaf_space_used(leaf, 0, nritems), nritems);
	}
	return ret;
}

/*
 * push some data in the path leaf to the right, trying to free up at
 * least data_size bytes.  returns zero if the push worked, nonzero otherwise
 *
 * returns 1 if the push failed because the other node didn't have enough
 * room, 0 if everything worked out and < 0 if there were major errors.
 */
static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
			   *root, struct btrfs_path *path, int data_size,
			   int empty)
{
	struct extent_buffer *left = path->nodes[0];
	struct extent_buffer *right;
	struct extent_buffer *upper;
	struct btrfs_disk_key disk_key;
	int slot;
	u32 i;
	int free_space;
	int push_space = 0;
	int push_items = 0;
	struct btrfs_item *item;
	u32 left_nritems;
	u32 nr;
	u32 right_nritems;
	u32 data_end;
	u32 this_item_size;
	int ret;

	slot = path->slots[1];
	if (!path->nodes[1]) {
		return 1;
	}
	upper = path->nodes[1];
	if (slot >= btrfs_header_nritems(upper) - 1)
		return 1;

	WARN_ON(!btrfs_tree_locked(path->nodes[1]));

	right = read_node_slot(root, upper, slot + 1);
	btrfs_tree_lock(right);
	free_space = btrfs_leaf_free_space(root, right);
	if (free_space < data_size + sizeof(struct btrfs_item))
		goto out_unlock;

	/* cow and double check */
	ret = btrfs_cow_block(trans, root, right, upper,
			      slot + 1, &right);
	if (ret)
		goto out_unlock;

	free_space = btrfs_leaf_free_space(root, right);
	if (free_space < data_size + sizeof(struct btrfs_item))
		goto out_unlock;

	left_nritems = btrfs_header_nritems(left);
	if (left_nritems == 0)
		goto out_unlock;

	if (empty)
		nr = 0;
	else
		nr = 1;

	i = left_nritems - 1;
	while (i >= nr) {
		item = btrfs_item_nr(left, i);

		if (path->slots[0] == i)
			push_space += data_size + sizeof(*item);

		if (!left->map_token) {
			map_extent_buffer(left, (unsigned long)item,
					  sizeof(struct btrfs_item),
					  &left->map_token, &left->kaddr,
					  &left->map_start, &left->map_len,
					  KM_USER1);
		}

		this_item_size = btrfs_item_size(left, item);
		if (this_item_size + sizeof(*item) + push_space > free_space)
			break;
		push_items++;
		push_space += this_item_size + sizeof(*item);
		if (i == 0)
			break;
		i--;
	}
	if (left->map_token) {
		unmap_extent_buffer(left, left->map_token, KM_USER1);
		left->map_token = NULL;
	}

	if (push_items == 0)
		goto out_unlock;

	if (!empty && push_items == left_nritems)
		WARN_ON(1);

	/* push left to right */
	right_nritems = btrfs_header_nritems(right);

	push_space = btrfs_item_end_nr(left, left_nritems - push_items);
	push_space -= leaf_data_end(root, left);

	/* make room in the right data area */
	data_end = leaf_data_end(root, right);
	memmove_extent_buffer(right,
			      btrfs_leaf_data(right) + data_end - push_space,
			      btrfs_leaf_data(right) + data_end,
			      BTRFS_LEAF_DATA_SIZE(root) - data_end);

	/* copy from the left data area */
	copy_extent_buffer(right, left, btrfs_leaf_data(right) +
		     BTRFS_LEAF_DATA_SIZE(root) - push_space,
		     btrfs_leaf_data(left) + leaf_data_end(root, left),
		     push_space);

	memmove_extent_buffer(right, btrfs_item_nr_offset(push_items),
			      btrfs_item_nr_offset(0),
			      right_nritems * sizeof(struct btrfs_item));

	/* copy the items from left to right */
	copy_extent_buffer(right, left, btrfs_item_nr_offset(0),
		   btrfs_item_nr_offset(left_nritems - push_items),
		   push_items * sizeof(struct btrfs_item));

	/* update the item pointers */
	right_nritems += push_items;
	btrfs_set_header_nritems(right, right_nritems);
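	/*
	 * walk every item now in the right leaf and recompute its data
	 * offset from the end of the data area
	 */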
	push_space = BTRFS_LEAF_DATA_SIZE(root);
	for (i = 0; i < right_nritems; i++) {
		item = btrfs_item_nr(right, i);
		if (!right->map_token) {
			map_extent_buffer(right, (unsigned long)item,
					  sizeof(struct btrfs_item),
					  &right->map_token, &right->kaddr,
					  &right->map_start, &right->map_len,
					  KM_USER1);
		}
		push_space -= btrfs_item_size(right, item);
		btrfs_set_item_offset(right, item, push_space);
	}

	if (right->map_token) {
		unmap_extent_buffer(right, right->map_token, KM_USER1);
		right->map_token = NULL;
	}
	left_nritems -= push_items;
	btrfs_set_header_nritems(left, left_nritems);

	if (left_nritems)
		btrfs_mark_buffer_dirty(left);
	btrfs_mark_buffer_dirty(right);

	btrfs_item_key(right, &disk_key, 0);
	btrfs_set_node_key(upper, &disk_key, slot + 1);
	btrfs_mark_buffer_dirty(upper);

	/* then fixup the leaf pointer in the path */
	if (path->slots[0] >= left_nritems) {
		path->slots[0] -= left_nritems;
		if (btrfs_header_nritems(path->nodes[0]) == 0)
			clean_tree_block(trans, root, path->nodes[0]);
		btrfs_tree_unlock(path->nodes[0]);
		free_extent_buffer(path->nodes[0]);
		path->nodes[0] = right;
		path->slots[1] += 1;
	} else {
		btrfs_tree_unlock(right);
		free_extent_buffer(right);
	}
	return 0;

out_unlock:
	btrfs_tree_unlock(right);
	free_extent_buffer(right);
	return 1;
}

/*
 * push some data in the path leaf to the left, trying to free up at
 * least data_size bytes.  returns zero if the push worked, nonzero otherwise
 */
static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
			  *root, struct btrfs_path *path, int data_size,
			  int empty)
{
	struct btrfs_disk_key disk_key;
	struct extent_buffer *right = path->nodes[0];
	struct extent_buffer *left;
	int slot;
	int i;
	int free_space;
	int push_space = 0;
	int push_items = 0;
	struct btrfs_item *item;
	u32 old_left_nritems;
	u32 right_nritems;
	u32 nr;
	int ret = 0;
	int wret;
	u32 this_item_size;
	u32 old_left_item_size;

	slot = path->slots[1];
	if (slot == 0)
		return 1;
	if (!path->nodes[1])
		return 1;

	right_nritems = btrfs_header_nritems(right);
	if (right_nritems == 0) {
		return 1;
	}

	WARN_ON(!btrfs_tree_locked(path->nodes[1]));

	left = read_node_slot(root, path->nodes[1], slot - 1);
	btrfs_tree_lock(left);
	free_space = btrfs_leaf_free_space(root, left);
	if (free_space < data_size + sizeof(struct btrfs_item)) {
		ret = 1;
		goto out;
	}

	/* cow and double check */
	ret = btrfs_cow_block(trans, root, left,
			      path->nodes[1], slot - 1, &left);
	if (ret) {
		/* we hit -ENOSPC, but it isn't fatal here */
		ret = 1;
		goto out;
	}

	free_space = btrfs_leaf_free_space(root, left);
	if (free_space < data_size + sizeof(struct btrfs_item)) {
		ret = 1;
		goto out;
	}

	if (empty)
		nr = right_nritems;
	else
		nr = right_nritems - 1;

	for (i = 0; i < nr; i++) {
		item = btrfs_item_nr(right, i);
		if (!right->map_token) {
			map_extent_buffer(right, (unsigned long)item,
					  sizeof(struct btrfs_item),
					  &right->map_token, &right->kaddr,
					  &right->map_start, &right->map_len,
					  KM_USER1);
		}

		if (path->slots[0] == i)
			push_space += data_size + sizeof(*item);

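		/* stop once this item and its header no longer fit in the left leaf */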

		this_item_size = btrfs_item_size(right, item);
		if (this_item_size + sizeof(*item) + push_space > free_space)
			break;

		push_items++;
		push_space += this_item_size + sizeof(*item);
	}

	if (right->map_token) {
		unmap_extent_buffer(right, right->map_token, KM_USER1);
		right->map_token = NULL;
	}

	if (push_items == 0) {
		ret = 1;
		goto out;
	}
	if (!empty && push_items == btrfs_header_nritems(right))
		WARN_ON(1);

	/* push data from right to left */
	copy_extent_buffer(left, right,
			   btrfs_item_nr_offset(btrfs_header_nritems(left)),
			   btrfs_item_nr_offset(0),
			   push_items * sizeof(struct btrfs_item));

	push_space = BTRFS_LEAF_DATA_SIZE(root) -
		     btrfs_item_offset_nr(right, push_items - 1);

	copy_extent_buffer(left, right, btrfs_leaf_data(left) +
			   leaf_data_end(root, left) - push_space,
			   btrfs_leaf_data(right) +
			   btrfs_item_offset_nr(right, push_items - 1),
			   push_space);
	old_left_nritems = btrfs_header_nritems(left);
	BUG_ON(old_left_nritems < 0);

	old_left_item_size = btrfs_item_offset_nr(left, old_left_nritems - 1);
	for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
		u32 ioff;

		item = btrfs_item_nr(left, i);
		if (!left->map_token) {
			map_extent_buffer(left, (unsigned long)item,
					sizeof(struct btrfs_item),
					&left->map_token, &left->kaddr,
					&left->map_start, &left->map_len,
					KM_USER1);
		}

		ioff = btrfs_item_offset(left, item);
		btrfs_set_item_offset(left, item,
		      ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size));
	}
	btrfs_set_header_nritems(left, old_left_nritems + push_items);
	if (left->map_token) {
		unmap_extent_buffer(left, left->map_token, KM_USER1);
		left->map_token = NULL;
	}

	/* fixup right node */
	if (push_items > right_nritems) {
		printk("push items %d nr %u\n", push_items, right_nritems);
		WARN_ON(1);
	}

	if (push_items < right_nritems) {
		push_space = btrfs_item_offset_nr(right, push_items - 1) -
			     leaf_data_end(root, right);
		memmove_extent_buffer(right, btrfs_leaf_data(right) +
				      BTRFS_LEAF_DATA_SIZE(root) - push_space,
				      btrfs_leaf_data(right) +
				      leaf_data_end(root, right), push_space);

		memmove_extent_buffer(right, btrfs_item_nr_offset(0),
				      btrfs_item_nr_offset(push_items),
				      (btrfs_header_nritems(right) - push_items) *
				      sizeof(struct btrfs_item));
	}
	right_nritems -= push_items;
	btrfs_set_header_nritems(right, right_nritems);
	push_space = BTRFS_LEAF_DATA_SIZE(root);
	for (i = 0; i < right_nritems; i++) {
		item = btrfs_item_nr(right, i);

		if (!right->map_token) {
			map_extent_buffer(right, (unsigned long)item,
					sizeof(struct btrfs_item),
					&right->map_token, &right->kaddr,
					&right->map_start, &right->map_len,
					KM_USER1);
		}

		push_space = push_space - btrfs_item_size(right, item);
		btrfs_set_item_offset(right, item, push_space);
	}
	if (right->map_token) {
		unmap_extent_buffer(right, right->map_token, KM_USER1);
		right->map_token = NULL;
	}

	btrfs_mark_buffer_dirty(left);
	if (right_nritems)
		btrfs_mark_buffer_dirty(right);

	btrfs_item_key(right, &disk_key, 0);
	wret = fixup_low_keys(trans, root, path, &disk_key, 1);
	if (wret)
		ret = wret;

	/* then fixup the leaf pointer in the path */
	if (path->slots[0] < push_items) {
		path->slots[0] += old_left_nritems;
		if (btrfs_header_nritems(path->nodes[0]) == 0)
			clean_tree_block(trans, root, path->nodes[0]);
		btrfs_tree_unlock(path->nodes[0]);
		free_extent_buffer(path->nodes[0]);
		path->nodes[0] = left;
		path->slots[1] -= 1;
	} else {
		btrfs_tree_unlock(left);
		free_extent_buffer(left);
		path->slots[0] -= push_items;
	}
	BUG_ON(path->slots[0] < 0);
	return ret;
out:
	btrfs_tree_unlock(left);
	free_extent_buffer(left);
	return ret;
}

/*
 * split the path's leaf in two, making sure there is at least data_size
 * available for the resulting leaf level of the path.
 *
 * returns 0 if all went well and < 0 on failure.
 */
static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
		      *root, struct btrfs_key *ins_key,
		      struct btrfs_path *path, int data_size, int extend)
{
	u64 root_gen;
	struct extent_buffer *l;
	u32 nritems;
	int mid;
	int slot;
	struct extent_buffer *right;
	int space_needed = data_size + sizeof(struct btrfs_item);
	int data_copy_size;
	int rt_data_off;
	int i;
	int ret = 0;
	int wret;
	int double_split;
	int num_doubles = 0;
	struct btrfs_disk_key disk_key;

	if (extend)
		space_needed = data_size;

	if (root->ref_cows)
		root_gen = trans->transid;
	else
		root_gen = 0;

	/* first try to make some room by pushing left and right */
	if (ins_key->type != BTRFS_DIR_ITEM_KEY) {
		wret = push_leaf_right(trans, root, path, data_size, 0);
		if (wret < 0)
			return wret;
		if (wret) {
			wret = push_leaf_left(trans, root, path, data_size, 0);
			if (wret < 0)
				return wret;
		}
		l = path->nodes[0];

		/* did the pushes work? */
		if (btrfs_leaf_free_space(root, l) >= space_needed)
			return 0;
	}

	if (!path->nodes[1]) {
		ret = insert_new_root(trans, root, path, 1);
		if (ret)
			return ret;
	}
again:
	double_split = 0;
	l = path->nodes[0];
	slot = path->slots[0];
	nritems = btrfs_header_nritems(l);
	mid = (nritems + 1) / 2;

	btrfs_item_key(l, &disk_key, 0);

	right = btrfs_alloc_free_block(trans, root, root->leafsize,
				       root->root_key.objectid,
				       root_gen, disk_key.objectid, 0,
				       l->start, 0);
	if (IS_ERR(right)) {
		BUG_ON(1);
		return PTR_ERR(right);
	}

	memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header));
	btrfs_set_header_bytenr(right, right->start);
	btrfs_set_header_generation(right, trans->transid);
	btrfs_set_header_owner(right, root->root_key.objectid);
	btrfs_set_header_level(right, 0);
	write_extent_buffer(right, root->fs_info->fsid,
			    (unsigned long)btrfs_header_fsid(right),
			    BTRFS_FSID_SIZE);

	write_extent_buffer(right, root->fs_info->chunk_tree_uuid,
			    (unsigned long)btrfs_header_chunk_tree_uuid(right),
			    BTRFS_UUID_SIZE);
	if (mid <= slot) {
		if (nritems == 1 ||
		    leaf_space_used(l, mid, nritems - mid) + space_needed >
		    BTRFS_LEAF_DATA_SIZE(root)) {
			if (slot >= nritems) {
				btrfs_cpu_key_to_disk(&disk_key, ins_key);
				btrfs_set_header_nritems(right, 0);
				wret = insert_ptr(trans, root, path,
						  &disk_key, right->start,
						  path->slots[1] + 1, 1);
				if (wret)
					ret = wret;

				btrfs_tree_unlock(path->nodes[0]);
				free_extent_buffer(path->nodes[0]);
				path->nodes[0] = right;
				path->slots[0] = 0;
				path->slots[1] += 1;
				btrfs_mark_buffer_dirty(right);
				return ret;
			}
			mid = slot;
			if (mid != nritems &&
			    leaf_space_used(l, mid, nritems - mid) +
			    space_needed > BTRFS_LEAF_DATA_SIZE(root)) {
				double_split = 1;
			}
		}
	} else {
		if (leaf_space_used(l, 0, mid + 1) + space_needed >
		    BTRFS_LEAF_DATA_SIZE(root)) {
			if (!extend && slot == 0) {
				btrfs_cpu_key_to_disk(&disk_key, ins_key);
				btrfs_set_header_nritems(right, 0);
				wret = insert_ptr(trans, root, path,
						  &disk_key,
						  right->start,
						  path->slots[1], 1);
				if (wret)
					ret = wret;
				btrfs_tree_unlock(path->nodes[0]);
				free_extent_buffer(path->nodes[0]);
				path->nodes[0] = right;
				path->slots[0] = 0;
				if (path->slots[1] == 0) {
					wret = fixup_low_keys(trans, root,
							path, &disk_key, 1);
					if (wret)
						ret = wret;
				}
				btrfs_mark_buffer_dirty(right);
				return ret;
			} else if (extend && slot == 0) {
				mid = 1;
			} else {
				mid = slot;
				if (mid != nritems &&
				    leaf_space_used(l, mid, nritems - mid) +
				    space_needed > BTRFS_LEAF_DATA_SIZE(root)) {
					double_split = 1;
				}
			}
		}
	}
	nritems = nritems - mid;
	btrfs_set_header_nritems(right, nritems);
	data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l);

	copy_extent_buffer(right, l, btrfs_item_nr_offset(0),
			   btrfs_item_nr_offset(mid),
			   nritems * sizeof(struct btrfs_item));

	copy_extent_buffer(right, l,
			   btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
			   data_copy_size, btrfs_leaf_data(l) +
			   leaf_data_end(root, l), data_copy_size);

	rt_data_off = BTRFS_LEAF_DATA_SIZE(root) -
		      btrfs_item_end_nr(l, mid);

	for (i = 0; i < nritems; i++) {
		struct btrfs_item *item = btrfs_item_nr(right, i);
		u32 ioff;

		if (!right->map_token) {
			map_extent_buffer(right, (unsigned long)item,
					sizeof(struct btrfs_item),
					&right->map_token, &right->kaddr,
					&right->map_start, &right->map_len,
					KM_USER1);
		}

		ioff = btrfs_item_offset(right, item);
		btrfs_set_item_offset(right, item, ioff + rt_data_off);
	}

	if (right->map_token) {
		unmap_extent_buffer(right, right->map_token, KM_USER1);
		right->map_token = NULL;
	}

	btrfs_set_header_nritems(l, mid);
	ret = 0;
	btrfs_item_key(right, &disk_key, 0);
	wret = insert_ptr(trans, root, path, &disk_key, right->start,
			  path->slots[1] + 1, 1);
	if (wret)
		ret = wret;

	btrfs_mark_buffer_dirty(right);
	btrfs_mark_buffer_dirty(l);
	BUG_ON(path->slots[0] != slot);

	if (mid <= slot) {
		btrfs_tree_unlock(path->nodes[0]);
		free_extent_buffer(path->nodes[0]);
		path->nodes[0] = right;
		path->slots[0] -= mid;
		path->slots[1] += 1;
	} else {
		btrfs_tree_unlock(right);
		free_extent_buffer(right);
	}

	BUG_ON(path->slots[0] < 0);

	if (double_split) {
		BUG_ON(num_doubles != 0);
		num_doubles++;
		goto again;
	}
	return ret;
}

int btrfs_truncate_item(struct btrfs_trans_handle *trans,
			struct btrfs_root *root,
			struct btrfs_path *path,
			u32 new_size, int from_end)
{
	int ret = 0;
	int slot;
	int slot_orig;
	struct extent_buffer *leaf;
	struct btrfs_item *item;
	u32 nritems;
	unsigned int data_end;
	unsigned int old_data_start;
	unsigned int old_size;
	unsigned int size_diff;
	int i;

	slot_orig = path->slots[0];
	leaf = path->nodes[0];
	slot = path->slots[0];

	old_size = btrfs_item_size_nr(leaf, slot);
	if (old_size == new_size)
		return 0;

	nritems = btrfs_header_nritems(leaf);
	data_end = leaf_data_end(root, leaf);

	old_data_start = btrfs_item_offset_nr(leaf, slot);

	size_diff = old_size - new_size;

	BUG_ON(slot < 0);
	BUG_ON(slot >= nritems);

	/*
	 * item0..itemN ... dataN.offset..dataN.size .. data0.size
	 */
	/* first correct the data pointers */
	for (i = slot; i < nritems; i++) {
		u32 ioff;
		item = btrfs_item_nr(leaf, i);

		if (!leaf->map_token) {
			map_extent_buffer(leaf, (unsigned long)item,
					sizeof(struct btrfs_item),
					&leaf->map_token, &leaf->kaddr,
					&leaf->map_start, &leaf->map_len,
					KM_USER1);
		}

		ioff = btrfs_item_offset(leaf, item);
		btrfs_set_item_offset(leaf, item, ioff + size_diff);
	}

	if (leaf->map_token) {
		unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
		leaf->map_token = NULL;
	}

	/* shift the data */
	if (from_end) {
		memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
			      data_end + size_diff, btrfs_leaf_data(leaf) +
			      data_end, old_data_start + new_size - data_end);
	} else {
		struct btrfs_disk_key disk_key;
		u64 offset;

		btrfs_item_key(leaf, &disk_key, slot);

		if (btrfs_disk_key_type(&disk_key) == BTRFS_EXTENT_DATA_KEY) {
			unsigned long ptr;
			struct btrfs_file_extent_item *fi;

			fi = btrfs_item_ptr(leaf, slot,
					    struct btrfs_file_extent_item);
			fi = (struct btrfs_file_extent_item *)(
			     (unsigned long)fi - size_diff);

			if (btrfs_file_extent_type(leaf, fi) ==
			    BTRFS_FILE_EXTENT_INLINE) {
				ptr = btrfs_item_ptr_offset(leaf, slot);
				memmove_extent_buffer(leaf, ptr,
					(unsigned long)fi,
					offsetof(struct btrfs_file_extent_item,
						 disk_bytenr));
			}
		}

		memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
			      data_end + size_diff, btrfs_leaf_data(leaf) +
			      data_end, old_data_start - data_end);

		offset = btrfs_disk_key_offset(&disk_key);
		btrfs_set_disk_key_offset(&disk_key, offset + size_diff);
		btrfs_set_item_key(leaf, &disk_key, slot);
		if (slot == 0)
			fixup_low_keys(trans, root, path, &disk_key, 1);
	}

	item = btrfs_item_nr(leaf, slot);
	btrfs_set_item_size(leaf, item, new_size);
	btrfs_mark_buffer_dirty(leaf);

	ret = 0;
	if (btrfs_leaf_free_space(root, leaf) < 0) {
		btrfs_print_leaf(root, leaf);
		BUG();
	}
	return ret;
}

int btrfs_extend_item(struct btrfs_trans_handle *trans,
		      struct btrfs_root *root, struct btrfs_path *path,
		      u32 data_size)
{
	int ret = 0;
	int slot;
	int slot_orig;
	struct extent_buffer *leaf;
	struct btrfs_item *item;
	u32 nritems;
	unsigned int data_end;
	unsigned int old_data;
	unsigned int old_size;
	int i;

	slot_orig = path->slots[0];
	leaf = path->nodes[0];

	nritems = btrfs_header_nritems(leaf);
	data_end = leaf_data_end(root, leaf);

	if (btrfs_leaf_free_space(root, leaf) < data_size) {
		btrfs_print_leaf(root, leaf);
		BUG();
	}
	slot = path->slots[0];
	old_data = btrfs_item_end_nr(leaf, slot);

	BUG_ON(slot < 0);
	if (slot >= nritems) {
		btrfs_print_leaf(root, leaf);
		printk("slot %d too large, nritems %d\n", slot, nritems);
		BUG_ON(1);
	}

	/*
	 * item0..itemN ... dataN.offset..dataN.size .. data0.size
	 */
	/* first correct the data pointers */
	for (i = slot; i < nritems; i++) {
		u32 ioff;
		item = btrfs_item_nr(leaf, i);

		if (!leaf->map_token) {
			map_extent_buffer(leaf, (unsigned long)item,
					sizeof(struct btrfs_item),
					&leaf->map_token, &leaf->kaddr,
					&leaf->map_start, &leaf->map_len,
					KM_USER1);
		}
		ioff = btrfs_item_offset(leaf, item);
		btrfs_set_item_offset(leaf, item, ioff - data_size);
	}

	if (leaf->map_token) {
		unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
		leaf->map_token = NULL;
	}

	/* shift the data */
	memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
		      data_end - data_size, btrfs_leaf_data(leaf) +
		      data_end, old_data - data_end);

	data_end = old_data;
	old_size = btrfs_item_size_nr(leaf, slot);
	item = btrfs_item_nr(leaf, slot);
	btrfs_set_item_size(leaf, item, old_size + data_size);
	btrfs_mark_buffer_dirty(leaf);

	ret = 0;
	if (btrfs_leaf_free_space(root, leaf) < 0) {
		btrfs_print_leaf(root, leaf);
		BUG();
	}
	return ret;
}
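
/*
 * Illustrative sketch (not part of the original file): the usual pattern for
 * growing an existing item in place with btrfs_extend_item().  The caller
 * searches with enough ins_len reserved so the leaf is guaranteed to have
 * room, extends the item, then writes the extra bytes just past the old end.
 * The key and the appended payload here are hypothetical.
 */
static int example_append_to_item(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root,
				  struct btrfs_key *key,
				  const void *append, u32 append_len)
{
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	unsigned long ptr;
	u32 old_size;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	/* cow=1 and ins_len=append_len so the leaf has room for the growth */
	ret = btrfs_search_slot(trans, root, key, path, append_len, 1);
	if (ret != 0) {
		/* > 0 means the key was not found, < 0 is an error */
		if (ret > 0)
			ret = -ENOENT;
		goto out;
	}

	leaf = path->nodes[0];
	old_size = btrfs_item_size_nr(leaf, path->slots[0]);

	/* make the item append_len bytes bigger, shifting leaf data down */
	ret = btrfs_extend_item(trans, root, path, append_len);
	if (ret)
		goto out;

	/* the new bytes live right after the (relocated) old item data */
	ptr = btrfs_item_ptr_offset(leaf, path->slots[0]) + old_size;
	write_extent_buffer(leaf, append, ptr, append_len);
	btrfs_mark_buffer_dirty(leaf);
out:
	btrfs_free_path(path);
	return ret;
}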

/*
 * Given a set of keys and sizes, reserve empty items in the tree.
 * This does all the path init required, making room in the tree if needed;
 * the caller is expected to fill in the item data once this returns.
 */
int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root,
			     struct btrfs_path *path,
			     struct btrfs_key *cpu_key, u32 *data_size,
			     int nr)
{
	struct extent_buffer *leaf;
	struct btrfs_item *item;
	int ret = 0;
	int slot;
	int slot_orig;
	int i;
	u32 nritems;
	u32 total_size = 0;
	u32 total_data = 0;
	unsigned int data_end;
	struct btrfs_disk_key disk_key;

	for (i = 0; i < nr; i++)
		total_data += data_size[i];

	total_size = total_data + (nr * sizeof(struct btrfs_item));
	ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1);
	if (ret == 0)
		return -EEXIST;
	if (ret < 0)
		goto out;

	slot_orig = path->slots[0];
	leaf = path->nodes[0];

	nritems = btrfs_header_nritems(leaf);
	data_end = leaf_data_end(root, leaf);

	if (btrfs_leaf_free_space(root, leaf) <
	    sizeof(struct btrfs_item) + total_size) {
		btrfs_print_leaf(root, leaf);
		printk("not enough freespace need %u have %d\n",
		       total_size, btrfs_leaf_free_space(root, leaf));
		BUG();
	}

	slot = path->slots[0];
	BUG_ON(slot < 0);

	if (slot != nritems) {
		int i;
		unsigned int old_data = btrfs_item_end_nr(leaf, slot);

		if (old_data < data_end) {
			btrfs_print_leaf(root, leaf);
			printk("slot %d old_data %d data_end %d\n",
			       slot, old_data, data_end);
			BUG_ON(1);
		}
		/*
		 * item0..itemN ... dataN.offset..dataN.size .. data0.size
		 */
		/* first correct the data pointers */
		WARN_ON(leaf->map_token);
		for (i = slot; i < nritems; i++) {
			u32 ioff;

			item = btrfs_item_nr(leaf, i);
			if (!leaf->map_token) {
				map_extent_buffer(leaf, (unsigned long)item,
					sizeof(struct btrfs_item),
					&leaf->map_token, &leaf->kaddr,
					&leaf->map_start, &leaf->map_len,
					KM_USER1);
			}

			ioff = btrfs_item_offset(leaf, item);
			btrfs_set_item_offset(leaf, item, ioff - total_data);
		}
		if (leaf->map_token) {
			unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
			leaf->map_token = NULL;
		}

		/* shift the items */
		memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
			      btrfs_item_nr_offset(slot),
			      (nritems - slot) * sizeof(struct btrfs_item));

		/* shift the data */
		memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
			      data_end - total_data, btrfs_leaf_data(leaf) +
			      data_end, old_data - data_end);
		data_end = old_data;
	}

	/* setup the item for the new data */
	for (i = 0; i < nr; i++) {
		btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
		btrfs_set_item_key(leaf, &disk_key, slot + i);
		item = btrfs_item_nr(leaf, slot + i);
		btrfs_set_item_offset(leaf, item, data_end - data_size[i]);
		data_end -= data_size[i];
		btrfs_set_item_size(leaf, item, data_size[i]);
	}
	btrfs_set_header_nritems(leaf, nritems + nr);
	btrfs_mark_buffer_dirty(leaf);

	ret = 0;
	if (slot == 0) {
		btrfs_cpu_key_to_disk(&disk_key, cpu_key);
		ret = fixup_low_keys(trans, root, path, &disk_key, 1);
	}

	if (btrfs_leaf_free_space(root, leaf) < 0) {
		btrfs_print_leaf(root, leaf);
		BUG();
	}
out:
	return ret;
}

/*
 * Given a key and some data, insert an item into the tree.
 * This does all the path init required, making room in the tree if needed.
 */
int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
		      *root, struct btrfs_key *cpu_key, void *data, u32
		      data_size)
{
	int ret = 0;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	unsigned long ptr;

	path = btrfs_alloc_path();
	BUG_ON(!path);
	ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
	if (!ret) {
		leaf = path->nodes[0];
		ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
		write_extent_buffer(leaf, data, ptr, data_size);
		btrfs_mark_buffer_dirty(leaf);
	}
	btrfs_free_path(path);
	return ret;
}
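
/*
 * Illustrative sketch (not part of the original file): inserting a small,
 * fully formed item with btrfs_insert_item().  The key fields and the u64
 * payload here are hypothetical; a real caller would use one of the item
 * types defined in ctree.h.
 */
static int example_insert_u64_item(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root,
				   u64 objectid, u8 type, u64 offset,
				   u64 value)
{
	struct btrfs_key key;
	__le64 data = cpu_to_le64(value);

	key.objectid = objectid;
	key.type = type;
	key.offset = offset;

	/*
	 * btrfs_insert_item allocates and frees its own path, searches for
	 * the insertion point, reserves room in the leaf and copies the
	 * payload in.  -EEXIST comes back if the key is already present.
	 */
	return btrfs_insert_item(trans, root, &key, &data, sizeof(data));
}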

/*
 * delete the pointer from a given node.
 *
 * If the delete empties a node, the node is removed from the tree,
 * continuing all the way up to the root if required.  The root is converted
 * into a leaf if all the nodes are emptied.
 */
static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		   struct btrfs_path *path, int level, int slot)
{
	struct extent_buffer *parent = path->nodes[level];
	u32 nritems;
	int ret = 0;
	int wret;

	nritems = btrfs_header_nritems(parent);
	if (slot != nritems - 1) {
		memmove_extent_buffer(parent,
			      btrfs_node_key_ptr_offset(slot),
			      btrfs_node_key_ptr_offset(slot + 1),
			      sizeof(struct btrfs_key_ptr) *
			      (nritems - slot - 1));
	}
	nritems--;
	btrfs_set_header_nritems(parent, nritems);
	if (nritems == 0 && parent == root->node) {
		BUG_ON(btrfs_header_level(root->node) != 1);
		/* just turn the root into a leaf and break */
		btrfs_set_header_level(root->node, 0);
	} else if (slot == 0) {
		struct btrfs_disk_key disk_key;

		btrfs_node_key(parent, &disk_key, 0);
		wret = fixup_low_keys(trans, root, path, &disk_key, level + 1);
		if (wret)
			ret = wret;
	}
	btrfs_mark_buffer_dirty(parent);
	return ret;
}

/*
 * delete the items, nr of them starting at slot, at the leaf level of path.
 * If that empties the leaf, remove it from the tree.
 */
int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		    struct btrfs_path *path, int slot, int nr)
{
	struct extent_buffer *leaf;
	struct btrfs_item *item;
	int last_off;
	int dsize = 0;
	int ret = 0;
	int wret;
	int i;
	u32 nritems;

	leaf = path->nodes[0];
	last_off = btrfs_item_offset_nr(leaf, slot + nr - 1);

	for (i = 0; i < nr; i++)
		dsize += btrfs_item_size_nr(leaf, slot + i);

	nritems = btrfs_header_nritems(leaf);

	if (slot + nr != nritems) {
		int i;
		int data_end = leaf_data_end(root, leaf);

		memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
			      data_end + dsize,
			      btrfs_leaf_data(leaf) + data_end,
			      last_off - data_end);

		for (i = slot + nr; i < nritems; i++) {
			u32 ioff;

			item = btrfs_item_nr(leaf, i);
			if (!leaf->map_token) {
				map_extent_buffer(leaf, (unsigned long)item,
					sizeof(struct btrfs_item),
					&leaf->map_token, &leaf->kaddr,
					&leaf->map_start, &leaf->map_len,
					KM_USER1);
			}
			ioff = btrfs_item_offset(leaf, item);
			btrfs_set_item_offset(leaf, item, ioff + dsize);
		}

		if (leaf->map_token) {
			unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
			leaf->map_token = NULL;
		}

		memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot),
			      btrfs_item_nr_offset(slot + nr),
			      sizeof(struct btrfs_item) *
			      (nritems - slot - nr));
	}
	btrfs_set_header_nritems(leaf, nritems - nr);
	nritems -= nr;

	/* delete the leaf if we've emptied it */
	if (nritems == 0) {
		if (leaf == root->node) {
			btrfs_set_header_level(leaf, 0);
		} else {
			u64 root_gen = btrfs_header_generation(path->nodes[1]);
			wret = del_ptr(trans, root, path, 1, path->slots[1]);
			if (wret)
				ret = wret;
			wret = btrfs_free_extent(trans, root,
					 leaf->start, leaf->len,
					 btrfs_header_owner(path->nodes[1]),
					 root_gen, 0, 0, 1);
			if (wret)
				ret = wret;
		}
	} else {
		int used = leaf_space_used(leaf, 0, nritems);
		if (slot == 0) {
			struct btrfs_disk_key disk_key;

			btrfs_item_key(leaf, &disk_key, 0);
			wret = fixup_low_keys(trans, root, path,
					      &disk_key, 1);
			if (wret)
				ret = wret;
		}

		/* delete the leaf if it is mostly empty */
		if (used < BTRFS_LEAF_DATA_SIZE(root) / 4) {
			/* push_leaf_left fixes the path.
			 * make sure the path still points to our leaf
			 * for possible call to del_ptr below
			 */
			slot = path->slots[1];
			extent_buffer_get(leaf);

			wret = push_leaf_left(trans, root, path, 1, 1);
			if (wret < 0 && wret != -ENOSPC)
				ret = wret;

			if (path->nodes[0] == leaf &&
			    btrfs_header_nritems(leaf)) {
				wret = push_leaf_right(trans, root, path, 1, 1);
				if (wret < 0 && wret != -ENOSPC)
					ret = wret;
			}

			if (btrfs_header_nritems(leaf) == 0) {
				u64 root_gen;
				u64 bytenr = leaf->start;
				u32 blocksize = leaf->len;

				root_gen = btrfs_header_generation(
							path->nodes[1]);

				wret = del_ptr(trans, root, path, 1, slot);
				if (wret)
					ret = wret;

				free_extent_buffer(leaf);
				wret = btrfs_free_extent(trans, root, bytenr,
					     blocksize,
					     btrfs_header_owner(path->nodes[1]),
					     root_gen, 0, 0, 1);
				if (wret)
					ret = wret;
			} else {
				/* if we're still in the path, make sure
				 * we're dirty.  Otherwise, one of the
				 * push_leaf functions must have already
				 * dirtied this buffer
				 */
				if (path->nodes[0] == leaf)
					btrfs_mark_buffer_dirty(leaf);
				free_extent_buffer(leaf);
			}
		} else {
			btrfs_mark_buffer_dirty(leaf);
		}
	}
	return ret;
}

/*
 * search the tree again to find a leaf with lesser keys.
 * returns 0 if it found something or 1 if there are no lesser leaves.
 * returns < 0 on io errors.
 */
int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
{
	struct btrfs_key key;
	struct btrfs_disk_key found_key;
	int ret;

	btrfs_item_key_to_cpu(path->nodes[0], &key, 0);

	if (key.offset > 0)
		key.offset--;
	else if (key.type > 0)
		key.type--;
	else if (key.objectid > 0)
		key.objectid--;
	else
		return 1;

	btrfs_release_path(root, path);
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		return ret;
	btrfs_item_key(path->nodes[0], &found_key, 0);
	ret = comp_keys(&found_key, &key);
	if (ret < 0)
		return 0;
	return 1;
}
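
/*
 * Illustrative sketch (not part of the original file): deleting a single
 * item by key.  The caller searches with cow=1 so the leaf is already
 * copied for this transaction, then hands the slot to btrfs_del_items().
 * The key used here is hypothetical.
 */
static int example_delete_one_item(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root,
				   struct btrfs_key *key)
{
	struct btrfs_path *path;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	/* ins_len=-1 tells the search we intend to delete from this leaf */
	ret = btrfs_search_slot(trans, root, key, path, -1, 1);
	if (ret > 0) {
		ret = -ENOENT;
		goto out;
	}
	if (ret < 0)
		goto out;

	/* remove one item at the slot the search left us on */
	ret = btrfs_del_items(trans, root, path, path->slots[0], 1);
out:
	btrfs_free_path(path);
	return ret;
}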

/*
 * A helper function to walk down the tree starting at min_key, and looking
 * for nodes or leaves that are either in cache or have a minimum
 * transaction id.  This is used by the btree defrag code, but could
 * also be used to search for blocks that have changed since a given
 * transaction id.
 *
 * This does not cow, but it does stuff the starting key it finds back
 * into min_key, so you can call btrfs_search_slot with cow=1 on the
 * key and get a writable path.
 *
 * This does lock as it descends, and path->keep_locks should be set
 * to 1 by the caller.
 *
 * This honors path->lowest_level to prevent descent past a given level
 * of the tree.
 *
 * returns zero if something useful was found, < 0 on error and 1 if there
 * was nothing in the tree that matched the search criteria.
 */
int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
			 struct btrfs_path *path, int cache_only,
			 u64 min_trans)
{
	struct extent_buffer *cur;
	struct btrfs_key found_key;
	int slot;
	u32 nritems;
	int level;
	int ret = 1;

again:
	cur = btrfs_lock_root_node(root);
	level = btrfs_header_level(cur);
	path->nodes[level] = cur;
	path->locks[level] = 1;

	if (btrfs_header_generation(cur) < min_trans) {
		ret = 1;
		goto out;
	}
	while (1) {
		nritems = btrfs_header_nritems(cur);
		level = btrfs_header_level(cur);
		bin_search(cur, min_key, level, &slot);

		/* at level = 0, we're done, setup the path and exit */
		if (level == 0) {
			ret = 0;
			path->slots[level] = slot;
			btrfs_item_key_to_cpu(cur, &found_key, slot);
			goto out;
		}
		/*
		 * check this node pointer against the cache_only and
		 * min_trans parameters.  If it isn't in cache or is too
		 * old, skip to the next one.
		 */
		while (slot < nritems) {
			u64 blockptr;
			u64 gen;
			struct extent_buffer *tmp;
			blockptr = btrfs_node_blockptr(cur, slot);
			gen = btrfs_node_ptr_generation(cur, slot);
			if (gen < min_trans) {
				slot++;
				continue;
			}
			if (!cache_only)
				break;

			tmp = btrfs_find_tree_block(root, blockptr,
					    btrfs_level_size(root, level - 1));

			if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
				free_extent_buffer(tmp);
				break;
			}
			if (tmp)
				free_extent_buffer(tmp);
			slot++;
		}
		/*
		 * we didn't find a candidate key in this node, walk forward
		 * and find another one
		 */
		if (slot >= nritems) {
			ret = btrfs_find_next_key(root, path, min_key, level,
						  cache_only, min_trans);
			if (ret == 0) {
				btrfs_release_path(root, path);
				goto again;
			} else {
				goto out;
			}
		}
		/* save our key for returning back */
		btrfs_node_key_to_cpu(cur, &found_key, slot);
		path->slots[level] = slot;
		if (level == path->lowest_level) {
			ret = 0;
			unlock_up(path, level, 1);
			goto out;
		}
		cur = read_node_slot(root, cur, slot);

		btrfs_tree_lock(cur);
		path->locks[level - 1] = 1;
		path->nodes[level - 1] = cur;
		unlock_up(path, level, 1);
	}
out:
	if (ret == 0)
		memcpy(min_key, &found_key, sizeof(found_key));
	return ret;
}
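
/*
 * Illustrative sketch (not part of the original file): scanning for blocks
 * that changed since a given transid with btrfs_search_forward(), roughly
 * the way the defrag code walks the tree.  min_trans and the starting key
 * are hypothetical, and the key-advance step is one plausible way to resume
 * the walk; keep_locks is set as the function's comment asks, and the path
 * is released between iterations.
 */
static int example_walk_newer_than(struct btrfs_root *root, u64 min_trans)
{
	struct btrfs_path *path;
	struct btrfs_key key;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	/* start from the smallest possible key */
	key.objectid = 0;
	key.type = 0;
	key.offset = 0;

	path->keep_locks = 1;
	while (1) {
		ret = btrfs_search_forward(root, &key, path, 0, min_trans);
		if (ret) {
			/* 1 means nothing newer was found, < 0 is an error */
			if (ret == 1)
				ret = 0;
			break;
		}

		/* key now holds the first interesting key that was found */
		/* ... process path->nodes[0] / path->slots[0] here ... */

		btrfs_release_path(root, path);

		/* step past the key we just looked at before searching again */
		if (key.offset < (u64)-1) {
			key.offset++;
		} else if (key.type < (u8)-1) {
			key.offset = 0;
			key.type++;
		} else if (key.objectid < (u64)-1) {
			key.offset = 0;
			key.type = 0;
			key.objectid++;
		} else {
			ret = 0;
			break;
		}
	}
	btrfs_release_path(root, path);
	path->keep_locks = 0;
	btrfs_free_path(path);
	return ret;
}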

/*
 * this is similar to btrfs_next_leaf, but does not try to preserve
 * and fixup the path.  It looks for and returns the next key in the
 * tree based on the current path and the cache_only and min_trans
 * parameters.
 *
 * 0 is returned if another key is found, < 0 if there are any errors
 * and 1 is returned if there are no higher keys in the tree
 *
 * path->keep_locks should be set to 1 on the search made before
 * calling this function.
 */
int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
			struct btrfs_key *key, int lowest_level,
			int cache_only, u64 min_trans)
{
	int level = lowest_level;
	int slot;
	struct extent_buffer *c;

	while (level < BTRFS_MAX_LEVEL) {
		if (!path->nodes[level])
			return 1;

		slot = path->slots[level] + 1;
		c = path->nodes[level];
next:
		if (slot >= btrfs_header_nritems(c)) {
			level++;
			if (level == BTRFS_MAX_LEVEL)
				return 1;
			continue;
		}
		if (level == 0)
			btrfs_item_key_to_cpu(c, key, slot);
		else {
			u64 blockptr = btrfs_node_blockptr(c, slot);
			u64 gen = btrfs_node_ptr_generation(c, slot);

			if (cache_only) {
				struct extent_buffer *cur;
				cur = btrfs_find_tree_block(root, blockptr,
					    btrfs_level_size(root, level - 1));
				if (!cur || !btrfs_buffer_uptodate(cur, gen)) {
					slot++;
					if (cur)
						free_extent_buffer(cur);
					goto next;
				}
				free_extent_buffer(cur);
			}
			if (gen < min_trans) {
				slot++;
				goto next;
			}
			btrfs_node_key_to_cpu(c, key, slot);
		}
		return 0;
	}
	return 1;
}

/*
 * search the tree again to find a leaf with greater keys.
 * returns 0 if it found something or 1 if there are no greater leaves.
 * returns < 0 on io errors.
 */
int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
{
	int slot;
	int level = 1;
	struct extent_buffer *c;
	struct extent_buffer *next = NULL;
	struct btrfs_key key;
	u32 nritems;
	int ret;

	nritems = btrfs_header_nritems(path->nodes[0]);
	if (nritems == 0)
		return 1;

	btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1);

	btrfs_release_path(root, path);
	path->keep_locks = 1;
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	path->keep_locks = 0;

	if (ret < 0)
		return ret;

	nritems = btrfs_header_nritems(path->nodes[0]);
	/*
	 * by releasing the path above we dropped all our locks.  A balance
	 * could have added more items next to the key that used to be
	 * at the very end of the block.  So, check again here and
	 * advance the path if there are now more items available.
	 */
	if (nritems > 0 && path->slots[0] < nritems - 1) {
		path->slots[0]++;
		goto done;
	}

	while (level < BTRFS_MAX_LEVEL) {
		if (!path->nodes[level])
			return 1;

		slot = path->slots[level] + 1;
		c = path->nodes[level];
		if (slot >= btrfs_header_nritems(c)) {
			level++;
			if (level == BTRFS_MAX_LEVEL)
				return 1;
			continue;
		}

		if (next) {
			btrfs_tree_unlock(next);
			free_extent_buffer(next);
		}

		if (level == 1 && (path->locks[1] || path->skip_locking) &&
		    path->reada)
			reada_for_search(root, path, level, slot, 0);

		next = read_node_slot(root, c, slot);
		if (!path->skip_locking) {
			WARN_ON(!btrfs_tree_locked(c));
			btrfs_tree_lock(next);
		}
		break;
	}
	path->slots[level] = slot;
	while (1) {
		level--;
		c = path->nodes[level];
		if (path->locks[level])
			btrfs_tree_unlock(c);
		free_extent_buffer(c);
		path->nodes[level] = next;
		path->slots[level] = 0;
		if (!path->skip_locking)
			path->locks[level] = 1;
		if (!level)
			break;
		if (level == 1 && path->locks[1] && path->reada)
			reada_for_search(root, path, level, slot, 0);
		next = read_node_slot(root, next, 0);
		if (!path->skip_locking) {
			WARN_ON(!btrfs_tree_locked(path->nodes[level]));
			btrfs_tree_lock(next);
		}
	}
done:
	unlock_up(path, 0, 1);
	return 0;
}

/*
 * this uses btrfs_prev_leaf to walk backwards in the tree, and keeps
 * searching until it gets past min_objectid or finds an item of 'type'
 *
 * returns 0 if something is found, 1 if nothing was found and < 0 on error
 */
int btrfs_previous_item(struct btrfs_root *root,
			struct btrfs_path *path, u64 min_objectid,
			int type)
{
	struct btrfs_key found_key;
	struct extent_buffer *leaf;
	int ret;

	while (1) {
		if (path->slots[0] == 0) {
			ret = btrfs_prev_leaf(root, path);
			if (ret != 0)
				return ret;
		} else {
			path->slots[0]--;
		}
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		if (found_key.type == type)
			return 0;
	}
	return 1;
}
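
/*
 * Illustrative sketch (not part of the original file): the common read-only
 * iteration pattern built on btrfs_search_slot() and btrfs_next_leaf(),
 * visiting every item that belongs to one objectid.  The objectid and the
 * per-item callback are hypothetical.
 */
static int example_for_each_item(struct btrfs_root *root, u64 objectid,
				 int (*fn)(struct extent_buffer *leaf,
					   int slot, void *arg),
				 void *arg)
{
	struct btrfs_path *path;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct extent_buffer *leaf;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	/* position at (or just after) the first possible key of objectid */
	key.objectid = objectid;
	key.type = 0;
	key.offset = 0;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto out;

	while (1) {
		leaf = path->nodes[0];
		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
			/* ran off the end of this leaf, move to the next one */
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				goto out;
			if (ret > 0)
				break;	/* no more leaves */
			continue;
		}

		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		if (found_key.objectid != objectid)
			break;

		ret = fn(leaf, path->slots[0], arg);
		if (ret)
			goto out;

		path->slots[0]++;
	}
	ret = 0;
out:
	btrfs_free_path(path);
	return ret;
}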