16cbd5570SChris Mason /* 26cbd5570SChris Mason * Copyright (C) 2007 Oracle. All rights reserved. 36cbd5570SChris Mason * 46cbd5570SChris Mason * This program is free software; you can redistribute it and/or 56cbd5570SChris Mason * modify it under the terms of the GNU General Public 66cbd5570SChris Mason * License v2 as published by the Free Software Foundation. 76cbd5570SChris Mason * 86cbd5570SChris Mason * This program is distributed in the hope that it will be useful, 96cbd5570SChris Mason * but WITHOUT ANY WARRANTY; without even the implied warranty of 106cbd5570SChris Mason * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 116cbd5570SChris Mason * General Public License for more details. 126cbd5570SChris Mason * 136cbd5570SChris Mason * You should have received a copy of the GNU General Public 146cbd5570SChris Mason * License along with this program; if not, write to the 156cbd5570SChris Mason * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 166cbd5570SChris Mason * Boston, MA 021110-1307, USA. 176cbd5570SChris Mason */ 186cbd5570SChris Mason 19eb60ceacSChris Mason #include "ctree.h" 20eb60ceacSChris Mason #include "disk-io.h" 217f5c1516SChris Mason #include "transaction.h" 229a8dd150SChris Mason 23e089f05cSChris Mason static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root 24e089f05cSChris Mason *root, struct btrfs_path *path, int level); 25e089f05cSChris Mason static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root 26d4dbff95SChris Mason *root, struct btrfs_key *ins_key, 27d4dbff95SChris Mason struct btrfs_path *path, int data_size); 28e089f05cSChris Mason static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root 29e20d96d6SChris Mason *root, struct buffer_head *dst, struct buffer_head 30e089f05cSChris Mason *src); 31e089f05cSChris Mason static int balance_node_right(struct btrfs_trans_handle *trans, struct 32e20d96d6SChris Mason btrfs_root *root, struct buffer_head *dst_buf, 33e20d96d6SChris Mason struct buffer_head *src_buf); 34e089f05cSChris Mason static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, 35e089f05cSChris Mason struct btrfs_path *path, int level, int slot); 36d97e63b6SChris Mason 37df24a2b9SChris Mason inline void btrfs_init_path(struct btrfs_path *p) 38df24a2b9SChris Mason { 39df24a2b9SChris Mason memset(p, 0, sizeof(*p)); 40df24a2b9SChris Mason } 41df24a2b9SChris Mason 422c90e5d6SChris Mason struct btrfs_path *btrfs_alloc_path(void) 432c90e5d6SChris Mason { 44df24a2b9SChris Mason struct btrfs_path *path; 45df24a2b9SChris Mason path = kmem_cache_alloc(btrfs_path_cachep, GFP_NOFS); 46*2cc58cf2SChris Mason if (path) { 47df24a2b9SChris Mason btrfs_init_path(path); 48*2cc58cf2SChris Mason path->reada = 1; 49*2cc58cf2SChris Mason } 50df24a2b9SChris Mason return path; 512c90e5d6SChris Mason } 522c90e5d6SChris Mason 532c90e5d6SChris Mason void btrfs_free_path(struct btrfs_path *p) 542c90e5d6SChris Mason { 55df24a2b9SChris Mason btrfs_release_path(NULL, p); 562c90e5d6SChris Mason kmem_cache_free(btrfs_path_cachep, p); 572c90e5d6SChris Mason } 582c90e5d6SChris Mason 59234b63a0SChris Mason void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) 60eb60ceacSChris Mason { 61eb60ceacSChris Mason int i; 62234b63a0SChris Mason for (i = 0; i < BTRFS_MAX_LEVEL; i++) { 63eb60ceacSChris Mason if (!p->nodes[i]) 64eb60ceacSChris Mason break; 65234b63a0SChris Mason btrfs_block_release(root, p->nodes[i]); 66eb60ceacSChris Mason } 67aa5d6bedSChris Mason memset(p, 0, sizeof(*p)); 68eb60ceacSChris Mason } 69eb60ceacSChris Mason 706702ed49SChris Mason static int __btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root 716702ed49SChris Mason *root, struct buffer_head *buf, struct buffer_head 726702ed49SChris Mason *parent, int parent_slot, struct buffer_head 736702ed49SChris Mason **cow_ret, u64 search_start, u64 empty_size) 746702ed49SChris Mason { 756702ed49SChris Mason struct buffer_head *cow; 766702ed49SChris Mason struct btrfs_node *cow_node; 776702ed49SChris Mason int ret = 0; 786702ed49SChris Mason int different_trans = 0; 796702ed49SChris Mason 806702ed49SChris Mason WARN_ON(root->ref_cows && trans->transid != root->last_trans); 816702ed49SChris Mason WARN_ON(!buffer_uptodate(buf)); 826702ed49SChris Mason cow = btrfs_alloc_free_block(trans, root, search_start, empty_size); 836702ed49SChris Mason if (IS_ERR(cow)) 846702ed49SChris Mason return PTR_ERR(cow); 856702ed49SChris Mason 866702ed49SChris Mason cow_node = btrfs_buffer_node(cow); 876702ed49SChris Mason if (buf->b_size != root->blocksize || cow->b_size != root->blocksize) 886702ed49SChris Mason WARN_ON(1); 896702ed49SChris Mason 906702ed49SChris Mason memcpy(cow_node, btrfs_buffer_node(buf), root->blocksize); 916702ed49SChris Mason btrfs_set_header_blocknr(&cow_node->header, bh_blocknr(cow)); 926702ed49SChris Mason btrfs_set_header_generation(&cow_node->header, trans->transid); 936702ed49SChris Mason btrfs_set_header_owner(&cow_node->header, root->root_key.objectid); 946702ed49SChris Mason 956702ed49SChris Mason WARN_ON(btrfs_header_generation(btrfs_buffer_header(buf)) > 966702ed49SChris Mason trans->transid); 976702ed49SChris Mason if (btrfs_header_generation(btrfs_buffer_header(buf)) != 986702ed49SChris Mason trans->transid) { 996702ed49SChris Mason different_trans = 1; 1006702ed49SChris Mason ret = btrfs_inc_ref(trans, root, buf); 1016702ed49SChris Mason if (ret) 1026702ed49SChris Mason return ret; 1036702ed49SChris Mason } else { 1046702ed49SChris Mason clean_tree_block(trans, root, buf); 1056702ed49SChris Mason } 1066702ed49SChris Mason 1076702ed49SChris Mason if (buf == root->node) { 1086702ed49SChris Mason root->node = cow; 1096702ed49SChris Mason get_bh(cow); 1106702ed49SChris Mason if (buf != root->commit_root) { 1116702ed49SChris Mason btrfs_free_extent(trans, root, bh_blocknr(buf), 1, 1); 1126702ed49SChris Mason } 1136702ed49SChris Mason btrfs_block_release(root, buf); 1146702ed49SChris Mason } else { 1156702ed49SChris Mason btrfs_set_node_blockptr(btrfs_buffer_node(parent), parent_slot, 1166702ed49SChris Mason bh_blocknr(cow)); 1176702ed49SChris Mason btrfs_mark_buffer_dirty(parent); 1186702ed49SChris Mason WARN_ON(btrfs_header_generation(btrfs_buffer_header(parent)) != 1196702ed49SChris Mason trans->transid); 1206702ed49SChris Mason btrfs_free_extent(trans, root, bh_blocknr(buf), 1, 1); 1216702ed49SChris Mason } 1226702ed49SChris Mason btrfs_block_release(root, buf); 1236702ed49SChris Mason btrfs_mark_buffer_dirty(cow); 1246702ed49SChris Mason *cow_ret = cow; 1256702ed49SChris Mason return 0; 1266702ed49SChris Mason } 1276702ed49SChris Mason 1286702ed49SChris Mason int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root 129e20d96d6SChris Mason *root, struct buffer_head *buf, struct buffer_head 130e20d96d6SChris Mason *parent, int parent_slot, struct buffer_head 131e089f05cSChris Mason **cow_ret) 13202217ed2SChris Mason { 1336702ed49SChris Mason u64 search_start; 134ccd467d6SChris Mason if (trans->transaction != root->fs_info->running_transaction) { 135ccd467d6SChris Mason printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid, 136ccd467d6SChris Mason root->fs_info->running_transaction->transid); 137ccd467d6SChris Mason WARN_ON(1); 138ccd467d6SChris Mason } 139ccd467d6SChris Mason if (trans->transid != root->fs_info->generation) { 140ccd467d6SChris Mason printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid, 141ccd467d6SChris Mason root->fs_info->generation); 142ccd467d6SChris Mason WARN_ON(1); 143ccd467d6SChris Mason } 1447f5c1516SChris Mason if (btrfs_header_generation(btrfs_buffer_header(buf)) == 1457f5c1516SChris Mason trans->transid) { 14602217ed2SChris Mason *cow_ret = buf; 14702217ed2SChris Mason return 0; 14802217ed2SChris Mason } 1496702ed49SChris Mason 1506702ed49SChris Mason search_start = bh_blocknr(buf) & ~((u64)65535); 1516702ed49SChris Mason return __btrfs_cow_block(trans, root, buf, parent, 1526702ed49SChris Mason parent_slot, cow_ret, search_start, 0); 1532c90e5d6SChris Mason } 1546702ed49SChris Mason 1556702ed49SChris Mason static int close_blocks(u64 blocknr, u64 other) 1566702ed49SChris Mason { 1576702ed49SChris Mason if (blocknr < other && other - blocknr < 8) 1586702ed49SChris Mason return 1; 1596702ed49SChris Mason if (blocknr > other && blocknr - other < 8) 1606702ed49SChris Mason return 1; 16102217ed2SChris Mason return 0; 16202217ed2SChris Mason } 16302217ed2SChris Mason 164*2cc58cf2SChris Mason static int should_defrag_leaf(struct buffer_head *bh) 165*2cc58cf2SChris Mason { 166*2cc58cf2SChris Mason struct btrfs_leaf *leaf = btrfs_buffer_leaf(bh); 167*2cc58cf2SChris Mason struct btrfs_disk_key *key; 168*2cc58cf2SChris Mason u32 nritems; 169*2cc58cf2SChris Mason 170*2cc58cf2SChris Mason if (buffer_defrag(bh)) 171*2cc58cf2SChris Mason return 1; 172*2cc58cf2SChris Mason 173*2cc58cf2SChris Mason nritems = btrfs_header_nritems(&leaf->header); 174*2cc58cf2SChris Mason if (nritems == 0) 175*2cc58cf2SChris Mason return 0; 176*2cc58cf2SChris Mason 177*2cc58cf2SChris Mason key = &leaf->items[0].key; 178*2cc58cf2SChris Mason if (btrfs_disk_key_type(key) == BTRFS_DIR_ITEM_KEY) 179*2cc58cf2SChris Mason return 1; 180*2cc58cf2SChris Mason 181*2cc58cf2SChris Mason key = &leaf->items[nritems-1].key; 182*2cc58cf2SChris Mason if (btrfs_disk_key_type(key) == BTRFS_DIR_ITEM_KEY) 183*2cc58cf2SChris Mason return 1; 184*2cc58cf2SChris Mason if (nritems > 4) { 185*2cc58cf2SChris Mason key = &leaf->items[nritems/2].key; 186*2cc58cf2SChris Mason if (btrfs_disk_key_type(key) == BTRFS_DIR_ITEM_KEY) 187*2cc58cf2SChris Mason return 1; 188*2cc58cf2SChris Mason } 189*2cc58cf2SChris Mason return 0; 190*2cc58cf2SChris Mason } 191*2cc58cf2SChris Mason 1926702ed49SChris Mason int btrfs_realloc_node(struct btrfs_trans_handle *trans, 1936702ed49SChris Mason struct btrfs_root *root, struct buffer_head *parent, 194e9d0b13bSChris Mason int cache_only, u64 *last_ret) 1956702ed49SChris Mason { 1966702ed49SChris Mason struct btrfs_node *parent_node; 1976702ed49SChris Mason struct buffer_head *cur_bh; 1986702ed49SChris Mason struct buffer_head *tmp_bh; 1996702ed49SChris Mason u64 blocknr; 200e9d0b13bSChris Mason u64 search_start = *last_ret; 201e9d0b13bSChris Mason u64 last_block = 0; 2026702ed49SChris Mason u64 other; 2036702ed49SChris Mason u32 parent_nritems; 2046702ed49SChris Mason int start_slot; 2056702ed49SChris Mason int end_slot; 2066702ed49SChris Mason int i; 2076702ed49SChris Mason int err = 0; 208f2183bdeSChris Mason int parent_level; 2096702ed49SChris Mason 2106702ed49SChris Mason if (trans->transaction != root->fs_info->running_transaction) { 2116702ed49SChris Mason printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid, 2126702ed49SChris Mason root->fs_info->running_transaction->transid); 2136702ed49SChris Mason WARN_ON(1); 2146702ed49SChris Mason } 2156702ed49SChris Mason if (trans->transid != root->fs_info->generation) { 2166702ed49SChris Mason printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid, 2176702ed49SChris Mason root->fs_info->generation); 2186702ed49SChris Mason WARN_ON(1); 2196702ed49SChris Mason } 2206702ed49SChris Mason parent_node = btrfs_buffer_node(parent); 2216702ed49SChris Mason parent_nritems = btrfs_header_nritems(&parent_node->header); 222f2183bdeSChris Mason parent_level = btrfs_header_level(&parent_node->header); 2236702ed49SChris Mason 2246702ed49SChris Mason start_slot = 0; 2256702ed49SChris Mason end_slot = parent_nritems; 2266702ed49SChris Mason 2276702ed49SChris Mason if (parent_nritems == 1) 2286702ed49SChris Mason return 0; 2296702ed49SChris Mason 2306702ed49SChris Mason for (i = start_slot; i < end_slot; i++) { 2316702ed49SChris Mason int close = 1; 2326702ed49SChris Mason blocknr = btrfs_node_blockptr(parent_node, i); 233e9d0b13bSChris Mason if (last_block == 0) 234e9d0b13bSChris Mason last_block = blocknr; 2356702ed49SChris Mason if (i > 0) { 2366702ed49SChris Mason other = btrfs_node_blockptr(parent_node, i - 1); 2376702ed49SChris Mason close = close_blocks(blocknr, other); 2386702ed49SChris Mason } 2396702ed49SChris Mason if (close && i < end_slot - 1) { 2406702ed49SChris Mason other = btrfs_node_blockptr(parent_node, i + 1); 2416702ed49SChris Mason close = close_blocks(blocknr, other); 2426702ed49SChris Mason } 243e9d0b13bSChris Mason if (close) { 244e9d0b13bSChris Mason last_block = blocknr; 2456702ed49SChris Mason continue; 246e9d0b13bSChris Mason } 2476702ed49SChris Mason 2486702ed49SChris Mason cur_bh = btrfs_find_tree_block(root, blocknr); 2496702ed49SChris Mason if (!cur_bh || !buffer_uptodate(cur_bh) || 250*2cc58cf2SChris Mason buffer_locked(cur_bh) || 251*2cc58cf2SChris Mason (parent_level != 1 && !buffer_defrag(cur_bh)) || 252*2cc58cf2SChris Mason (parent_level == 1 && !should_defrag_leaf(cur_bh))) { 2536702ed49SChris Mason if (cache_only) { 2546702ed49SChris Mason brelse(cur_bh); 2556702ed49SChris Mason continue; 2566702ed49SChris Mason } 257f2183bdeSChris Mason if (!cur_bh || !buffer_uptodate(cur_bh) || 258f2183bdeSChris Mason buffer_locked(cur_bh)) { 2596702ed49SChris Mason brelse(cur_bh); 2606702ed49SChris Mason cur_bh = read_tree_block(root, blocknr); 2616702ed49SChris Mason } 262f2183bdeSChris Mason } 263e9d0b13bSChris Mason if (search_start == 0) 264e9d0b13bSChris Mason search_start = last_block & ~((u64)65535); 265e9d0b13bSChris Mason 2666702ed49SChris Mason err = __btrfs_cow_block(trans, root, cur_bh, parent, i, 2676702ed49SChris Mason &tmp_bh, search_start, 2686702ed49SChris Mason min(8, end_slot - i)); 2696702ed49SChris Mason if (err) 2706702ed49SChris Mason break; 2716702ed49SChris Mason search_start = bh_blocknr(tmp_bh); 272f2183bdeSChris Mason *last_ret = search_start; 273f2183bdeSChris Mason if (parent_level == 1) 274f2183bdeSChris Mason clear_buffer_defrag(tmp_bh); 2756702ed49SChris Mason brelse(tmp_bh); 2766702ed49SChris Mason } 2776702ed49SChris Mason return err; 2786702ed49SChris Mason } 2796702ed49SChris Mason 28074123bd7SChris Mason /* 28174123bd7SChris Mason * The leaf data grows from end-to-front in the node. 28274123bd7SChris Mason * this returns the address of the start of the last item, 28374123bd7SChris Mason * which is the stop of the leaf data stack 28474123bd7SChris Mason */ 285123abc88SChris Mason static inline unsigned int leaf_data_end(struct btrfs_root *root, 286123abc88SChris Mason struct btrfs_leaf *leaf) 287be0e5c09SChris Mason { 2887518a238SChris Mason u32 nr = btrfs_header_nritems(&leaf->header); 289be0e5c09SChris Mason if (nr == 0) 290123abc88SChris Mason return BTRFS_LEAF_DATA_SIZE(root); 2910783fcfcSChris Mason return btrfs_item_offset(leaf->items + nr - 1); 292be0e5c09SChris Mason } 293be0e5c09SChris Mason 29474123bd7SChris Mason /* 29574123bd7SChris Mason * compare two keys in a memcmp fashion 29674123bd7SChris Mason */ 2979aca1d51SChris Mason static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2) 298be0e5c09SChris Mason { 299e2fa7227SChris Mason struct btrfs_key k1; 300e2fa7227SChris Mason 301e2fa7227SChris Mason btrfs_disk_key_to_cpu(&k1, disk); 302e2fa7227SChris Mason 303e2fa7227SChris Mason if (k1.objectid > k2->objectid) 304be0e5c09SChris Mason return 1; 305e2fa7227SChris Mason if (k1.objectid < k2->objectid) 306be0e5c09SChris Mason return -1; 307f254e52cSChris Mason if (k1.flags > k2->flags) 308f254e52cSChris Mason return 1; 309f254e52cSChris Mason if (k1.flags < k2->flags) 310f254e52cSChris Mason return -1; 31170b2befdSChris Mason if (k1.offset > k2->offset) 31270b2befdSChris Mason return 1; 31370b2befdSChris Mason if (k1.offset < k2->offset) 31470b2befdSChris Mason return -1; 315be0e5c09SChris Mason return 0; 316be0e5c09SChris Mason } 31774123bd7SChris Mason 318123abc88SChris Mason static int check_node(struct btrfs_root *root, struct btrfs_path *path, 319123abc88SChris Mason int level) 320aa5d6bedSChris Mason { 321234b63a0SChris Mason struct btrfs_node *parent = NULL; 322e20d96d6SChris Mason struct btrfs_node *node = btrfs_buffer_node(path->nodes[level]); 323aa5d6bedSChris Mason int parent_slot; 3248d7be552SChris Mason int slot; 3258d7be552SChris Mason struct btrfs_key cpukey; 3267518a238SChris Mason u32 nritems = btrfs_header_nritems(&node->header); 327aa5d6bedSChris Mason 328aa5d6bedSChris Mason if (path->nodes[level + 1]) 329e20d96d6SChris Mason parent = btrfs_buffer_node(path->nodes[level + 1]); 330a1f39630SAneesh 3318d7be552SChris Mason slot = path->slots[level]; 332*2cc58cf2SChris Mason BUG_ON(!buffer_uptodate(path->nodes[level])); 3337518a238SChris Mason BUG_ON(nritems == 0); 3347518a238SChris Mason if (parent) { 335e2fa7227SChris Mason struct btrfs_disk_key *parent_key; 336a1f39630SAneesh 337a1f39630SAneesh parent_slot = path->slots[level + 1]; 338123abc88SChris Mason parent_key = &parent->ptrs[parent_slot].key; 339123abc88SChris Mason BUG_ON(memcmp(parent_key, &node->ptrs[0].key, 340e2fa7227SChris Mason sizeof(struct btrfs_disk_key))); 3411d4f8a0cSChris Mason BUG_ON(btrfs_node_blockptr(parent, parent_slot) != 3427518a238SChris Mason btrfs_header_blocknr(&node->header)); 343aa5d6bedSChris Mason } 344123abc88SChris Mason BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root)); 3458d7be552SChris Mason if (slot != 0) { 3468d7be552SChris Mason btrfs_disk_key_to_cpu(&cpukey, &node->ptrs[slot - 1].key); 3478d7be552SChris Mason BUG_ON(comp_keys(&node->ptrs[slot].key, &cpukey) <= 0); 3488d7be552SChris Mason } 3498d7be552SChris Mason if (slot < nritems - 1) { 3508d7be552SChris Mason btrfs_disk_key_to_cpu(&cpukey, &node->ptrs[slot + 1].key); 3518d7be552SChris Mason BUG_ON(comp_keys(&node->ptrs[slot].key, &cpukey) >= 0); 352aa5d6bedSChris Mason } 353aa5d6bedSChris Mason return 0; 354aa5d6bedSChris Mason } 355aa5d6bedSChris Mason 356123abc88SChris Mason static int check_leaf(struct btrfs_root *root, struct btrfs_path *path, 357123abc88SChris Mason int level) 358aa5d6bedSChris Mason { 359e20d96d6SChris Mason struct btrfs_leaf *leaf = btrfs_buffer_leaf(path->nodes[level]); 360234b63a0SChris Mason struct btrfs_node *parent = NULL; 361aa5d6bedSChris Mason int parent_slot; 3628d7be552SChris Mason int slot = path->slots[0]; 3638d7be552SChris Mason struct btrfs_key cpukey; 3648d7be552SChris Mason 3657518a238SChris Mason u32 nritems = btrfs_header_nritems(&leaf->header); 366aa5d6bedSChris Mason 367aa5d6bedSChris Mason if (path->nodes[level + 1]) 368e20d96d6SChris Mason parent = btrfs_buffer_node(path->nodes[level + 1]); 369a1f39630SAneesh 370123abc88SChris Mason BUG_ON(btrfs_leaf_free_space(root, leaf) < 0); 3717518a238SChris Mason 3727518a238SChris Mason if (nritems == 0) 3737518a238SChris Mason return 0; 3747518a238SChris Mason 3757518a238SChris Mason if (parent) { 376e2fa7227SChris Mason struct btrfs_disk_key *parent_key; 377a1f39630SAneesh 378a1f39630SAneesh parent_slot = path->slots[level + 1]; 379123abc88SChris Mason parent_key = &parent->ptrs[parent_slot].key; 3806702ed49SChris Mason 381aa5d6bedSChris Mason BUG_ON(memcmp(parent_key, &leaf->items[0].key, 382e2fa7227SChris Mason sizeof(struct btrfs_disk_key))); 3831d4f8a0cSChris Mason BUG_ON(btrfs_node_blockptr(parent, parent_slot) != 3847518a238SChris Mason btrfs_header_blocknr(&leaf->header)); 385aa5d6bedSChris Mason } 3868d7be552SChris Mason if (slot != 0) { 3878d7be552SChris Mason btrfs_disk_key_to_cpu(&cpukey, &leaf->items[slot - 1].key); 3888d7be552SChris Mason BUG_ON(comp_keys(&leaf->items[slot].key, &cpukey) <= 0); 3898d7be552SChris Mason BUG_ON(btrfs_item_offset(leaf->items + slot - 1) != 3908d7be552SChris Mason btrfs_item_end(leaf->items + slot)); 391aa5d6bedSChris Mason } 3928d7be552SChris Mason if (slot < nritems - 1) { 3938d7be552SChris Mason btrfs_disk_key_to_cpu(&cpukey, &leaf->items[slot + 1].key); 3948d7be552SChris Mason BUG_ON(comp_keys(&leaf->items[slot].key, &cpukey) >= 0); 3958d7be552SChris Mason BUG_ON(btrfs_item_offset(leaf->items + slot) != 3968d7be552SChris Mason btrfs_item_end(leaf->items + slot + 1)); 397aa5d6bedSChris Mason } 3988d7be552SChris Mason BUG_ON(btrfs_item_offset(leaf->items) + 3998d7be552SChris Mason btrfs_item_size(leaf->items) != BTRFS_LEAF_DATA_SIZE(root)); 400aa5d6bedSChris Mason return 0; 401aa5d6bedSChris Mason } 402aa5d6bedSChris Mason 403123abc88SChris Mason static int check_block(struct btrfs_root *root, struct btrfs_path *path, 404123abc88SChris Mason int level) 405aa5d6bedSChris Mason { 4063eb0314dSChris Mason struct btrfs_node *node = btrfs_buffer_node(path->nodes[level]); 4073eb0314dSChris Mason if (memcmp(node->header.fsid, root->fs_info->disk_super->fsid, 4083eb0314dSChris Mason sizeof(node->header.fsid))) 4093eb0314dSChris Mason BUG(); 410aa5d6bedSChris Mason if (level == 0) 411123abc88SChris Mason return check_leaf(root, path, level); 412123abc88SChris Mason return check_node(root, path, level); 413aa5d6bedSChris Mason } 414aa5d6bedSChris Mason 41574123bd7SChris Mason /* 41674123bd7SChris Mason * search for key in the array p. items p are item_size apart 41774123bd7SChris Mason * and there are 'max' items in p 41874123bd7SChris Mason * the slot in the array is returned via slot, and it points to 41974123bd7SChris Mason * the place where you would insert key if it is not found in 42074123bd7SChris Mason * the array. 42174123bd7SChris Mason * 42274123bd7SChris Mason * slot may point to max if the key is bigger than all of the keys 42374123bd7SChris Mason */ 4249aca1d51SChris Mason static int generic_bin_search(char *p, int item_size, struct btrfs_key *key, 425be0e5c09SChris Mason int max, int *slot) 426be0e5c09SChris Mason { 427be0e5c09SChris Mason int low = 0; 428be0e5c09SChris Mason int high = max; 429be0e5c09SChris Mason int mid; 430be0e5c09SChris Mason int ret; 431e2fa7227SChris Mason struct btrfs_disk_key *tmp; 432be0e5c09SChris Mason 433be0e5c09SChris Mason while(low < high) { 434be0e5c09SChris Mason mid = (low + high) / 2; 435e2fa7227SChris Mason tmp = (struct btrfs_disk_key *)(p + mid * item_size); 436be0e5c09SChris Mason ret = comp_keys(tmp, key); 437be0e5c09SChris Mason 438be0e5c09SChris Mason if (ret < 0) 439be0e5c09SChris Mason low = mid + 1; 440be0e5c09SChris Mason else if (ret > 0) 441be0e5c09SChris Mason high = mid; 442be0e5c09SChris Mason else { 443be0e5c09SChris Mason *slot = mid; 444be0e5c09SChris Mason return 0; 445be0e5c09SChris Mason } 446be0e5c09SChris Mason } 447be0e5c09SChris Mason *slot = low; 448be0e5c09SChris Mason return 1; 449be0e5c09SChris Mason } 450be0e5c09SChris Mason 45197571fd0SChris Mason /* 45297571fd0SChris Mason * simple bin_search frontend that does the right thing for 45397571fd0SChris Mason * leaves vs nodes 45497571fd0SChris Mason */ 4559aca1d51SChris Mason static int bin_search(struct btrfs_node *c, struct btrfs_key *key, int *slot) 456be0e5c09SChris Mason { 4577518a238SChris Mason if (btrfs_is_leaf(c)) { 458234b63a0SChris Mason struct btrfs_leaf *l = (struct btrfs_leaf *)c; 4590783fcfcSChris Mason return generic_bin_search((void *)l->items, 4600783fcfcSChris Mason sizeof(struct btrfs_item), 4617518a238SChris Mason key, btrfs_header_nritems(&c->header), 4627518a238SChris Mason slot); 463be0e5c09SChris Mason } else { 464123abc88SChris Mason return generic_bin_search((void *)c->ptrs, 465123abc88SChris Mason sizeof(struct btrfs_key_ptr), 4667518a238SChris Mason key, btrfs_header_nritems(&c->header), 4677518a238SChris Mason slot); 468be0e5c09SChris Mason } 469be0e5c09SChris Mason return -1; 470be0e5c09SChris Mason } 471be0e5c09SChris Mason 472e20d96d6SChris Mason static struct buffer_head *read_node_slot(struct btrfs_root *root, 473e20d96d6SChris Mason struct buffer_head *parent_buf, 474bb803951SChris Mason int slot) 475bb803951SChris Mason { 476e20d96d6SChris Mason struct btrfs_node *node = btrfs_buffer_node(parent_buf); 477bb803951SChris Mason if (slot < 0) 478bb803951SChris Mason return NULL; 4797518a238SChris Mason if (slot >= btrfs_header_nritems(&node->header)) 480bb803951SChris Mason return NULL; 4811d4f8a0cSChris Mason return read_tree_block(root, btrfs_node_blockptr(node, slot)); 482bb803951SChris Mason } 483bb803951SChris Mason 484e089f05cSChris Mason static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root 485e089f05cSChris Mason *root, struct btrfs_path *path, int level) 486bb803951SChris Mason { 487e20d96d6SChris Mason struct buffer_head *right_buf; 488e20d96d6SChris Mason struct buffer_head *mid_buf; 489e20d96d6SChris Mason struct buffer_head *left_buf; 490e20d96d6SChris Mason struct buffer_head *parent_buf = NULL; 491234b63a0SChris Mason struct btrfs_node *right = NULL; 492234b63a0SChris Mason struct btrfs_node *mid; 493234b63a0SChris Mason struct btrfs_node *left = NULL; 494234b63a0SChris Mason struct btrfs_node *parent = NULL; 495bb803951SChris Mason int ret = 0; 496bb803951SChris Mason int wret; 497bb803951SChris Mason int pslot; 498bb803951SChris Mason int orig_slot = path->slots[level]; 49954aa1f4dSChris Mason int err_on_enospc = 0; 50079f95c82SChris Mason u64 orig_ptr; 501bb803951SChris Mason 502bb803951SChris Mason if (level == 0) 503bb803951SChris Mason return 0; 504bb803951SChris Mason 505bb803951SChris Mason mid_buf = path->nodes[level]; 506e20d96d6SChris Mason mid = btrfs_buffer_node(mid_buf); 5071d4f8a0cSChris Mason orig_ptr = btrfs_node_blockptr(mid, orig_slot); 50879f95c82SChris Mason 509234b63a0SChris Mason if (level < BTRFS_MAX_LEVEL - 1) 510bb803951SChris Mason parent_buf = path->nodes[level + 1]; 511bb803951SChris Mason pslot = path->slots[level + 1]; 512bb803951SChris Mason 51340689478SChris Mason /* 51440689478SChris Mason * deal with the case where there is only one pointer in the root 51540689478SChris Mason * by promoting the node below to a root 51640689478SChris Mason */ 517bb803951SChris Mason if (!parent_buf) { 518e20d96d6SChris Mason struct buffer_head *child; 5197eccb903SChris Mason u64 blocknr = bh_blocknr(mid_buf); 520bb803951SChris Mason 5217518a238SChris Mason if (btrfs_header_nritems(&mid->header) != 1) 522bb803951SChris Mason return 0; 523bb803951SChris Mason 524bb803951SChris Mason /* promote the child to a root */ 525bb803951SChris Mason child = read_node_slot(root, mid_buf, 0); 526bb803951SChris Mason BUG_ON(!child); 527bb803951SChris Mason root->node = child; 528bb803951SChris Mason path->nodes[level] = NULL; 529d6025579SChris Mason clean_tree_block(trans, root, mid_buf); 530d6025579SChris Mason wait_on_buffer(mid_buf); 531bb803951SChris Mason /* once for the path */ 532234b63a0SChris Mason btrfs_block_release(root, mid_buf); 533bb803951SChris Mason /* once for the root ptr */ 534234b63a0SChris Mason btrfs_block_release(root, mid_buf); 535e089f05cSChris Mason return btrfs_free_extent(trans, root, blocknr, 1, 1); 536bb803951SChris Mason } 537e20d96d6SChris Mason parent = btrfs_buffer_node(parent_buf); 538bb803951SChris Mason 539123abc88SChris Mason if (btrfs_header_nritems(&mid->header) > 540123abc88SChris Mason BTRFS_NODEPTRS_PER_BLOCK(root) / 4) 541bb803951SChris Mason return 0; 542bb803951SChris Mason 54354aa1f4dSChris Mason if (btrfs_header_nritems(&mid->header) < 2) 54454aa1f4dSChris Mason err_on_enospc = 1; 54554aa1f4dSChris Mason 546bb803951SChris Mason left_buf = read_node_slot(root, parent_buf, pslot - 1); 547bb803951SChris Mason if (left_buf) { 54854aa1f4dSChris Mason wret = btrfs_cow_block(trans, root, left_buf, 54954aa1f4dSChris Mason parent_buf, pslot - 1, &left_buf); 55054aa1f4dSChris Mason if (wret) { 55154aa1f4dSChris Mason ret = wret; 55254aa1f4dSChris Mason goto enospc; 55354aa1f4dSChris Mason } 554*2cc58cf2SChris Mason } 555*2cc58cf2SChris Mason right_buf = read_node_slot(root, parent_buf, pslot + 1); 556*2cc58cf2SChris Mason if (right_buf) { 557*2cc58cf2SChris Mason wret = btrfs_cow_block(trans, root, right_buf, 558*2cc58cf2SChris Mason parent_buf, pslot + 1, &right_buf); 559*2cc58cf2SChris Mason if (wret) { 560*2cc58cf2SChris Mason ret = wret; 561*2cc58cf2SChris Mason goto enospc; 562*2cc58cf2SChris Mason } 563*2cc58cf2SChris Mason } 564*2cc58cf2SChris Mason 565*2cc58cf2SChris Mason /* first, try to make some room in the middle buffer */ 566*2cc58cf2SChris Mason if (left_buf) { 567e20d96d6SChris Mason left = btrfs_buffer_node(left_buf); 5687518a238SChris Mason orig_slot += btrfs_header_nritems(&left->header); 569e089f05cSChris Mason wret = push_node_left(trans, root, left_buf, mid_buf); 57079f95c82SChris Mason if (wret < 0) 57179f95c82SChris Mason ret = wret; 57254aa1f4dSChris Mason if (btrfs_header_nritems(&mid->header) < 2) 57354aa1f4dSChris Mason err_on_enospc = 1; 574bb803951SChris Mason } 57579f95c82SChris Mason 57679f95c82SChris Mason /* 57779f95c82SChris Mason * then try to empty the right most buffer into the middle 57879f95c82SChris Mason */ 579bb803951SChris Mason if (right_buf) { 580e20d96d6SChris Mason right = btrfs_buffer_node(right_buf); 581e089f05cSChris Mason wret = push_node_left(trans, root, mid_buf, right_buf); 58254aa1f4dSChris Mason if (wret < 0 && wret != -ENOSPC) 58379f95c82SChris Mason ret = wret; 5847518a238SChris Mason if (btrfs_header_nritems(&right->header) == 0) { 5857eccb903SChris Mason u64 blocknr = bh_blocknr(right_buf); 586e089f05cSChris Mason clean_tree_block(trans, root, right_buf); 587d6025579SChris Mason wait_on_buffer(right_buf); 588d6025579SChris Mason btrfs_block_release(root, right_buf); 589bb803951SChris Mason right_buf = NULL; 590bb803951SChris Mason right = NULL; 591e089f05cSChris Mason wret = del_ptr(trans, root, path, level + 1, pslot + 592e089f05cSChris Mason 1); 593bb803951SChris Mason if (wret) 594bb803951SChris Mason ret = wret; 595e089f05cSChris Mason wret = btrfs_free_extent(trans, root, blocknr, 1, 1); 596bb803951SChris Mason if (wret) 597bb803951SChris Mason ret = wret; 598bb803951SChris Mason } else { 599d6025579SChris Mason btrfs_memcpy(root, parent, 600d6025579SChris Mason &parent->ptrs[pslot + 1].key, 601123abc88SChris Mason &right->ptrs[0].key, 602e2fa7227SChris Mason sizeof(struct btrfs_disk_key)); 603d6025579SChris Mason btrfs_mark_buffer_dirty(parent_buf); 604bb803951SChris Mason } 605bb803951SChris Mason } 6067518a238SChris Mason if (btrfs_header_nritems(&mid->header) == 1) { 60779f95c82SChris Mason /* 60879f95c82SChris Mason * we're not allowed to leave a node with one item in the 60979f95c82SChris Mason * tree during a delete. A deletion from lower in the tree 61079f95c82SChris Mason * could try to delete the only pointer in this node. 61179f95c82SChris Mason * So, pull some keys from the left. 61279f95c82SChris Mason * There has to be a left pointer at this point because 61379f95c82SChris Mason * otherwise we would have pulled some pointers from the 61479f95c82SChris Mason * right 61579f95c82SChris Mason */ 61679f95c82SChris Mason BUG_ON(!left_buf); 617e089f05cSChris Mason wret = balance_node_right(trans, root, mid_buf, left_buf); 61854aa1f4dSChris Mason if (wret < 0) { 61979f95c82SChris Mason ret = wret; 62054aa1f4dSChris Mason goto enospc; 62154aa1f4dSChris Mason } 62279f95c82SChris Mason BUG_ON(wret == 1); 62379f95c82SChris Mason } 6247518a238SChris Mason if (btrfs_header_nritems(&mid->header) == 0) { 62579f95c82SChris Mason /* we've managed to empty the middle node, drop it */ 6267eccb903SChris Mason u64 blocknr = bh_blocknr(mid_buf); 627e089f05cSChris Mason clean_tree_block(trans, root, mid_buf); 628d6025579SChris Mason wait_on_buffer(mid_buf); 629d6025579SChris Mason btrfs_block_release(root, mid_buf); 630bb803951SChris Mason mid_buf = NULL; 631bb803951SChris Mason mid = NULL; 632e089f05cSChris Mason wret = del_ptr(trans, root, path, level + 1, pslot); 633bb803951SChris Mason if (wret) 634bb803951SChris Mason ret = wret; 635e089f05cSChris Mason wret = btrfs_free_extent(trans, root, blocknr, 1, 1); 636bb803951SChris Mason if (wret) 637bb803951SChris Mason ret = wret; 63879f95c82SChris Mason } else { 63979f95c82SChris Mason /* update the parent key to reflect our changes */ 640d6025579SChris Mason btrfs_memcpy(root, parent, 641d6025579SChris Mason &parent->ptrs[pslot].key, &mid->ptrs[0].key, 642e2fa7227SChris Mason sizeof(struct btrfs_disk_key)); 643d6025579SChris Mason btrfs_mark_buffer_dirty(parent_buf); 64479f95c82SChris Mason } 645bb803951SChris Mason 64679f95c82SChris Mason /* update the path */ 647bb803951SChris Mason if (left_buf) { 6487518a238SChris Mason if (btrfs_header_nritems(&left->header) > orig_slot) { 649e20d96d6SChris Mason get_bh(left_buf); 650bb803951SChris Mason path->nodes[level] = left_buf; 651bb803951SChris Mason path->slots[level + 1] -= 1; 652bb803951SChris Mason path->slots[level] = orig_slot; 653bb803951SChris Mason if (mid_buf) 654234b63a0SChris Mason btrfs_block_release(root, mid_buf); 655bb803951SChris Mason } else { 6567518a238SChris Mason orig_slot -= btrfs_header_nritems(&left->header); 657bb803951SChris Mason path->slots[level] = orig_slot; 658bb803951SChris Mason } 659bb803951SChris Mason } 66079f95c82SChris Mason /* double check we haven't messed things up */ 661123abc88SChris Mason check_block(root, path, level); 662e20d96d6SChris Mason if (orig_ptr != 663e20d96d6SChris Mason btrfs_node_blockptr(btrfs_buffer_node(path->nodes[level]), 6641d4f8a0cSChris Mason path->slots[level])) 66579f95c82SChris Mason BUG(); 66654aa1f4dSChris Mason enospc: 667bb803951SChris Mason if (right_buf) 668234b63a0SChris Mason btrfs_block_release(root, right_buf); 669bb803951SChris Mason if (left_buf) 670234b63a0SChris Mason btrfs_block_release(root, left_buf); 671bb803951SChris Mason return ret; 672bb803951SChris Mason } 673bb803951SChris Mason 674e66f709bSChris Mason /* returns zero if the push worked, non-zero otherwise */ 675e66f709bSChris Mason static int push_nodes_for_insert(struct btrfs_trans_handle *trans, 676e66f709bSChris Mason struct btrfs_root *root, 677e66f709bSChris Mason struct btrfs_path *path, int level) 678e66f709bSChris Mason { 679e66f709bSChris Mason struct buffer_head *right_buf; 680e66f709bSChris Mason struct buffer_head *mid_buf; 681e66f709bSChris Mason struct buffer_head *left_buf; 682e66f709bSChris Mason struct buffer_head *parent_buf = NULL; 683e66f709bSChris Mason struct btrfs_node *right = NULL; 684e66f709bSChris Mason struct btrfs_node *mid; 685e66f709bSChris Mason struct btrfs_node *left = NULL; 686e66f709bSChris Mason struct btrfs_node *parent = NULL; 687e66f709bSChris Mason int ret = 0; 688e66f709bSChris Mason int wret; 689e66f709bSChris Mason int pslot; 690e66f709bSChris Mason int orig_slot = path->slots[level]; 691e66f709bSChris Mason u64 orig_ptr; 692e66f709bSChris Mason 693e66f709bSChris Mason if (level == 0) 694e66f709bSChris Mason return 1; 695e66f709bSChris Mason 696e66f709bSChris Mason mid_buf = path->nodes[level]; 697e66f709bSChris Mason mid = btrfs_buffer_node(mid_buf); 698e66f709bSChris Mason orig_ptr = btrfs_node_blockptr(mid, orig_slot); 699e66f709bSChris Mason 700e66f709bSChris Mason if (level < BTRFS_MAX_LEVEL - 1) 701e66f709bSChris Mason parent_buf = path->nodes[level + 1]; 702e66f709bSChris Mason pslot = path->slots[level + 1]; 703e66f709bSChris Mason 704e66f709bSChris Mason if (!parent_buf) 705e66f709bSChris Mason return 1; 706e66f709bSChris Mason parent = btrfs_buffer_node(parent_buf); 707e66f709bSChris Mason 708e66f709bSChris Mason left_buf = read_node_slot(root, parent_buf, pslot - 1); 709e66f709bSChris Mason 710e66f709bSChris Mason /* first, try to make some room in the middle buffer */ 711e66f709bSChris Mason if (left_buf) { 712e66f709bSChris Mason u32 left_nr; 713e66f709bSChris Mason left = btrfs_buffer_node(left_buf); 714e66f709bSChris Mason left_nr = btrfs_header_nritems(&left->header); 71533ade1f8SChris Mason if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) { 71633ade1f8SChris Mason wret = 1; 71733ade1f8SChris Mason } else { 71854aa1f4dSChris Mason ret = btrfs_cow_block(trans, root, left_buf, parent_buf, 71933ade1f8SChris Mason pslot - 1, &left_buf); 72054aa1f4dSChris Mason if (ret) 72154aa1f4dSChris Mason wret = 1; 72254aa1f4dSChris Mason else { 72333ade1f8SChris Mason left = btrfs_buffer_node(left_buf); 72454aa1f4dSChris Mason wret = push_node_left(trans, root, 72554aa1f4dSChris Mason left_buf, mid_buf); 72654aa1f4dSChris Mason } 72733ade1f8SChris Mason } 728e66f709bSChris Mason if (wret < 0) 729e66f709bSChris Mason ret = wret; 730e66f709bSChris Mason if (wret == 0) { 731e66f709bSChris Mason orig_slot += left_nr; 732e66f709bSChris Mason btrfs_memcpy(root, parent, 733e66f709bSChris Mason &parent->ptrs[pslot].key, 734e66f709bSChris Mason &mid->ptrs[0].key, 735e66f709bSChris Mason sizeof(struct btrfs_disk_key)); 736e66f709bSChris Mason btrfs_mark_buffer_dirty(parent_buf); 737e66f709bSChris Mason if (btrfs_header_nritems(&left->header) > orig_slot) { 738e66f709bSChris Mason path->nodes[level] = left_buf; 739e66f709bSChris Mason path->slots[level + 1] -= 1; 740e66f709bSChris Mason path->slots[level] = orig_slot; 741e66f709bSChris Mason btrfs_block_release(root, mid_buf); 742e66f709bSChris Mason } else { 743e66f709bSChris Mason orig_slot -= 744e66f709bSChris Mason btrfs_header_nritems(&left->header); 745e66f709bSChris Mason path->slots[level] = orig_slot; 746e66f709bSChris Mason btrfs_block_release(root, left_buf); 747e66f709bSChris Mason } 748e66f709bSChris Mason check_node(root, path, level); 749e66f709bSChris Mason return 0; 750e66f709bSChris Mason } 751e66f709bSChris Mason btrfs_block_release(root, left_buf); 752e66f709bSChris Mason } 753e66f709bSChris Mason right_buf = read_node_slot(root, parent_buf, pslot + 1); 754e66f709bSChris Mason 755e66f709bSChris Mason /* 756e66f709bSChris Mason * then try to empty the right most buffer into the middle 757e66f709bSChris Mason */ 758e66f709bSChris Mason if (right_buf) { 75933ade1f8SChris Mason u32 right_nr; 760e66f709bSChris Mason right = btrfs_buffer_node(right_buf); 76133ade1f8SChris Mason right_nr = btrfs_header_nritems(&right->header); 76233ade1f8SChris Mason if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) { 76333ade1f8SChris Mason wret = 1; 76433ade1f8SChris Mason } else { 76554aa1f4dSChris Mason ret = btrfs_cow_block(trans, root, right_buf, 76654aa1f4dSChris Mason parent_buf, pslot + 1, 76754aa1f4dSChris Mason &right_buf); 76854aa1f4dSChris Mason if (ret) 76954aa1f4dSChris Mason wret = 1; 77054aa1f4dSChris Mason else { 77133ade1f8SChris Mason right = btrfs_buffer_node(right_buf); 77233ade1f8SChris Mason wret = balance_node_right(trans, root, 77333ade1f8SChris Mason right_buf, mid_buf); 77433ade1f8SChris Mason } 77554aa1f4dSChris Mason } 776e66f709bSChris Mason if (wret < 0) 777e66f709bSChris Mason ret = wret; 778e66f709bSChris Mason if (wret == 0) { 779e66f709bSChris Mason btrfs_memcpy(root, parent, 780e66f709bSChris Mason &parent->ptrs[pslot + 1].key, 781e66f709bSChris Mason &right->ptrs[0].key, 782e66f709bSChris Mason sizeof(struct btrfs_disk_key)); 783e66f709bSChris Mason btrfs_mark_buffer_dirty(parent_buf); 784e66f709bSChris Mason if (btrfs_header_nritems(&mid->header) <= orig_slot) { 785e66f709bSChris Mason path->nodes[level] = right_buf; 786e66f709bSChris Mason path->slots[level + 1] += 1; 787e66f709bSChris Mason path->slots[level] = orig_slot - 788e66f709bSChris Mason btrfs_header_nritems(&mid->header); 789e66f709bSChris Mason btrfs_block_release(root, mid_buf); 790e66f709bSChris Mason } else { 791e66f709bSChris Mason btrfs_block_release(root, right_buf); 792e66f709bSChris Mason } 793e66f709bSChris Mason check_node(root, path, level); 794e66f709bSChris Mason return 0; 795e66f709bSChris Mason } 796e66f709bSChris Mason btrfs_block_release(root, right_buf); 797e66f709bSChris Mason } 798e66f709bSChris Mason check_node(root, path, level); 799e66f709bSChris Mason return 1; 800e66f709bSChris Mason } 801e66f709bSChris Mason 80274123bd7SChris Mason /* 8033c69faecSChris Mason * readahead one full node of leaves 8043c69faecSChris Mason */ 8053c69faecSChris Mason static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path, 8066702ed49SChris Mason int level, int slot) 8073c69faecSChris Mason { 8083c69faecSChris Mason struct btrfs_node *node; 8093c69faecSChris Mason int i; 8103c69faecSChris Mason u32 nritems; 8113c69faecSChris Mason u64 item_objectid; 8123c69faecSChris Mason u64 blocknr; 8133c69faecSChris Mason u64 search; 8143c69faecSChris Mason u64 cluster_start; 8153c69faecSChris Mason int ret; 8163c69faecSChris Mason int nread = 0; 8173c69faecSChris Mason int direction = path->reada; 8183c69faecSChris Mason struct radix_tree_root found; 8193c69faecSChris Mason unsigned long gang[8]; 8203c69faecSChris Mason struct buffer_head *bh; 8213c69faecSChris Mason 8226702ed49SChris Mason if (level == 0) 8233c69faecSChris Mason return; 8243c69faecSChris Mason 8256702ed49SChris Mason if (!path->nodes[level]) 8266702ed49SChris Mason return; 8276702ed49SChris Mason 8286702ed49SChris Mason node = btrfs_buffer_node(path->nodes[level]); 8293c69faecSChris Mason search = btrfs_node_blockptr(node, slot); 8303c69faecSChris Mason bh = btrfs_find_tree_block(root, search); 8313c69faecSChris Mason if (bh) { 8323c69faecSChris Mason brelse(bh); 8333c69faecSChris Mason return; 8343c69faecSChris Mason } 8353c69faecSChris Mason 8363c69faecSChris Mason init_bit_radix(&found); 8373c69faecSChris Mason nritems = btrfs_header_nritems(&node->header); 8383c69faecSChris Mason for (i = slot; i < nritems; i++) { 8393c69faecSChris Mason item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key); 8403c69faecSChris Mason blocknr = btrfs_node_blockptr(node, i); 8413c69faecSChris Mason set_radix_bit(&found, blocknr); 8423c69faecSChris Mason } 8433c69faecSChris Mason if (direction > 0) { 8443c69faecSChris Mason cluster_start = search - 4; 8453c69faecSChris Mason if (cluster_start > search) 8463c69faecSChris Mason cluster_start = 0; 8473c69faecSChris Mason } else 8483c69faecSChris Mason cluster_start = search + 4; 8493c69faecSChris Mason while(1) { 8503c69faecSChris Mason ret = find_first_radix_bit(&found, gang, 0, ARRAY_SIZE(gang)); 8513c69faecSChris Mason if (!ret) 8523c69faecSChris Mason break; 8533c69faecSChris Mason for (i = 0; i < ret; i++) { 8543c69faecSChris Mason blocknr = gang[i]; 8553c69faecSChris Mason clear_radix_bit(&found, blocknr); 856*2cc58cf2SChris Mason if (path->reada == 1 && nread > 16) 8573c69faecSChris Mason continue; 858f2183bdeSChris Mason if (close_blocks(cluster_start, blocknr)) { 8593c69faecSChris Mason readahead_tree_block(root, blocknr); 8603c69faecSChris Mason nread++; 8613c69faecSChris Mason cluster_start = blocknr; 8623c69faecSChris Mason } 8633c69faecSChris Mason } 8643c69faecSChris Mason } 8653c69faecSChris Mason } 8663c69faecSChris Mason /* 86774123bd7SChris Mason * look for key in the tree. path is filled in with nodes along the way 86874123bd7SChris Mason * if key is found, we return zero and you can find the item in the leaf 86974123bd7SChris Mason * level of the path (level 0) 87074123bd7SChris Mason * 87174123bd7SChris Mason * If the key isn't found, the path points to the slot where it should 872aa5d6bedSChris Mason * be inserted, and 1 is returned. If there are other errors during the 873aa5d6bedSChris Mason * search a negative error number is returned. 87497571fd0SChris Mason * 87597571fd0SChris Mason * if ins_len > 0, nodes and leaves will be split as we walk down the 87697571fd0SChris Mason * tree. if ins_len < 0, nodes will be merged as we walk down the tree (if 87797571fd0SChris Mason * possible) 87874123bd7SChris Mason */ 879e089f05cSChris Mason int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root 880e089f05cSChris Mason *root, struct btrfs_key *key, struct btrfs_path *p, int 881e089f05cSChris Mason ins_len, int cow) 882be0e5c09SChris Mason { 883e20d96d6SChris Mason struct buffer_head *b; 884e20d96d6SChris Mason struct buffer_head *cow_buf; 885234b63a0SChris Mason struct btrfs_node *c; 8863c69faecSChris Mason u64 blocknr; 887be0e5c09SChris Mason int slot; 888be0e5c09SChris Mason int ret; 889be0e5c09SChris Mason int level; 8903c69faecSChris Mason int should_reada = p->reada; 8919f3a7427SChris Mason u8 lowest_level = 0; 8929f3a7427SChris Mason 8936702ed49SChris Mason lowest_level = p->lowest_level; 8946702ed49SChris Mason WARN_ON(lowest_level && ins_len); 89522b0ebdaSChris Mason WARN_ON(p->nodes[0] != NULL); 89622b0ebdaSChris Mason WARN_ON(!mutex_is_locked(&root->fs_info->fs_mutex)); 897bb803951SChris Mason again: 898bb803951SChris Mason b = root->node; 899e20d96d6SChris Mason get_bh(b); 900eb60ceacSChris Mason while (b) { 901e20d96d6SChris Mason c = btrfs_buffer_node(b); 902e20d96d6SChris Mason level = btrfs_header_level(&c->header); 90302217ed2SChris Mason if (cow) { 90402217ed2SChris Mason int wret; 905e20d96d6SChris Mason wret = btrfs_cow_block(trans, root, b, 906e20d96d6SChris Mason p->nodes[level + 1], 907e20d96d6SChris Mason p->slots[level + 1], 908e089f05cSChris Mason &cow_buf); 90954aa1f4dSChris Mason if (wret) { 91054aa1f4dSChris Mason btrfs_block_release(root, cow_buf); 91154aa1f4dSChris Mason return wret; 91254aa1f4dSChris Mason } 91302217ed2SChris Mason b = cow_buf; 9142c90e5d6SChris Mason c = btrfs_buffer_node(b); 91502217ed2SChris Mason } 91602217ed2SChris Mason BUG_ON(!cow && ins_len); 9172c90e5d6SChris Mason if (level != btrfs_header_level(&c->header)) 9182c90e5d6SChris Mason WARN_ON(1); 9192c90e5d6SChris Mason level = btrfs_header_level(&c->header); 920eb60ceacSChris Mason p->nodes[level] = b; 921123abc88SChris Mason ret = check_block(root, p, level); 922aa5d6bedSChris Mason if (ret) 923aa5d6bedSChris Mason return -1; 924be0e5c09SChris Mason ret = bin_search(c, key, &slot); 9257518a238SChris Mason if (!btrfs_is_leaf(c)) { 926be0e5c09SChris Mason if (ret && slot > 0) 927be0e5c09SChris Mason slot -= 1; 928be0e5c09SChris Mason p->slots[level] = slot; 929d4dbff95SChris Mason if (ins_len > 0 && btrfs_header_nritems(&c->header) >= 930d4dbff95SChris Mason BTRFS_NODEPTRS_PER_BLOCK(root) - 1) { 931e089f05cSChris Mason int sret = split_node(trans, root, p, level); 9325c680ed6SChris Mason BUG_ON(sret > 0); 9335c680ed6SChris Mason if (sret) 9345c680ed6SChris Mason return sret; 9355c680ed6SChris Mason b = p->nodes[level]; 936e20d96d6SChris Mason c = btrfs_buffer_node(b); 9375c680ed6SChris Mason slot = p->slots[level]; 938bb803951SChris Mason } else if (ins_len < 0) { 939e089f05cSChris Mason int sret = balance_level(trans, root, p, 940e089f05cSChris Mason level); 941bb803951SChris Mason if (sret) 942bb803951SChris Mason return sret; 943bb803951SChris Mason b = p->nodes[level]; 944bb803951SChris Mason if (!b) 945bb803951SChris Mason goto again; 946e20d96d6SChris Mason c = btrfs_buffer_node(b); 947bb803951SChris Mason slot = p->slots[level]; 9487518a238SChris Mason BUG_ON(btrfs_header_nritems(&c->header) == 1); 9495c680ed6SChris Mason } 9509f3a7427SChris Mason /* this is only true while dropping a snapshot */ 9519f3a7427SChris Mason if (level == lowest_level) 9529f3a7427SChris Mason break; 9533c69faecSChris Mason blocknr = btrfs_node_blockptr(c, slot); 9546702ed49SChris Mason if (should_reada) 9556702ed49SChris Mason reada_for_search(root, p, level, slot); 9561d4f8a0cSChris Mason b = read_tree_block(root, btrfs_node_blockptr(c, slot)); 9573c69faecSChris Mason 958be0e5c09SChris Mason } else { 959234b63a0SChris Mason struct btrfs_leaf *l = (struct btrfs_leaf *)c; 960be0e5c09SChris Mason p->slots[level] = slot; 961123abc88SChris Mason if (ins_len > 0 && btrfs_leaf_free_space(root, l) < 9620783fcfcSChris Mason sizeof(struct btrfs_item) + ins_len) { 963d4dbff95SChris Mason int sret = split_leaf(trans, root, key, 964d4dbff95SChris Mason p, ins_len); 9655c680ed6SChris Mason BUG_ON(sret > 0); 9665c680ed6SChris Mason if (sret) 9675c680ed6SChris Mason return sret; 9685c680ed6SChris Mason } 969be0e5c09SChris Mason return ret; 970be0e5c09SChris Mason } 971be0e5c09SChris Mason } 972aa5d6bedSChris Mason return 1; 973be0e5c09SChris Mason } 974be0e5c09SChris Mason 97574123bd7SChris Mason /* 97674123bd7SChris Mason * adjust the pointers going up the tree, starting at level 97774123bd7SChris Mason * making sure the right key of each node is points to 'key'. 97874123bd7SChris Mason * This is used after shifting pointers to the left, so it stops 97974123bd7SChris Mason * fixing up pointers when a given leaf/node is not in slot 0 of the 98074123bd7SChris Mason * higher levels 981aa5d6bedSChris Mason * 982aa5d6bedSChris Mason * If this fails to write a tree block, it returns -1, but continues 983aa5d6bedSChris Mason * fixing up the blocks in ram so the tree is consistent. 98474123bd7SChris Mason */ 985e089f05cSChris Mason static int fixup_low_keys(struct btrfs_trans_handle *trans, struct btrfs_root 986e089f05cSChris Mason *root, struct btrfs_path *path, struct btrfs_disk_key 987e089f05cSChris Mason *key, int level) 988be0e5c09SChris Mason { 989be0e5c09SChris Mason int i; 990aa5d6bedSChris Mason int ret = 0; 991234b63a0SChris Mason for (i = level; i < BTRFS_MAX_LEVEL; i++) { 992234b63a0SChris Mason struct btrfs_node *t; 993be0e5c09SChris Mason int tslot = path->slots[i]; 994eb60ceacSChris Mason if (!path->nodes[i]) 995be0e5c09SChris Mason break; 996e20d96d6SChris Mason t = btrfs_buffer_node(path->nodes[i]); 997d6025579SChris Mason btrfs_memcpy(root, t, &t->ptrs[tslot].key, key, sizeof(*key)); 998d6025579SChris Mason btrfs_mark_buffer_dirty(path->nodes[i]); 999be0e5c09SChris Mason if (tslot != 0) 1000be0e5c09SChris Mason break; 1001be0e5c09SChris Mason } 1002aa5d6bedSChris Mason return ret; 1003be0e5c09SChris Mason } 1004be0e5c09SChris Mason 100574123bd7SChris Mason /* 100674123bd7SChris Mason * try to push data from one node into the next node left in the 100779f95c82SChris Mason * tree. 1008aa5d6bedSChris Mason * 1009aa5d6bedSChris Mason * returns 0 if some ptrs were pushed left, < 0 if there was some horrible 1010aa5d6bedSChris Mason * error, and > 0 if there was no room in the left hand block. 101174123bd7SChris Mason */ 1012e089f05cSChris Mason static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root 1013e20d96d6SChris Mason *root, struct buffer_head *dst_buf, struct 1014e20d96d6SChris Mason buffer_head *src_buf) 1015be0e5c09SChris Mason { 1016e20d96d6SChris Mason struct btrfs_node *src = btrfs_buffer_node(src_buf); 1017e20d96d6SChris Mason struct btrfs_node *dst = btrfs_buffer_node(dst_buf); 1018be0e5c09SChris Mason int push_items = 0; 1019bb803951SChris Mason int src_nritems; 1020bb803951SChris Mason int dst_nritems; 1021aa5d6bedSChris Mason int ret = 0; 1022be0e5c09SChris Mason 10237518a238SChris Mason src_nritems = btrfs_header_nritems(&src->header); 10247518a238SChris Mason dst_nritems = btrfs_header_nritems(&dst->header); 1025123abc88SChris Mason push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems; 102654aa1f4dSChris Mason 1027eb60ceacSChris Mason if (push_items <= 0) { 1028be0e5c09SChris Mason return 1; 1029eb60ceacSChris Mason } 1030be0e5c09SChris Mason 1031be0e5c09SChris Mason if (src_nritems < push_items) 1032be0e5c09SChris Mason push_items = src_nritems; 103379f95c82SChris Mason 1034d6025579SChris Mason btrfs_memcpy(root, dst, dst->ptrs + dst_nritems, src->ptrs, 1035123abc88SChris Mason push_items * sizeof(struct btrfs_key_ptr)); 1036bb803951SChris Mason if (push_items < src_nritems) { 1037d6025579SChris Mason btrfs_memmove(root, src, src->ptrs, src->ptrs + push_items, 1038e2fa7227SChris Mason (src_nritems - push_items) * 1039123abc88SChris Mason sizeof(struct btrfs_key_ptr)); 1040bb803951SChris Mason } 10417518a238SChris Mason btrfs_set_header_nritems(&src->header, src_nritems - push_items); 10427518a238SChris Mason btrfs_set_header_nritems(&dst->header, dst_nritems + push_items); 1043d6025579SChris Mason btrfs_mark_buffer_dirty(src_buf); 1044d6025579SChris Mason btrfs_mark_buffer_dirty(dst_buf); 1045bb803951SChris Mason return ret; 1046be0e5c09SChris Mason } 1047be0e5c09SChris Mason 104897571fd0SChris Mason /* 104979f95c82SChris Mason * try to push data from one node into the next node right in the 105079f95c82SChris Mason * tree. 105179f95c82SChris Mason * 105279f95c82SChris Mason * returns 0 if some ptrs were pushed, < 0 if there was some horrible 105379f95c82SChris Mason * error, and > 0 if there was no room in the right hand block. 105479f95c82SChris Mason * 105579f95c82SChris Mason * this will only push up to 1/2 the contents of the left node over 105679f95c82SChris Mason */ 1057e089f05cSChris Mason static int balance_node_right(struct btrfs_trans_handle *trans, struct 1058e20d96d6SChris Mason btrfs_root *root, struct buffer_head *dst_buf, 1059e20d96d6SChris Mason struct buffer_head *src_buf) 106079f95c82SChris Mason { 1061e20d96d6SChris Mason struct btrfs_node *src = btrfs_buffer_node(src_buf); 1062e20d96d6SChris Mason struct btrfs_node *dst = btrfs_buffer_node(dst_buf); 106379f95c82SChris Mason int push_items = 0; 106479f95c82SChris Mason int max_push; 106579f95c82SChris Mason int src_nritems; 106679f95c82SChris Mason int dst_nritems; 106779f95c82SChris Mason int ret = 0; 106879f95c82SChris Mason 10697518a238SChris Mason src_nritems = btrfs_header_nritems(&src->header); 10707518a238SChris Mason dst_nritems = btrfs_header_nritems(&dst->header); 1071123abc88SChris Mason push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems; 107279f95c82SChris Mason if (push_items <= 0) { 107379f95c82SChris Mason return 1; 107479f95c82SChris Mason } 107579f95c82SChris Mason 107679f95c82SChris Mason max_push = src_nritems / 2 + 1; 107779f95c82SChris Mason /* don't try to empty the node */ 107879f95c82SChris Mason if (max_push > src_nritems) 107979f95c82SChris Mason return 1; 108079f95c82SChris Mason if (max_push < push_items) 108179f95c82SChris Mason push_items = max_push; 108279f95c82SChris Mason 1083d6025579SChris Mason btrfs_memmove(root, dst, dst->ptrs + push_items, dst->ptrs, 1084123abc88SChris Mason dst_nritems * sizeof(struct btrfs_key_ptr)); 1085d6025579SChris Mason 1086d6025579SChris Mason btrfs_memcpy(root, dst, dst->ptrs, 1087d6025579SChris Mason src->ptrs + src_nritems - push_items, 1088123abc88SChris Mason push_items * sizeof(struct btrfs_key_ptr)); 108979f95c82SChris Mason 10907518a238SChris Mason btrfs_set_header_nritems(&src->header, src_nritems - push_items); 10917518a238SChris Mason btrfs_set_header_nritems(&dst->header, dst_nritems + push_items); 109279f95c82SChris Mason 1093d6025579SChris Mason btrfs_mark_buffer_dirty(src_buf); 1094d6025579SChris Mason btrfs_mark_buffer_dirty(dst_buf); 109579f95c82SChris Mason return ret; 109679f95c82SChris Mason } 109779f95c82SChris Mason 109879f95c82SChris Mason /* 109997571fd0SChris Mason * helper function to insert a new root level in the tree. 110097571fd0SChris Mason * A new node is allocated, and a single item is inserted to 110197571fd0SChris Mason * point to the existing root 1102aa5d6bedSChris Mason * 1103aa5d6bedSChris Mason * returns zero on success or < 0 on failure. 110497571fd0SChris Mason */ 1105e089f05cSChris Mason static int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root 1106e089f05cSChris Mason *root, struct btrfs_path *path, int level) 110774123bd7SChris Mason { 1108e20d96d6SChris Mason struct buffer_head *t; 1109234b63a0SChris Mason struct btrfs_node *lower; 1110234b63a0SChris Mason struct btrfs_node *c; 1111e2fa7227SChris Mason struct btrfs_disk_key *lower_key; 11125c680ed6SChris Mason 11135c680ed6SChris Mason BUG_ON(path->nodes[level]); 11145c680ed6SChris Mason BUG_ON(path->nodes[level-1] != root->node); 11155c680ed6SChris Mason 11166702ed49SChris Mason t = btrfs_alloc_free_block(trans, root, root->node->b_blocknr, 0); 111754aa1f4dSChris Mason if (IS_ERR(t)) 111854aa1f4dSChris Mason return PTR_ERR(t); 1119e20d96d6SChris Mason c = btrfs_buffer_node(t); 1120123abc88SChris Mason memset(c, 0, root->blocksize); 11217518a238SChris Mason btrfs_set_header_nritems(&c->header, 1); 11227518a238SChris Mason btrfs_set_header_level(&c->header, level); 11237eccb903SChris Mason btrfs_set_header_blocknr(&c->header, bh_blocknr(t)); 11247f5c1516SChris Mason btrfs_set_header_generation(&c->header, trans->transid); 11254d775673SChris Mason btrfs_set_header_owner(&c->header, root->root_key.objectid); 1126e20d96d6SChris Mason lower = btrfs_buffer_node(path->nodes[level-1]); 11273eb0314dSChris Mason memcpy(c->header.fsid, root->fs_info->disk_super->fsid, 11283eb0314dSChris Mason sizeof(c->header.fsid)); 11297518a238SChris Mason if (btrfs_is_leaf(lower)) 1130234b63a0SChris Mason lower_key = &((struct btrfs_leaf *)lower)->items[0].key; 113174123bd7SChris Mason else 1132123abc88SChris Mason lower_key = &lower->ptrs[0].key; 1133d6025579SChris Mason btrfs_memcpy(root, c, &c->ptrs[0].key, lower_key, 1134d6025579SChris Mason sizeof(struct btrfs_disk_key)); 11357eccb903SChris Mason btrfs_set_node_blockptr(c, 0, bh_blocknr(path->nodes[level - 1])); 1136d5719762SChris Mason 1137d6025579SChris Mason btrfs_mark_buffer_dirty(t); 1138d5719762SChris Mason 1139cfaa7295SChris Mason /* the super has an extra ref to root->node */ 1140234b63a0SChris Mason btrfs_block_release(root, root->node); 114174123bd7SChris Mason root->node = t; 1142e20d96d6SChris Mason get_bh(t); 114374123bd7SChris Mason path->nodes[level] = t; 114474123bd7SChris Mason path->slots[level] = 0; 114574123bd7SChris Mason return 0; 114674123bd7SChris Mason } 11475c680ed6SChris Mason 11485c680ed6SChris Mason /* 11495c680ed6SChris Mason * worker function to insert a single pointer in a node. 11505c680ed6SChris Mason * the node should have enough room for the pointer already 115197571fd0SChris Mason * 11525c680ed6SChris Mason * slot and level indicate where you want the key to go, and 11535c680ed6SChris Mason * blocknr is the block the key points to. 1154aa5d6bedSChris Mason * 1155aa5d6bedSChris Mason * returns zero on success and < 0 on any error 11565c680ed6SChris Mason */ 1157e089f05cSChris Mason static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root 1158e089f05cSChris Mason *root, struct btrfs_path *path, struct btrfs_disk_key 1159e089f05cSChris Mason *key, u64 blocknr, int slot, int level) 11605c680ed6SChris Mason { 1161234b63a0SChris Mason struct btrfs_node *lower; 11625c680ed6SChris Mason int nritems; 11635c680ed6SChris Mason 11645c680ed6SChris Mason BUG_ON(!path->nodes[level]); 1165e20d96d6SChris Mason lower = btrfs_buffer_node(path->nodes[level]); 11667518a238SChris Mason nritems = btrfs_header_nritems(&lower->header); 116774123bd7SChris Mason if (slot > nritems) 116874123bd7SChris Mason BUG(); 1169123abc88SChris Mason if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root)) 117074123bd7SChris Mason BUG(); 117174123bd7SChris Mason if (slot != nritems) { 1172d6025579SChris Mason btrfs_memmove(root, lower, lower->ptrs + slot + 1, 1173d6025579SChris Mason lower->ptrs + slot, 1174123abc88SChris Mason (nritems - slot) * sizeof(struct btrfs_key_ptr)); 117574123bd7SChris Mason } 1176d6025579SChris Mason btrfs_memcpy(root, lower, &lower->ptrs[slot].key, 1177d6025579SChris Mason key, sizeof(struct btrfs_disk_key)); 11781d4f8a0cSChris Mason btrfs_set_node_blockptr(lower, slot, blocknr); 11797518a238SChris Mason btrfs_set_header_nritems(&lower->header, nritems + 1); 1180d6025579SChris Mason btrfs_mark_buffer_dirty(path->nodes[level]); 1181098f59c2SChris Mason check_node(root, path, level); 118274123bd7SChris Mason return 0; 118374123bd7SChris Mason } 118474123bd7SChris Mason 118597571fd0SChris Mason /* 118697571fd0SChris Mason * split the node at the specified level in path in two. 118797571fd0SChris Mason * The path is corrected to point to the appropriate node after the split 118897571fd0SChris Mason * 118997571fd0SChris Mason * Before splitting this tries to make some room in the node by pushing 119097571fd0SChris Mason * left and right, if either one works, it returns right away. 1191aa5d6bedSChris Mason * 1192aa5d6bedSChris Mason * returns 0 on success and < 0 on failure 119397571fd0SChris Mason */ 1194e089f05cSChris Mason static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root 1195e089f05cSChris Mason *root, struct btrfs_path *path, int level) 1196be0e5c09SChris Mason { 1197e20d96d6SChris Mason struct buffer_head *t; 1198234b63a0SChris Mason struct btrfs_node *c; 1199e20d96d6SChris Mason struct buffer_head *split_buffer; 1200234b63a0SChris Mason struct btrfs_node *split; 1201be0e5c09SChris Mason int mid; 12025c680ed6SChris Mason int ret; 1203aa5d6bedSChris Mason int wret; 12047518a238SChris Mason u32 c_nritems; 1205be0e5c09SChris Mason 12065c680ed6SChris Mason t = path->nodes[level]; 1207e20d96d6SChris Mason c = btrfs_buffer_node(t); 12085c680ed6SChris Mason if (t == root->node) { 12095c680ed6SChris Mason /* trying to split the root, lets make a new one */ 1210e089f05cSChris Mason ret = insert_new_root(trans, root, path, level + 1); 12115c680ed6SChris Mason if (ret) 12125c680ed6SChris Mason return ret; 1213e66f709bSChris Mason } else { 1214e66f709bSChris Mason ret = push_nodes_for_insert(trans, root, path, level); 1215e66f709bSChris Mason t = path->nodes[level]; 1216e66f709bSChris Mason c = btrfs_buffer_node(t); 1217e66f709bSChris Mason if (!ret && 1218e66f709bSChris Mason btrfs_header_nritems(&c->header) < 1219e66f709bSChris Mason BTRFS_NODEPTRS_PER_BLOCK(root) - 1) 1220e66f709bSChris Mason return 0; 122154aa1f4dSChris Mason if (ret < 0) 122254aa1f4dSChris Mason return ret; 12235c680ed6SChris Mason } 1224e66f709bSChris Mason 12257518a238SChris Mason c_nritems = btrfs_header_nritems(&c->header); 12266702ed49SChris Mason split_buffer = btrfs_alloc_free_block(trans, root, t->b_blocknr, 0); 122754aa1f4dSChris Mason if (IS_ERR(split_buffer)) 122854aa1f4dSChris Mason return PTR_ERR(split_buffer); 122954aa1f4dSChris Mason 1230e20d96d6SChris Mason split = btrfs_buffer_node(split_buffer); 12317518a238SChris Mason btrfs_set_header_flags(&split->header, btrfs_header_flags(&c->header)); 12329a6f11edSChris Mason btrfs_set_header_level(&split->header, btrfs_header_level(&c->header)); 12337eccb903SChris Mason btrfs_set_header_blocknr(&split->header, bh_blocknr(split_buffer)); 12347f5c1516SChris Mason btrfs_set_header_generation(&split->header, trans->transid); 12354d775673SChris Mason btrfs_set_header_owner(&split->header, root->root_key.objectid); 12363eb0314dSChris Mason memcpy(split->header.fsid, root->fs_info->disk_super->fsid, 12373eb0314dSChris Mason sizeof(split->header.fsid)); 12387518a238SChris Mason mid = (c_nritems + 1) / 2; 1239d6025579SChris Mason btrfs_memcpy(root, split, split->ptrs, c->ptrs + mid, 1240123abc88SChris Mason (c_nritems - mid) * sizeof(struct btrfs_key_ptr)); 12417518a238SChris Mason btrfs_set_header_nritems(&split->header, c_nritems - mid); 12427518a238SChris Mason btrfs_set_header_nritems(&c->header, mid); 1243aa5d6bedSChris Mason ret = 0; 1244aa5d6bedSChris Mason 1245d6025579SChris Mason btrfs_mark_buffer_dirty(t); 1246d6025579SChris Mason btrfs_mark_buffer_dirty(split_buffer); 1247e089f05cSChris Mason wret = insert_ptr(trans, root, path, &split->ptrs[0].key, 12487eccb903SChris Mason bh_blocknr(split_buffer), path->slots[level + 1] + 1, 1249123abc88SChris Mason level + 1); 1250aa5d6bedSChris Mason if (wret) 1251aa5d6bedSChris Mason ret = wret; 1252aa5d6bedSChris Mason 12535de08d7dSChris Mason if (path->slots[level] >= mid) { 12545c680ed6SChris Mason path->slots[level] -= mid; 1255234b63a0SChris Mason btrfs_block_release(root, t); 12565c680ed6SChris Mason path->nodes[level] = split_buffer; 12575c680ed6SChris Mason path->slots[level + 1] += 1; 1258eb60ceacSChris Mason } else { 1259234b63a0SChris Mason btrfs_block_release(root, split_buffer); 1260be0e5c09SChris Mason } 1261aa5d6bedSChris Mason return ret; 1262be0e5c09SChris Mason } 1263be0e5c09SChris Mason 126474123bd7SChris Mason /* 126574123bd7SChris Mason * how many bytes are required to store the items in a leaf. start 126674123bd7SChris Mason * and nr indicate which items in the leaf to check. This totals up the 126774123bd7SChris Mason * space used both by the item structs and the item data 126874123bd7SChris Mason */ 1269234b63a0SChris Mason static int leaf_space_used(struct btrfs_leaf *l, int start, int nr) 1270be0e5c09SChris Mason { 1271be0e5c09SChris Mason int data_len; 1272d4dbff95SChris Mason int nritems = btrfs_header_nritems(&l->header); 1273d4dbff95SChris Mason int end = min(nritems, start + nr) - 1; 1274be0e5c09SChris Mason 1275be0e5c09SChris Mason if (!nr) 1276be0e5c09SChris Mason return 0; 12770783fcfcSChris Mason data_len = btrfs_item_end(l->items + start); 12780783fcfcSChris Mason data_len = data_len - btrfs_item_offset(l->items + end); 12790783fcfcSChris Mason data_len += sizeof(struct btrfs_item) * nr; 1280d4dbff95SChris Mason WARN_ON(data_len < 0); 1281be0e5c09SChris Mason return data_len; 1282be0e5c09SChris Mason } 1283be0e5c09SChris Mason 128474123bd7SChris Mason /* 1285d4dbff95SChris Mason * The space between the end of the leaf items and 1286d4dbff95SChris Mason * the start of the leaf data. IOW, how much room 1287d4dbff95SChris Mason * the leaf has left for both items and data 1288d4dbff95SChris Mason */ 1289d4dbff95SChris Mason int btrfs_leaf_free_space(struct btrfs_root *root, struct btrfs_leaf *leaf) 1290d4dbff95SChris Mason { 1291d4dbff95SChris Mason int nritems = btrfs_header_nritems(&leaf->header); 1292d4dbff95SChris Mason return BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems); 1293d4dbff95SChris Mason } 1294d4dbff95SChris Mason 1295d4dbff95SChris Mason /* 129600ec4c51SChris Mason * push some data in the path leaf to the right, trying to free up at 129700ec4c51SChris Mason * least data_size bytes. returns zero if the push worked, nonzero otherwise 1298aa5d6bedSChris Mason * 1299aa5d6bedSChris Mason * returns 1 if the push failed because the other node didn't have enough 1300aa5d6bedSChris Mason * room, 0 if everything worked out and < 0 if there were major errors. 130100ec4c51SChris Mason */ 1302e089f05cSChris Mason static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root 1303e089f05cSChris Mason *root, struct btrfs_path *path, int data_size) 130400ec4c51SChris Mason { 1305e20d96d6SChris Mason struct buffer_head *left_buf = path->nodes[0]; 1306e20d96d6SChris Mason struct btrfs_leaf *left = btrfs_buffer_leaf(left_buf); 1307234b63a0SChris Mason struct btrfs_leaf *right; 1308e20d96d6SChris Mason struct buffer_head *right_buf; 1309e20d96d6SChris Mason struct buffer_head *upper; 1310e20d96d6SChris Mason struct btrfs_node *upper_node; 131100ec4c51SChris Mason int slot; 131200ec4c51SChris Mason int i; 131300ec4c51SChris Mason int free_space; 131400ec4c51SChris Mason int push_space = 0; 131500ec4c51SChris Mason int push_items = 0; 13160783fcfcSChris Mason struct btrfs_item *item; 13177518a238SChris Mason u32 left_nritems; 13187518a238SChris Mason u32 right_nritems; 131954aa1f4dSChris Mason int ret; 132000ec4c51SChris Mason 132100ec4c51SChris Mason slot = path->slots[1]; 132200ec4c51SChris Mason if (!path->nodes[1]) { 132300ec4c51SChris Mason return 1; 132400ec4c51SChris Mason } 132500ec4c51SChris Mason upper = path->nodes[1]; 1326e20d96d6SChris Mason upper_node = btrfs_buffer_node(upper); 1327e20d96d6SChris Mason if (slot >= btrfs_header_nritems(&upper_node->header) - 1) { 132800ec4c51SChris Mason return 1; 132900ec4c51SChris Mason } 1330e20d96d6SChris Mason right_buf = read_tree_block(root, 1331e20d96d6SChris Mason btrfs_node_blockptr(btrfs_buffer_node(upper), slot + 1)); 1332e20d96d6SChris Mason right = btrfs_buffer_leaf(right_buf); 1333123abc88SChris Mason free_space = btrfs_leaf_free_space(root, right); 13340783fcfcSChris Mason if (free_space < data_size + sizeof(struct btrfs_item)) { 1335234b63a0SChris Mason btrfs_block_release(root, right_buf); 133600ec4c51SChris Mason return 1; 133700ec4c51SChris Mason } 133802217ed2SChris Mason /* cow and double check */ 133954aa1f4dSChris Mason ret = btrfs_cow_block(trans, root, right_buf, upper, 134054aa1f4dSChris Mason slot + 1, &right_buf); 134154aa1f4dSChris Mason if (ret) { 134254aa1f4dSChris Mason btrfs_block_release(root, right_buf); 134354aa1f4dSChris Mason return 1; 134454aa1f4dSChris Mason } 1345e20d96d6SChris Mason right = btrfs_buffer_leaf(right_buf); 1346123abc88SChris Mason free_space = btrfs_leaf_free_space(root, right); 13470783fcfcSChris Mason if (free_space < data_size + sizeof(struct btrfs_item)) { 1348234b63a0SChris Mason btrfs_block_release(root, right_buf); 134902217ed2SChris Mason return 1; 135002217ed2SChris Mason } 135102217ed2SChris Mason 13527518a238SChris Mason left_nritems = btrfs_header_nritems(&left->header); 1353a429e513SChris Mason if (left_nritems == 0) { 1354a429e513SChris Mason btrfs_block_release(root, right_buf); 1355a429e513SChris Mason return 1; 1356a429e513SChris Mason } 1357a429e513SChris Mason for (i = left_nritems - 1; i >= 1; i--) { 135800ec4c51SChris Mason item = left->items + i; 135900ec4c51SChris Mason if (path->slots[0] == i) 136000ec4c51SChris Mason push_space += data_size + sizeof(*item); 13610783fcfcSChris Mason if (btrfs_item_size(item) + sizeof(*item) + push_space > 13620783fcfcSChris Mason free_space) 136300ec4c51SChris Mason break; 136400ec4c51SChris Mason push_items++; 13650783fcfcSChris Mason push_space += btrfs_item_size(item) + sizeof(*item); 136600ec4c51SChris Mason } 136700ec4c51SChris Mason if (push_items == 0) { 1368234b63a0SChris Mason btrfs_block_release(root, right_buf); 136900ec4c51SChris Mason return 1; 137000ec4c51SChris Mason } 1371a429e513SChris Mason if (push_items == left_nritems) 1372a429e513SChris Mason WARN_ON(1); 13737518a238SChris Mason right_nritems = btrfs_header_nritems(&right->header); 137400ec4c51SChris Mason /* push left to right */ 13750783fcfcSChris Mason push_space = btrfs_item_end(left->items + left_nritems - push_items); 1376123abc88SChris Mason push_space -= leaf_data_end(root, left); 137700ec4c51SChris Mason /* make room in the right data area */ 1378d6025579SChris Mason btrfs_memmove(root, right, btrfs_leaf_data(right) + 1379d6025579SChris Mason leaf_data_end(root, right) - push_space, 1380d6025579SChris Mason btrfs_leaf_data(right) + 1381d6025579SChris Mason leaf_data_end(root, right), BTRFS_LEAF_DATA_SIZE(root) - 1382d6025579SChris Mason leaf_data_end(root, right)); 138300ec4c51SChris Mason /* copy from the left data area */ 1384d6025579SChris Mason btrfs_memcpy(root, right, btrfs_leaf_data(right) + 1385d6025579SChris Mason BTRFS_LEAF_DATA_SIZE(root) - push_space, 1386d6025579SChris Mason btrfs_leaf_data(left) + leaf_data_end(root, left), 1387d6025579SChris Mason push_space); 1388d6025579SChris Mason btrfs_memmove(root, right, right->items + push_items, right->items, 13890783fcfcSChris Mason right_nritems * sizeof(struct btrfs_item)); 139000ec4c51SChris Mason /* copy the items from left to right */ 1391d6025579SChris Mason btrfs_memcpy(root, right, right->items, left->items + 1392d6025579SChris Mason left_nritems - push_items, 13930783fcfcSChris Mason push_items * sizeof(struct btrfs_item)); 139400ec4c51SChris Mason 139500ec4c51SChris Mason /* update the item pointers */ 13967518a238SChris Mason right_nritems += push_items; 13977518a238SChris Mason btrfs_set_header_nritems(&right->header, right_nritems); 1398123abc88SChris Mason push_space = BTRFS_LEAF_DATA_SIZE(root); 13997518a238SChris Mason for (i = 0; i < right_nritems; i++) { 14000783fcfcSChris Mason btrfs_set_item_offset(right->items + i, push_space - 14010783fcfcSChris Mason btrfs_item_size(right->items + i)); 14020783fcfcSChris Mason push_space = btrfs_item_offset(right->items + i); 140300ec4c51SChris Mason } 14047518a238SChris Mason left_nritems -= push_items; 14057518a238SChris Mason btrfs_set_header_nritems(&left->header, left_nritems); 140600ec4c51SChris Mason 1407d6025579SChris Mason btrfs_mark_buffer_dirty(left_buf); 1408d6025579SChris Mason btrfs_mark_buffer_dirty(right_buf); 1409a429e513SChris Mason 1410d6025579SChris Mason btrfs_memcpy(root, upper_node, &upper_node->ptrs[slot + 1].key, 1411e2fa7227SChris Mason &right->items[0].key, sizeof(struct btrfs_disk_key)); 1412d6025579SChris Mason btrfs_mark_buffer_dirty(upper); 141302217ed2SChris Mason 141400ec4c51SChris Mason /* then fixup the leaf pointer in the path */ 14157518a238SChris Mason if (path->slots[0] >= left_nritems) { 14167518a238SChris Mason path->slots[0] -= left_nritems; 1417234b63a0SChris Mason btrfs_block_release(root, path->nodes[0]); 141800ec4c51SChris Mason path->nodes[0] = right_buf; 141900ec4c51SChris Mason path->slots[1] += 1; 142000ec4c51SChris Mason } else { 1421234b63a0SChris Mason btrfs_block_release(root, right_buf); 142200ec4c51SChris Mason } 1423098f59c2SChris Mason if (path->nodes[1]) 1424098f59c2SChris Mason check_node(root, path, 1); 142500ec4c51SChris Mason return 0; 142600ec4c51SChris Mason } 142700ec4c51SChris Mason /* 142874123bd7SChris Mason * push some data in the path leaf to the left, trying to free up at 142974123bd7SChris Mason * least data_size bytes. returns zero if the push worked, nonzero otherwise 143074123bd7SChris Mason */ 1431e089f05cSChris Mason static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root 1432e089f05cSChris Mason *root, struct btrfs_path *path, int data_size) 1433be0e5c09SChris Mason { 1434e20d96d6SChris Mason struct buffer_head *right_buf = path->nodes[0]; 1435e20d96d6SChris Mason struct btrfs_leaf *right = btrfs_buffer_leaf(right_buf); 1436e20d96d6SChris Mason struct buffer_head *t; 1437234b63a0SChris Mason struct btrfs_leaf *left; 1438be0e5c09SChris Mason int slot; 1439be0e5c09SChris Mason int i; 1440be0e5c09SChris Mason int free_space; 1441be0e5c09SChris Mason int push_space = 0; 1442be0e5c09SChris Mason int push_items = 0; 14430783fcfcSChris Mason struct btrfs_item *item; 14447518a238SChris Mason u32 old_left_nritems; 1445aa5d6bedSChris Mason int ret = 0; 1446aa5d6bedSChris Mason int wret; 1447be0e5c09SChris Mason 1448be0e5c09SChris Mason slot = path->slots[1]; 1449be0e5c09SChris Mason if (slot == 0) { 1450be0e5c09SChris Mason return 1; 1451be0e5c09SChris Mason } 1452be0e5c09SChris Mason if (!path->nodes[1]) { 1453be0e5c09SChris Mason return 1; 1454be0e5c09SChris Mason } 1455e20d96d6SChris Mason t = read_tree_block(root, 1456e20d96d6SChris Mason btrfs_node_blockptr(btrfs_buffer_node(path->nodes[1]), slot - 1)); 1457e20d96d6SChris Mason left = btrfs_buffer_leaf(t); 1458123abc88SChris Mason free_space = btrfs_leaf_free_space(root, left); 14590783fcfcSChris Mason if (free_space < data_size + sizeof(struct btrfs_item)) { 1460234b63a0SChris Mason btrfs_block_release(root, t); 1461be0e5c09SChris Mason return 1; 1462be0e5c09SChris Mason } 146302217ed2SChris Mason 146402217ed2SChris Mason /* cow and double check */ 146554aa1f4dSChris Mason ret = btrfs_cow_block(trans, root, t, path->nodes[1], slot - 1, &t); 146654aa1f4dSChris Mason if (ret) { 146754aa1f4dSChris Mason /* we hit -ENOSPC, but it isn't fatal here */ 146854aa1f4dSChris Mason return 1; 146954aa1f4dSChris Mason } 1470e20d96d6SChris Mason left = btrfs_buffer_leaf(t); 1471123abc88SChris Mason free_space = btrfs_leaf_free_space(root, left); 14720783fcfcSChris Mason if (free_space < data_size + sizeof(struct btrfs_item)) { 1473234b63a0SChris Mason btrfs_block_release(root, t); 147402217ed2SChris Mason return 1; 147502217ed2SChris Mason } 147602217ed2SChris Mason 1477a429e513SChris Mason if (btrfs_header_nritems(&right->header) == 0) { 1478a429e513SChris Mason btrfs_block_release(root, t); 1479a429e513SChris Mason return 1; 1480a429e513SChris Mason } 1481a429e513SChris Mason 1482a429e513SChris Mason for (i = 0; i < btrfs_header_nritems(&right->header) - 1; i++) { 1483be0e5c09SChris Mason item = right->items + i; 1484be0e5c09SChris Mason if (path->slots[0] == i) 1485be0e5c09SChris Mason push_space += data_size + sizeof(*item); 14860783fcfcSChris Mason if (btrfs_item_size(item) + sizeof(*item) + push_space > 14870783fcfcSChris Mason free_space) 1488be0e5c09SChris Mason break; 1489be0e5c09SChris Mason push_items++; 14900783fcfcSChris Mason push_space += btrfs_item_size(item) + sizeof(*item); 1491be0e5c09SChris Mason } 1492be0e5c09SChris Mason if (push_items == 0) { 1493234b63a0SChris Mason btrfs_block_release(root, t); 1494be0e5c09SChris Mason return 1; 1495be0e5c09SChris Mason } 1496a429e513SChris Mason if (push_items == btrfs_header_nritems(&right->header)) 1497a429e513SChris Mason WARN_ON(1); 1498be0e5c09SChris Mason /* push data from right to left */ 1499d6025579SChris Mason btrfs_memcpy(root, left, left->items + 1500d6025579SChris Mason btrfs_header_nritems(&left->header), 15010783fcfcSChris Mason right->items, push_items * sizeof(struct btrfs_item)); 1502123abc88SChris Mason push_space = BTRFS_LEAF_DATA_SIZE(root) - 15030783fcfcSChris Mason btrfs_item_offset(right->items + push_items -1); 1504d6025579SChris Mason btrfs_memcpy(root, left, btrfs_leaf_data(left) + 1505d6025579SChris Mason leaf_data_end(root, left) - push_space, 1506123abc88SChris Mason btrfs_leaf_data(right) + 1507123abc88SChris Mason btrfs_item_offset(right->items + push_items - 1), 1508be0e5c09SChris Mason push_space); 15097518a238SChris Mason old_left_nritems = btrfs_header_nritems(&left->header); 1510eb60ceacSChris Mason BUG_ON(old_left_nritems < 0); 1511eb60ceacSChris Mason 1512be0e5c09SChris Mason for (i = old_left_nritems; i < old_left_nritems + push_items; i++) { 1513123abc88SChris Mason u32 ioff = btrfs_item_offset(left->items + i); 1514123abc88SChris Mason btrfs_set_item_offset(left->items + i, ioff - 1515123abc88SChris Mason (BTRFS_LEAF_DATA_SIZE(root) - 15160783fcfcSChris Mason btrfs_item_offset(left->items + 15170783fcfcSChris Mason old_left_nritems - 1))); 1518be0e5c09SChris Mason } 15197518a238SChris Mason btrfs_set_header_nritems(&left->header, old_left_nritems + push_items); 1520be0e5c09SChris Mason 1521be0e5c09SChris Mason /* fixup right node */ 15220783fcfcSChris Mason push_space = btrfs_item_offset(right->items + push_items - 1) - 1523123abc88SChris Mason leaf_data_end(root, right); 1524d6025579SChris Mason btrfs_memmove(root, right, btrfs_leaf_data(right) + 1525d6025579SChris Mason BTRFS_LEAF_DATA_SIZE(root) - push_space, 1526d6025579SChris Mason btrfs_leaf_data(right) + 1527123abc88SChris Mason leaf_data_end(root, right), push_space); 1528d6025579SChris Mason btrfs_memmove(root, right, right->items, right->items + push_items, 15297518a238SChris Mason (btrfs_header_nritems(&right->header) - push_items) * 15300783fcfcSChris Mason sizeof(struct btrfs_item)); 15317518a238SChris Mason btrfs_set_header_nritems(&right->header, 15327518a238SChris Mason btrfs_header_nritems(&right->header) - 15337518a238SChris Mason push_items); 1534123abc88SChris Mason push_space = BTRFS_LEAF_DATA_SIZE(root); 1535eb60ceacSChris Mason 15367518a238SChris Mason for (i = 0; i < btrfs_header_nritems(&right->header); i++) { 15370783fcfcSChris Mason btrfs_set_item_offset(right->items + i, push_space - 15380783fcfcSChris Mason btrfs_item_size(right->items + i)); 15390783fcfcSChris Mason push_space = btrfs_item_offset(right->items + i); 1540be0e5c09SChris Mason } 1541eb60ceacSChris Mason 1542d6025579SChris Mason btrfs_mark_buffer_dirty(t); 1543d6025579SChris Mason btrfs_mark_buffer_dirty(right_buf); 1544098f59c2SChris Mason 1545e089f05cSChris Mason wret = fixup_low_keys(trans, root, path, &right->items[0].key, 1); 1546aa5d6bedSChris Mason if (wret) 1547aa5d6bedSChris Mason ret = wret; 1548be0e5c09SChris Mason 1549be0e5c09SChris Mason /* then fixup the leaf pointer in the path */ 1550be0e5c09SChris Mason if (path->slots[0] < push_items) { 1551be0e5c09SChris Mason path->slots[0] += old_left_nritems; 1552234b63a0SChris Mason btrfs_block_release(root, path->nodes[0]); 1553eb60ceacSChris Mason path->nodes[0] = t; 1554be0e5c09SChris Mason path->slots[1] -= 1; 1555be0e5c09SChris Mason } else { 1556234b63a0SChris Mason btrfs_block_release(root, t); 1557be0e5c09SChris Mason path->slots[0] -= push_items; 1558be0e5c09SChris Mason } 1559eb60ceacSChris Mason BUG_ON(path->slots[0] < 0); 1560098f59c2SChris Mason if (path->nodes[1]) 1561098f59c2SChris Mason check_node(root, path, 1); 1562aa5d6bedSChris Mason return ret; 1563be0e5c09SChris Mason } 1564be0e5c09SChris Mason 156574123bd7SChris Mason /* 156674123bd7SChris Mason * split the path's leaf in two, making sure there is at least data_size 156774123bd7SChris Mason * available for the resulting leaf level of the path. 1568aa5d6bedSChris Mason * 1569aa5d6bedSChris Mason * returns 0 if all went well and < 0 on failure. 157074123bd7SChris Mason */ 1571e089f05cSChris Mason static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root 1572d4dbff95SChris Mason *root, struct btrfs_key *ins_key, 1573d4dbff95SChris Mason struct btrfs_path *path, int data_size) 1574be0e5c09SChris Mason { 1575e20d96d6SChris Mason struct buffer_head *l_buf; 1576234b63a0SChris Mason struct btrfs_leaf *l; 15777518a238SChris Mason u32 nritems; 1578eb60ceacSChris Mason int mid; 1579eb60ceacSChris Mason int slot; 1580234b63a0SChris Mason struct btrfs_leaf *right; 1581e20d96d6SChris Mason struct buffer_head *right_buffer; 15820783fcfcSChris Mason int space_needed = data_size + sizeof(struct btrfs_item); 1583be0e5c09SChris Mason int data_copy_size; 1584be0e5c09SChris Mason int rt_data_off; 1585be0e5c09SChris Mason int i; 1586d4dbff95SChris Mason int ret = 0; 1587aa5d6bedSChris Mason int wret; 1588d4dbff95SChris Mason int double_split = 0; 1589d4dbff95SChris Mason struct btrfs_disk_key disk_key; 1590be0e5c09SChris Mason 159140689478SChris Mason /* first try to make some room by pushing left and right */ 1592e089f05cSChris Mason wret = push_leaf_left(trans, root, path, data_size); 1593eaee50e8SChris Mason if (wret < 0) 1594eaee50e8SChris Mason return wret; 1595eaee50e8SChris Mason if (wret) { 1596e089f05cSChris Mason wret = push_leaf_right(trans, root, path, data_size); 1597eaee50e8SChris Mason if (wret < 0) 1598eaee50e8SChris Mason return wret; 1599eaee50e8SChris Mason } 1600eb60ceacSChris Mason l_buf = path->nodes[0]; 1601e20d96d6SChris Mason l = btrfs_buffer_leaf(l_buf); 1602aa5d6bedSChris Mason 1603aa5d6bedSChris Mason /* did the pushes work? */ 1604123abc88SChris Mason if (btrfs_leaf_free_space(root, l) >= 1605123abc88SChris Mason sizeof(struct btrfs_item) + data_size) 1606be0e5c09SChris Mason return 0; 1607aa5d6bedSChris Mason 16085c680ed6SChris Mason if (!path->nodes[1]) { 1609e089f05cSChris Mason ret = insert_new_root(trans, root, path, 1); 16105c680ed6SChris Mason if (ret) 16115c680ed6SChris Mason return ret; 16125c680ed6SChris Mason } 1613eb60ceacSChris Mason slot = path->slots[0]; 16147518a238SChris Mason nritems = btrfs_header_nritems(&l->header); 1615eb60ceacSChris Mason mid = (nritems + 1)/ 2; 161654aa1f4dSChris Mason 16176702ed49SChris Mason right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr, 0); 161854aa1f4dSChris Mason if (IS_ERR(right_buffer)) 161954aa1f4dSChris Mason return PTR_ERR(right_buffer); 162054aa1f4dSChris Mason 1621e20d96d6SChris Mason right = btrfs_buffer_leaf(right_buffer); 1622123abc88SChris Mason memset(&right->header, 0, sizeof(right->header)); 16237eccb903SChris Mason btrfs_set_header_blocknr(&right->header, bh_blocknr(right_buffer)); 16247f5c1516SChris Mason btrfs_set_header_generation(&right->header, trans->transid); 16254d775673SChris Mason btrfs_set_header_owner(&right->header, root->root_key.objectid); 16267518a238SChris Mason btrfs_set_header_level(&right->header, 0); 16273eb0314dSChris Mason memcpy(right->header.fsid, root->fs_info->disk_super->fsid, 16283eb0314dSChris Mason sizeof(right->header.fsid)); 1629d4dbff95SChris Mason if (mid <= slot) { 1630d4dbff95SChris Mason if (nritems == 1 || 1631d4dbff95SChris Mason leaf_space_used(l, mid, nritems - mid) + space_needed > 1632d4dbff95SChris Mason BTRFS_LEAF_DATA_SIZE(root)) { 1633d4dbff95SChris Mason if (slot >= nritems) { 1634d4dbff95SChris Mason btrfs_cpu_key_to_disk(&disk_key, ins_key); 1635d4dbff95SChris Mason btrfs_set_header_nritems(&right->header, 0); 1636d4dbff95SChris Mason wret = insert_ptr(trans, root, path, 1637d4dbff95SChris Mason &disk_key, 16387eccb903SChris Mason bh_blocknr(right_buffer), 1639d4dbff95SChris Mason path->slots[1] + 1, 1); 1640d4dbff95SChris Mason if (wret) 1641d4dbff95SChris Mason ret = wret; 1642d4dbff95SChris Mason btrfs_block_release(root, path->nodes[0]); 1643d4dbff95SChris Mason path->nodes[0] = right_buffer; 1644d4dbff95SChris Mason path->slots[0] = 0; 1645d4dbff95SChris Mason path->slots[1] += 1; 1646d4dbff95SChris Mason return ret; 1647d4dbff95SChris Mason } 1648d4dbff95SChris Mason mid = slot; 1649d4dbff95SChris Mason double_split = 1; 1650d4dbff95SChris Mason } 1651d4dbff95SChris Mason } else { 1652d4dbff95SChris Mason if (leaf_space_used(l, 0, mid + 1) + space_needed > 1653d4dbff95SChris Mason BTRFS_LEAF_DATA_SIZE(root)) { 1654d4dbff95SChris Mason if (slot == 0) { 1655d4dbff95SChris Mason btrfs_cpu_key_to_disk(&disk_key, ins_key); 1656d4dbff95SChris Mason btrfs_set_header_nritems(&right->header, 0); 1657d4dbff95SChris Mason wret = insert_ptr(trans, root, path, 1658d4dbff95SChris Mason &disk_key, 16597eccb903SChris Mason bh_blocknr(right_buffer), 1660098f59c2SChris Mason path->slots[1], 1); 1661d4dbff95SChris Mason if (wret) 1662d4dbff95SChris Mason ret = wret; 1663d4dbff95SChris Mason btrfs_block_release(root, path->nodes[0]); 1664d4dbff95SChris Mason path->nodes[0] = right_buffer; 1665d4dbff95SChris Mason path->slots[0] = 0; 1666a429e513SChris Mason if (path->slots[1] == 0) { 1667a429e513SChris Mason wret = fixup_low_keys(trans, root, 1668a429e513SChris Mason path, &disk_key, 1); 1669a429e513SChris Mason if (wret) 1670a429e513SChris Mason ret = wret; 1671a429e513SChris Mason } 1672d4dbff95SChris Mason return ret; 1673d4dbff95SChris Mason } 1674d4dbff95SChris Mason mid = slot; 1675d4dbff95SChris Mason double_split = 1; 1676d4dbff95SChris Mason } 1677d4dbff95SChris Mason } 1678d4dbff95SChris Mason btrfs_set_header_nritems(&right->header, nritems - mid); 1679123abc88SChris Mason data_copy_size = btrfs_item_end(l->items + mid) - 1680123abc88SChris Mason leaf_data_end(root, l); 1681d6025579SChris Mason btrfs_memcpy(root, right, right->items, l->items + mid, 16820783fcfcSChris Mason (nritems - mid) * sizeof(struct btrfs_item)); 1683d6025579SChris Mason btrfs_memcpy(root, right, 1684d6025579SChris Mason btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) - 1685123abc88SChris Mason data_copy_size, btrfs_leaf_data(l) + 1686123abc88SChris Mason leaf_data_end(root, l), data_copy_size); 1687123abc88SChris Mason rt_data_off = BTRFS_LEAF_DATA_SIZE(root) - 1688123abc88SChris Mason btrfs_item_end(l->items + mid); 168974123bd7SChris Mason 16900783fcfcSChris Mason for (i = 0; i < btrfs_header_nritems(&right->header); i++) { 1691123abc88SChris Mason u32 ioff = btrfs_item_offset(right->items + i); 16920783fcfcSChris Mason btrfs_set_item_offset(right->items + i, ioff + rt_data_off); 16930783fcfcSChris Mason } 169474123bd7SChris Mason 16957518a238SChris Mason btrfs_set_header_nritems(&l->header, mid); 1696aa5d6bedSChris Mason ret = 0; 1697e089f05cSChris Mason wret = insert_ptr(trans, root, path, &right->items[0].key, 16987eccb903SChris Mason bh_blocknr(right_buffer), path->slots[1] + 1, 1); 1699aa5d6bedSChris Mason if (wret) 1700aa5d6bedSChris Mason ret = wret; 1701d6025579SChris Mason btrfs_mark_buffer_dirty(right_buffer); 1702d6025579SChris Mason btrfs_mark_buffer_dirty(l_buf); 1703eb60ceacSChris Mason BUG_ON(path->slots[0] != slot); 1704be0e5c09SChris Mason if (mid <= slot) { 1705234b63a0SChris Mason btrfs_block_release(root, path->nodes[0]); 1706eb60ceacSChris Mason path->nodes[0] = right_buffer; 1707be0e5c09SChris Mason path->slots[0] -= mid; 1708be0e5c09SChris Mason path->slots[1] += 1; 1709eb60ceacSChris Mason } else 1710234b63a0SChris Mason btrfs_block_release(root, right_buffer); 1711eb60ceacSChris Mason BUG_ON(path->slots[0] < 0); 1712098f59c2SChris Mason check_node(root, path, 1); 1713d4dbff95SChris Mason 1714d4dbff95SChris Mason if (!double_split) 1715d4dbff95SChris Mason return ret; 17166702ed49SChris Mason right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr, 0); 171754aa1f4dSChris Mason if (IS_ERR(right_buffer)) 171854aa1f4dSChris Mason return PTR_ERR(right_buffer); 171954aa1f4dSChris Mason 1720d4dbff95SChris Mason right = btrfs_buffer_leaf(right_buffer); 1721d4dbff95SChris Mason memset(&right->header, 0, sizeof(right->header)); 17227eccb903SChris Mason btrfs_set_header_blocknr(&right->header, bh_blocknr(right_buffer)); 1723d4dbff95SChris Mason btrfs_set_header_generation(&right->header, trans->transid); 17244d775673SChris Mason btrfs_set_header_owner(&right->header, root->root_key.objectid); 1725d4dbff95SChris Mason btrfs_set_header_level(&right->header, 0); 17263eb0314dSChris Mason memcpy(right->header.fsid, root->fs_info->disk_super->fsid, 17273eb0314dSChris Mason sizeof(right->header.fsid)); 1728d4dbff95SChris Mason btrfs_cpu_key_to_disk(&disk_key, ins_key); 1729d4dbff95SChris Mason btrfs_set_header_nritems(&right->header, 0); 1730d4dbff95SChris Mason wret = insert_ptr(trans, root, path, 1731d4dbff95SChris Mason &disk_key, 17327eccb903SChris Mason bh_blocknr(right_buffer), 1733d4dbff95SChris Mason path->slots[1], 1); 1734d4dbff95SChris Mason if (wret) 1735d4dbff95SChris Mason ret = wret; 1736a429e513SChris Mason if (path->slots[1] == 0) { 1737a429e513SChris Mason wret = fixup_low_keys(trans, root, path, &disk_key, 1); 1738a429e513SChris Mason if (wret) 1739a429e513SChris Mason ret = wret; 1740a429e513SChris Mason } 1741d4dbff95SChris Mason btrfs_block_release(root, path->nodes[0]); 1742d4dbff95SChris Mason path->nodes[0] = right_buffer; 1743d4dbff95SChris Mason path->slots[0] = 0; 1744d4dbff95SChris Mason check_node(root, path, 1); 1745d4dbff95SChris Mason check_leaf(root, path, 0); 1746be0e5c09SChris Mason return ret; 1747be0e5c09SChris Mason } 1748be0e5c09SChris Mason 1749b18c6685SChris Mason int btrfs_truncate_item(struct btrfs_trans_handle *trans, 1750b18c6685SChris Mason struct btrfs_root *root, 1751b18c6685SChris Mason struct btrfs_path *path, 1752b18c6685SChris Mason u32 new_size) 1753b18c6685SChris Mason { 1754b18c6685SChris Mason int ret = 0; 1755b18c6685SChris Mason int slot; 1756b18c6685SChris Mason int slot_orig; 1757b18c6685SChris Mason struct btrfs_leaf *leaf; 1758b18c6685SChris Mason struct buffer_head *leaf_buf; 1759b18c6685SChris Mason u32 nritems; 1760b18c6685SChris Mason unsigned int data_end; 1761b18c6685SChris Mason unsigned int old_data_start; 1762b18c6685SChris Mason unsigned int old_size; 1763b18c6685SChris Mason unsigned int size_diff; 1764b18c6685SChris Mason int i; 1765b18c6685SChris Mason 1766b18c6685SChris Mason slot_orig = path->slots[0]; 1767b18c6685SChris Mason leaf_buf = path->nodes[0]; 1768b18c6685SChris Mason leaf = btrfs_buffer_leaf(leaf_buf); 1769b18c6685SChris Mason 1770b18c6685SChris Mason nritems = btrfs_header_nritems(&leaf->header); 1771b18c6685SChris Mason data_end = leaf_data_end(root, leaf); 1772b18c6685SChris Mason 1773b18c6685SChris Mason slot = path->slots[0]; 1774b18c6685SChris Mason old_data_start = btrfs_item_offset(leaf->items + slot); 1775b18c6685SChris Mason old_size = btrfs_item_size(leaf->items + slot); 1776b18c6685SChris Mason BUG_ON(old_size <= new_size); 1777b18c6685SChris Mason size_diff = old_size - new_size; 1778b18c6685SChris Mason 1779b18c6685SChris Mason BUG_ON(slot < 0); 1780b18c6685SChris Mason BUG_ON(slot >= nritems); 1781b18c6685SChris Mason 1782b18c6685SChris Mason /* 1783b18c6685SChris Mason * item0..itemN ... dataN.offset..dataN.size .. data0.size 1784b18c6685SChris Mason */ 1785b18c6685SChris Mason /* first correct the data pointers */ 1786b18c6685SChris Mason for (i = slot; i < nritems; i++) { 1787b18c6685SChris Mason u32 ioff = btrfs_item_offset(leaf->items + i); 1788b18c6685SChris Mason btrfs_set_item_offset(leaf->items + i, 1789b18c6685SChris Mason ioff + size_diff); 1790b18c6685SChris Mason } 1791b18c6685SChris Mason /* shift the data */ 1792b18c6685SChris Mason btrfs_memmove(root, leaf, btrfs_leaf_data(leaf) + 1793b18c6685SChris Mason data_end + size_diff, btrfs_leaf_data(leaf) + 1794b18c6685SChris Mason data_end, old_data_start + new_size - data_end); 1795b18c6685SChris Mason btrfs_set_item_size(leaf->items + slot, new_size); 1796b18c6685SChris Mason btrfs_mark_buffer_dirty(leaf_buf); 1797b18c6685SChris Mason 1798b18c6685SChris Mason ret = 0; 1799b18c6685SChris Mason if (btrfs_leaf_free_space(root, leaf) < 0) 1800b18c6685SChris Mason BUG(); 1801b18c6685SChris Mason check_leaf(root, path, 0); 1802b18c6685SChris Mason return ret; 1803b18c6685SChris Mason } 1804b18c6685SChris Mason 18056567e837SChris Mason int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root 18066567e837SChris Mason *root, struct btrfs_path *path, u32 data_size) 18076567e837SChris Mason { 18086567e837SChris Mason int ret = 0; 18096567e837SChris Mason int slot; 18106567e837SChris Mason int slot_orig; 18116567e837SChris Mason struct btrfs_leaf *leaf; 18126567e837SChris Mason struct buffer_head *leaf_buf; 18136567e837SChris Mason u32 nritems; 18146567e837SChris Mason unsigned int data_end; 18156567e837SChris Mason unsigned int old_data; 18166567e837SChris Mason unsigned int old_size; 18176567e837SChris Mason int i; 18186567e837SChris Mason 18196567e837SChris Mason slot_orig = path->slots[0]; 18206567e837SChris Mason leaf_buf = path->nodes[0]; 18216567e837SChris Mason leaf = btrfs_buffer_leaf(leaf_buf); 18226567e837SChris Mason 18236567e837SChris Mason nritems = btrfs_header_nritems(&leaf->header); 18246567e837SChris Mason data_end = leaf_data_end(root, leaf); 18256567e837SChris Mason 18266567e837SChris Mason if (btrfs_leaf_free_space(root, leaf) < data_size) 18276567e837SChris Mason BUG(); 18286567e837SChris Mason slot = path->slots[0]; 18296567e837SChris Mason old_data = btrfs_item_end(leaf->items + slot); 18306567e837SChris Mason 18316567e837SChris Mason BUG_ON(slot < 0); 18326567e837SChris Mason BUG_ON(slot >= nritems); 18336567e837SChris Mason 18346567e837SChris Mason /* 18356567e837SChris Mason * item0..itemN ... dataN.offset..dataN.size .. data0.size 18366567e837SChris Mason */ 18376567e837SChris Mason /* first correct the data pointers */ 18386567e837SChris Mason for (i = slot; i < nritems; i++) { 18396567e837SChris Mason u32 ioff = btrfs_item_offset(leaf->items + i); 18406567e837SChris Mason btrfs_set_item_offset(leaf->items + i, 18416567e837SChris Mason ioff - data_size); 18426567e837SChris Mason } 18436567e837SChris Mason /* shift the data */ 18446567e837SChris Mason btrfs_memmove(root, leaf, btrfs_leaf_data(leaf) + 18456567e837SChris Mason data_end - data_size, btrfs_leaf_data(leaf) + 18466567e837SChris Mason data_end, old_data - data_end); 18476567e837SChris Mason data_end = old_data; 18486567e837SChris Mason old_size = btrfs_item_size(leaf->items + slot); 18496567e837SChris Mason btrfs_set_item_size(leaf->items + slot, old_size + data_size); 18506567e837SChris Mason btrfs_mark_buffer_dirty(leaf_buf); 18516567e837SChris Mason 18526567e837SChris Mason ret = 0; 18536567e837SChris Mason if (btrfs_leaf_free_space(root, leaf) < 0) 18546567e837SChris Mason BUG(); 18556567e837SChris Mason check_leaf(root, path, 0); 18566567e837SChris Mason return ret; 18576567e837SChris Mason } 18586567e837SChris Mason 185974123bd7SChris Mason /* 186074123bd7SChris Mason * Given a key and some data, insert an item into the tree. 186174123bd7SChris Mason * This does all the path init required, making room in the tree if needed. 186274123bd7SChris Mason */ 1863e089f05cSChris Mason int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root 1864e089f05cSChris Mason *root, struct btrfs_path *path, struct btrfs_key 1865e089f05cSChris Mason *cpu_key, u32 data_size) 1866be0e5c09SChris Mason { 1867aa5d6bedSChris Mason int ret = 0; 1868be0e5c09SChris Mason int slot; 1869eb60ceacSChris Mason int slot_orig; 1870234b63a0SChris Mason struct btrfs_leaf *leaf; 1871e20d96d6SChris Mason struct buffer_head *leaf_buf; 18727518a238SChris Mason u32 nritems; 1873be0e5c09SChris Mason unsigned int data_end; 1874e2fa7227SChris Mason struct btrfs_disk_key disk_key; 1875e2fa7227SChris Mason 1876e2fa7227SChris Mason btrfs_cpu_key_to_disk(&disk_key, cpu_key); 1877be0e5c09SChris Mason 187874123bd7SChris Mason /* create a root if there isn't one */ 18795c680ed6SChris Mason if (!root->node) 1880cfaa7295SChris Mason BUG(); 1881e089f05cSChris Mason ret = btrfs_search_slot(trans, root, cpu_key, path, data_size, 1); 1882eb60ceacSChris Mason if (ret == 0) { 1883f0930a37SChris Mason return -EEXIST; 1884aa5d6bedSChris Mason } 1885ed2ff2cbSChris Mason if (ret < 0) 1886ed2ff2cbSChris Mason goto out; 1887be0e5c09SChris Mason 188862e2749eSChris Mason slot_orig = path->slots[0]; 188962e2749eSChris Mason leaf_buf = path->nodes[0]; 1890e20d96d6SChris Mason leaf = btrfs_buffer_leaf(leaf_buf); 189174123bd7SChris Mason 18927518a238SChris Mason nritems = btrfs_header_nritems(&leaf->header); 1893123abc88SChris Mason data_end = leaf_data_end(root, leaf); 1894eb60ceacSChris Mason 1895123abc88SChris Mason if (btrfs_leaf_free_space(root, leaf) < 1896d4dbff95SChris Mason sizeof(struct btrfs_item) + data_size) { 1897be0e5c09SChris Mason BUG(); 1898d4dbff95SChris Mason } 189962e2749eSChris Mason slot = path->slots[0]; 1900eb60ceacSChris Mason BUG_ON(slot < 0); 1901be0e5c09SChris Mason if (slot != nritems) { 1902be0e5c09SChris Mason int i; 19030783fcfcSChris Mason unsigned int old_data = btrfs_item_end(leaf->items + slot); 1904be0e5c09SChris Mason 1905be0e5c09SChris Mason /* 1906be0e5c09SChris Mason * item0..itemN ... dataN.offset..dataN.size .. data0.size 1907be0e5c09SChris Mason */ 1908be0e5c09SChris Mason /* first correct the data pointers */ 19090783fcfcSChris Mason for (i = slot; i < nritems; i++) { 1910123abc88SChris Mason u32 ioff = btrfs_item_offset(leaf->items + i); 19110783fcfcSChris Mason btrfs_set_item_offset(leaf->items + i, 19120783fcfcSChris Mason ioff - data_size); 19130783fcfcSChris Mason } 1914be0e5c09SChris Mason 1915be0e5c09SChris Mason /* shift the items */ 1916d6025579SChris Mason btrfs_memmove(root, leaf, leaf->items + slot + 1, 1917d6025579SChris Mason leaf->items + slot, 19180783fcfcSChris Mason (nritems - slot) * sizeof(struct btrfs_item)); 1919be0e5c09SChris Mason 1920be0e5c09SChris Mason /* shift the data */ 1921d6025579SChris Mason btrfs_memmove(root, leaf, btrfs_leaf_data(leaf) + 1922d6025579SChris Mason data_end - data_size, btrfs_leaf_data(leaf) + 1923be0e5c09SChris Mason data_end, old_data - data_end); 1924be0e5c09SChris Mason data_end = old_data; 1925be0e5c09SChris Mason } 192662e2749eSChris Mason /* setup the item for the new data */ 1927d6025579SChris Mason btrfs_memcpy(root, leaf, &leaf->items[slot].key, &disk_key, 1928e2fa7227SChris Mason sizeof(struct btrfs_disk_key)); 19290783fcfcSChris Mason btrfs_set_item_offset(leaf->items + slot, data_end - data_size); 19300783fcfcSChris Mason btrfs_set_item_size(leaf->items + slot, data_size); 19317518a238SChris Mason btrfs_set_header_nritems(&leaf->header, nritems + 1); 1932d6025579SChris Mason btrfs_mark_buffer_dirty(leaf_buf); 1933aa5d6bedSChris Mason 1934aa5d6bedSChris Mason ret = 0; 19358e19f2cdSChris Mason if (slot == 0) 1936e089f05cSChris Mason ret = fixup_low_keys(trans, root, path, &disk_key, 1); 1937aa5d6bedSChris Mason 1938123abc88SChris Mason if (btrfs_leaf_free_space(root, leaf) < 0) 1939be0e5c09SChris Mason BUG(); 194062e2749eSChris Mason check_leaf(root, path, 0); 1941ed2ff2cbSChris Mason out: 194262e2749eSChris Mason return ret; 194362e2749eSChris Mason } 194462e2749eSChris Mason 194562e2749eSChris Mason /* 194662e2749eSChris Mason * Given a key and some data, insert an item into the tree. 194762e2749eSChris Mason * This does all the path init required, making room in the tree if needed. 194862e2749eSChris Mason */ 1949e089f05cSChris Mason int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root 1950e089f05cSChris Mason *root, struct btrfs_key *cpu_key, void *data, u32 1951e089f05cSChris Mason data_size) 195262e2749eSChris Mason { 195362e2749eSChris Mason int ret = 0; 19542c90e5d6SChris Mason struct btrfs_path *path; 195562e2749eSChris Mason u8 *ptr; 195662e2749eSChris Mason 19572c90e5d6SChris Mason path = btrfs_alloc_path(); 19582c90e5d6SChris Mason BUG_ON(!path); 19592c90e5d6SChris Mason ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size); 196062e2749eSChris Mason if (!ret) { 19612c90e5d6SChris Mason ptr = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), 19622c90e5d6SChris Mason path->slots[0], u8); 19632c90e5d6SChris Mason btrfs_memcpy(root, path->nodes[0]->b_data, 1964d6025579SChris Mason ptr, data, data_size); 19652c90e5d6SChris Mason btrfs_mark_buffer_dirty(path->nodes[0]); 196662e2749eSChris Mason } 19672c90e5d6SChris Mason btrfs_free_path(path); 1968aa5d6bedSChris Mason return ret; 1969be0e5c09SChris Mason } 1970be0e5c09SChris Mason 197174123bd7SChris Mason /* 19725de08d7dSChris Mason * delete the pointer from a given node. 197374123bd7SChris Mason * 197474123bd7SChris Mason * If the delete empties a node, the node is removed from the tree, 197574123bd7SChris Mason * continuing all the way the root if required. The root is converted into 197674123bd7SChris Mason * a leaf if all the nodes are emptied. 197774123bd7SChris Mason */ 1978e089f05cSChris Mason static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, 1979e089f05cSChris Mason struct btrfs_path *path, int level, int slot) 1980be0e5c09SChris Mason { 1981234b63a0SChris Mason struct btrfs_node *node; 1982e20d96d6SChris Mason struct buffer_head *parent = path->nodes[level]; 19837518a238SChris Mason u32 nritems; 1984aa5d6bedSChris Mason int ret = 0; 1985bb803951SChris Mason int wret; 1986be0e5c09SChris Mason 1987e20d96d6SChris Mason node = btrfs_buffer_node(parent); 19887518a238SChris Mason nritems = btrfs_header_nritems(&node->header); 1989be0e5c09SChris Mason if (slot != nritems -1) { 1990d6025579SChris Mason btrfs_memmove(root, node, node->ptrs + slot, 1991d6025579SChris Mason node->ptrs + slot + 1, 1992d6025579SChris Mason sizeof(struct btrfs_key_ptr) * 1993d6025579SChris Mason (nritems - slot - 1)); 1994be0e5c09SChris Mason } 19957518a238SChris Mason nritems--; 19967518a238SChris Mason btrfs_set_header_nritems(&node->header, nritems); 19977518a238SChris Mason if (nritems == 0 && parent == root->node) { 1998e20d96d6SChris Mason struct btrfs_header *header = btrfs_buffer_header(root->node); 1999e20d96d6SChris Mason BUG_ON(btrfs_header_level(header) != 1); 2000eb60ceacSChris Mason /* just turn the root into a leaf and break */ 2001e20d96d6SChris Mason btrfs_set_header_level(header, 0); 2002bb803951SChris Mason } else if (slot == 0) { 2003e089f05cSChris Mason wret = fixup_low_keys(trans, root, path, &node->ptrs[0].key, 2004123abc88SChris Mason level + 1); 20050f70abe2SChris Mason if (wret) 20060f70abe2SChris Mason ret = wret; 2007be0e5c09SChris Mason } 2008d6025579SChris Mason btrfs_mark_buffer_dirty(parent); 2009aa5d6bedSChris Mason return ret; 2010be0e5c09SChris Mason } 2011be0e5c09SChris Mason 201274123bd7SChris Mason /* 201374123bd7SChris Mason * delete the item at the leaf level in path. If that empties 201474123bd7SChris Mason * the leaf, remove it from the tree 201574123bd7SChris Mason */ 2016e089f05cSChris Mason int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, 2017e089f05cSChris Mason struct btrfs_path *path) 2018be0e5c09SChris Mason { 2019be0e5c09SChris Mason int slot; 2020234b63a0SChris Mason struct btrfs_leaf *leaf; 2021e20d96d6SChris Mason struct buffer_head *leaf_buf; 2022be0e5c09SChris Mason int doff; 2023be0e5c09SChris Mason int dsize; 2024aa5d6bedSChris Mason int ret = 0; 2025aa5d6bedSChris Mason int wret; 20267518a238SChris Mason u32 nritems; 2027be0e5c09SChris Mason 2028eb60ceacSChris Mason leaf_buf = path->nodes[0]; 2029e20d96d6SChris Mason leaf = btrfs_buffer_leaf(leaf_buf); 20304920c9acSChris Mason slot = path->slots[0]; 20310783fcfcSChris Mason doff = btrfs_item_offset(leaf->items + slot); 20320783fcfcSChris Mason dsize = btrfs_item_size(leaf->items + slot); 20337518a238SChris Mason nritems = btrfs_header_nritems(&leaf->header); 2034be0e5c09SChris Mason 20357518a238SChris Mason if (slot != nritems - 1) { 2036be0e5c09SChris Mason int i; 2037123abc88SChris Mason int data_end = leaf_data_end(root, leaf); 2038d6025579SChris Mason btrfs_memmove(root, leaf, btrfs_leaf_data(leaf) + 2039d6025579SChris Mason data_end + dsize, 2040123abc88SChris Mason btrfs_leaf_data(leaf) + data_end, 2041be0e5c09SChris Mason doff - data_end); 20420783fcfcSChris Mason for (i = slot + 1; i < nritems; i++) { 2043123abc88SChris Mason u32 ioff = btrfs_item_offset(leaf->items + i); 20440783fcfcSChris Mason btrfs_set_item_offset(leaf->items + i, ioff + dsize); 20450783fcfcSChris Mason } 2046d6025579SChris Mason btrfs_memmove(root, leaf, leaf->items + slot, 2047d6025579SChris Mason leaf->items + slot + 1, 20480783fcfcSChris Mason sizeof(struct btrfs_item) * 20497518a238SChris Mason (nritems - slot - 1)); 2050be0e5c09SChris Mason } 20517518a238SChris Mason btrfs_set_header_nritems(&leaf->header, nritems - 1); 20527518a238SChris Mason nritems--; 205374123bd7SChris Mason /* delete the leaf if we've emptied it */ 20547518a238SChris Mason if (nritems == 0) { 2055eb60ceacSChris Mason if (leaf_buf == root->node) { 20567518a238SChris Mason btrfs_set_header_level(&leaf->header, 0); 20579a8dd150SChris Mason } else { 2058e089f05cSChris Mason clean_tree_block(trans, root, leaf_buf); 2059d6025579SChris Mason wait_on_buffer(leaf_buf); 2060e089f05cSChris Mason wret = del_ptr(trans, root, path, 1, path->slots[1]); 2061aa5d6bedSChris Mason if (wret) 2062aa5d6bedSChris Mason ret = wret; 2063e089f05cSChris Mason wret = btrfs_free_extent(trans, root, 20647eccb903SChris Mason bh_blocknr(leaf_buf), 1, 1); 20650f70abe2SChris Mason if (wret) 20660f70abe2SChris Mason ret = wret; 20679a8dd150SChris Mason } 2068be0e5c09SChris Mason } else { 20697518a238SChris Mason int used = leaf_space_used(leaf, 0, nritems); 2070aa5d6bedSChris Mason if (slot == 0) { 2071e089f05cSChris Mason wret = fixup_low_keys(trans, root, path, 2072aa5d6bedSChris Mason &leaf->items[0].key, 1); 2073aa5d6bedSChris Mason if (wret) 2074aa5d6bedSChris Mason ret = wret; 2075aa5d6bedSChris Mason } 2076aa5d6bedSChris Mason 207774123bd7SChris Mason /* delete the leaf if it is mostly empty */ 2078123abc88SChris Mason if (used < BTRFS_LEAF_DATA_SIZE(root) / 3) { 2079be0e5c09SChris Mason /* push_leaf_left fixes the path. 2080be0e5c09SChris Mason * make sure the path still points to our leaf 2081be0e5c09SChris Mason * for possible call to del_ptr below 2082be0e5c09SChris Mason */ 20834920c9acSChris Mason slot = path->slots[1]; 2084e20d96d6SChris Mason get_bh(leaf_buf); 2085e089f05cSChris Mason wret = push_leaf_left(trans, root, path, 1); 208654aa1f4dSChris Mason if (wret < 0 && wret != -ENOSPC) 2087aa5d6bedSChris Mason ret = wret; 2088f0930a37SChris Mason if (path->nodes[0] == leaf_buf && 20897518a238SChris Mason btrfs_header_nritems(&leaf->header)) { 2090e089f05cSChris Mason wret = push_leaf_right(trans, root, path, 1); 209154aa1f4dSChris Mason if (wret < 0 && wret != -ENOSPC) 2092aa5d6bedSChris Mason ret = wret; 2093aa5d6bedSChris Mason } 20947518a238SChris Mason if (btrfs_header_nritems(&leaf->header) == 0) { 20957eccb903SChris Mason u64 blocknr = bh_blocknr(leaf_buf); 2096e089f05cSChris Mason clean_tree_block(trans, root, leaf_buf); 2097d6025579SChris Mason wait_on_buffer(leaf_buf); 2098e089f05cSChris Mason wret = del_ptr(trans, root, path, 1, slot); 2099aa5d6bedSChris Mason if (wret) 2100aa5d6bedSChris Mason ret = wret; 2101234b63a0SChris Mason btrfs_block_release(root, leaf_buf); 2102e089f05cSChris Mason wret = btrfs_free_extent(trans, root, blocknr, 2103e089f05cSChris Mason 1, 1); 21040f70abe2SChris Mason if (wret) 21050f70abe2SChris Mason ret = wret; 21065de08d7dSChris Mason } else { 2107d6025579SChris Mason btrfs_mark_buffer_dirty(leaf_buf); 2108234b63a0SChris Mason btrfs_block_release(root, leaf_buf); 2109be0e5c09SChris Mason } 2110d5719762SChris Mason } else { 2111d6025579SChris Mason btrfs_mark_buffer_dirty(leaf_buf); 2112be0e5c09SChris Mason } 21139a8dd150SChris Mason } 2114aa5d6bedSChris Mason return ret; 21159a8dd150SChris Mason } 21169a8dd150SChris Mason 211797571fd0SChris Mason /* 211897571fd0SChris Mason * walk up the tree as far as required to find the next leaf. 21190f70abe2SChris Mason * returns 0 if it found something or 1 if there are no greater leaves. 21200f70abe2SChris Mason * returns < 0 on io errors. 212197571fd0SChris Mason */ 2122234b63a0SChris Mason int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) 2123d97e63b6SChris Mason { 2124d97e63b6SChris Mason int slot; 2125d97e63b6SChris Mason int level = 1; 2126d97e63b6SChris Mason u64 blocknr; 2127e20d96d6SChris Mason struct buffer_head *c; 2128e20d96d6SChris Mason struct btrfs_node *c_node; 2129e20d96d6SChris Mason struct buffer_head *next = NULL; 2130d97e63b6SChris Mason 2131234b63a0SChris Mason while(level < BTRFS_MAX_LEVEL) { 2132d97e63b6SChris Mason if (!path->nodes[level]) 21330f70abe2SChris Mason return 1; 2134d97e63b6SChris Mason slot = path->slots[level] + 1; 2135d97e63b6SChris Mason c = path->nodes[level]; 2136e20d96d6SChris Mason c_node = btrfs_buffer_node(c); 2137e20d96d6SChris Mason if (slot >= btrfs_header_nritems(&c_node->header)) { 2138d97e63b6SChris Mason level++; 2139d97e63b6SChris Mason continue; 2140d97e63b6SChris Mason } 2141e20d96d6SChris Mason blocknr = btrfs_node_blockptr(c_node, slot); 2142cfaa7295SChris Mason if (next) 2143234b63a0SChris Mason btrfs_block_release(root, next); 21446702ed49SChris Mason if (path->reada) 21456702ed49SChris Mason reada_for_search(root, path, level, slot); 2146d97e63b6SChris Mason next = read_tree_block(root, blocknr); 2147d97e63b6SChris Mason break; 2148d97e63b6SChris Mason } 2149d97e63b6SChris Mason path->slots[level] = slot; 2150d97e63b6SChris Mason while(1) { 2151d97e63b6SChris Mason level--; 2152d97e63b6SChris Mason c = path->nodes[level]; 2153234b63a0SChris Mason btrfs_block_release(root, c); 2154d97e63b6SChris Mason path->nodes[level] = next; 2155d97e63b6SChris Mason path->slots[level] = 0; 2156d97e63b6SChris Mason if (!level) 2157d97e63b6SChris Mason break; 21586702ed49SChris Mason if (path->reada) 215932020611SYan reada_for_search(root, path, level, 0); 21601d4f8a0cSChris Mason next = read_tree_block(root, 2161e20d96d6SChris Mason btrfs_node_blockptr(btrfs_buffer_node(next), 0)); 2162d97e63b6SChris Mason } 2163d97e63b6SChris Mason return 0; 2164d97e63b6SChris Mason } 2165