12e635a27SChris Mason #include <linux/module.h> 2eb60ceacSChris Mason #include "ctree.h" 3eb60ceacSChris Mason #include "disk-io.h" 47f5c1516SChris Mason #include "transaction.h" 59a8dd150SChris Mason 6e089f05cSChris Mason static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root 7e089f05cSChris Mason *root, struct btrfs_path *path, int level); 8e089f05cSChris Mason static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root 9*d4dbff95SChris Mason *root, struct btrfs_key *ins_key, 10*d4dbff95SChris Mason struct btrfs_path *path, int data_size); 11e089f05cSChris Mason static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root 12e20d96d6SChris Mason *root, struct buffer_head *dst, struct buffer_head 13e089f05cSChris Mason *src); 14e089f05cSChris Mason static int balance_node_right(struct btrfs_trans_handle *trans, struct 15e20d96d6SChris Mason btrfs_root *root, struct buffer_head *dst_buf, 16e20d96d6SChris Mason struct buffer_head *src_buf); 17e089f05cSChris Mason static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, 18e089f05cSChris Mason struct btrfs_path *path, int level, int slot); 19d97e63b6SChris Mason 20df24a2b9SChris Mason inline void btrfs_init_path(struct btrfs_path *p) 21df24a2b9SChris Mason { 22df24a2b9SChris Mason memset(p, 0, sizeof(*p)); 23df24a2b9SChris Mason } 24df24a2b9SChris Mason 252c90e5d6SChris Mason struct btrfs_path *btrfs_alloc_path(void) 262c90e5d6SChris Mason { 27df24a2b9SChris Mason struct btrfs_path *path; 28df24a2b9SChris Mason path = kmem_cache_alloc(btrfs_path_cachep, GFP_NOFS); 29df24a2b9SChris Mason if (path) 30df24a2b9SChris Mason btrfs_init_path(path); 31df24a2b9SChris Mason return path; 322c90e5d6SChris Mason } 332c90e5d6SChris Mason 342c90e5d6SChris Mason void btrfs_free_path(struct btrfs_path *p) 352c90e5d6SChris Mason { 36df24a2b9SChris Mason btrfs_release_path(NULL, p); 372c90e5d6SChris Mason kmem_cache_free(btrfs_path_cachep, p); 382c90e5d6SChris Mason } 392c90e5d6SChris Mason 40234b63a0SChris Mason void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p) 41eb60ceacSChris Mason { 42eb60ceacSChris Mason int i; 43234b63a0SChris Mason for (i = 0; i < BTRFS_MAX_LEVEL; i++) { 44eb60ceacSChris Mason if (!p->nodes[i]) 45eb60ceacSChris Mason break; 46234b63a0SChris Mason btrfs_block_release(root, p->nodes[i]); 47eb60ceacSChris Mason } 48aa5d6bedSChris Mason memset(p, 0, sizeof(*p)); 49eb60ceacSChris Mason } 50eb60ceacSChris Mason 51e089f05cSChris Mason static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root 52e20d96d6SChris Mason *root, struct buffer_head *buf, struct buffer_head 53e20d96d6SChris Mason *parent, int parent_slot, struct buffer_head 54e089f05cSChris Mason **cow_ret) 5502217ed2SChris Mason { 56e20d96d6SChris Mason struct buffer_head *cow; 57e20d96d6SChris Mason struct btrfs_node *cow_node; 5802217ed2SChris Mason 597f5c1516SChris Mason if (btrfs_header_generation(btrfs_buffer_header(buf)) == 607f5c1516SChris Mason trans->transid) { 6102217ed2SChris Mason *cow_ret = buf; 6202217ed2SChris Mason return 0; 6302217ed2SChris Mason } 64e089f05cSChris Mason cow = btrfs_alloc_free_block(trans, root); 65e20d96d6SChris Mason cow_node = btrfs_buffer_node(cow); 662c90e5d6SChris Mason if (buf->b_size != root->blocksize || cow->b_size != root->blocksize) 672c90e5d6SChris Mason WARN_ON(1); 68e20d96d6SChris Mason memcpy(cow_node, btrfs_buffer_node(buf), root->blocksize); 69e20d96d6SChris Mason btrfs_set_header_blocknr(&cow_node->header, cow->b_blocknr); 707f5c1516SChris Mason btrfs_set_header_generation(&cow_node->header, trans->transid); 71e089f05cSChris Mason btrfs_inc_ref(trans, root, buf); 7202217ed2SChris Mason if (buf == root->node) { 7302217ed2SChris Mason root->node = cow; 74e20d96d6SChris Mason get_bh(cow); 752c90e5d6SChris Mason if (buf != root->commit_root) { 76e20d96d6SChris Mason btrfs_free_extent(trans, root, buf->b_blocknr, 1, 1); 772c90e5d6SChris Mason } 78234b63a0SChris Mason btrfs_block_release(root, buf); 7902217ed2SChris Mason } else { 80e20d96d6SChris Mason btrfs_set_node_blockptr(btrfs_buffer_node(parent), parent_slot, 81e20d96d6SChris Mason cow->b_blocknr); 82d6025579SChris Mason btrfs_mark_buffer_dirty(parent); 83e20d96d6SChris Mason btrfs_free_extent(trans, root, buf->b_blocknr, 1, 1); 8402217ed2SChris Mason } 85234b63a0SChris Mason btrfs_block_release(root, buf); 86df24a2b9SChris Mason mark_buffer_dirty(cow); 872c90e5d6SChris Mason *cow_ret = cow; 8802217ed2SChris Mason return 0; 8902217ed2SChris Mason } 9002217ed2SChris Mason 9174123bd7SChris Mason /* 9274123bd7SChris Mason * The leaf data grows from end-to-front in the node. 9374123bd7SChris Mason * this returns the address of the start of the last item, 9474123bd7SChris Mason * which is the stop of the leaf data stack 9574123bd7SChris Mason */ 96123abc88SChris Mason static inline unsigned int leaf_data_end(struct btrfs_root *root, 97123abc88SChris Mason struct btrfs_leaf *leaf) 98be0e5c09SChris Mason { 997518a238SChris Mason u32 nr = btrfs_header_nritems(&leaf->header); 100be0e5c09SChris Mason if (nr == 0) 101123abc88SChris Mason return BTRFS_LEAF_DATA_SIZE(root); 1020783fcfcSChris Mason return btrfs_item_offset(leaf->items + nr - 1); 103be0e5c09SChris Mason } 104be0e5c09SChris Mason 10574123bd7SChris Mason /* 10674123bd7SChris Mason * compare two keys in a memcmp fashion 10774123bd7SChris Mason */ 1089aca1d51SChris Mason static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2) 109be0e5c09SChris Mason { 110e2fa7227SChris Mason struct btrfs_key k1; 111e2fa7227SChris Mason 112e2fa7227SChris Mason btrfs_disk_key_to_cpu(&k1, disk); 113e2fa7227SChris Mason 114e2fa7227SChris Mason if (k1.objectid > k2->objectid) 115be0e5c09SChris Mason return 1; 116e2fa7227SChris Mason if (k1.objectid < k2->objectid) 117be0e5c09SChris Mason return -1; 118a8a2ee0cSChris Mason if (k1.offset > k2->offset) 119a8a2ee0cSChris Mason return 1; 120a8a2ee0cSChris Mason if (k1.offset < k2->offset) 121a8a2ee0cSChris Mason return -1; 122f254e52cSChris Mason if (k1.flags > k2->flags) 123f254e52cSChris Mason return 1; 124f254e52cSChris Mason if (k1.flags < k2->flags) 125f254e52cSChris Mason return -1; 126be0e5c09SChris Mason return 0; 127be0e5c09SChris Mason } 12874123bd7SChris Mason 129123abc88SChris Mason static int check_node(struct btrfs_root *root, struct btrfs_path *path, 130123abc88SChris Mason int level) 131aa5d6bedSChris Mason { 132aa5d6bedSChris Mason int i; 133234b63a0SChris Mason struct btrfs_node *parent = NULL; 134e20d96d6SChris Mason struct btrfs_node *node = btrfs_buffer_node(path->nodes[level]); 135aa5d6bedSChris Mason int parent_slot; 1367518a238SChris Mason u32 nritems = btrfs_header_nritems(&node->header); 137aa5d6bedSChris Mason 138aa5d6bedSChris Mason if (path->nodes[level + 1]) 139e20d96d6SChris Mason parent = btrfs_buffer_node(path->nodes[level + 1]); 140aa5d6bedSChris Mason parent_slot = path->slots[level + 1]; 1417518a238SChris Mason BUG_ON(nritems == 0); 1427518a238SChris Mason if (parent) { 143e2fa7227SChris Mason struct btrfs_disk_key *parent_key; 144123abc88SChris Mason parent_key = &parent->ptrs[parent_slot].key; 145123abc88SChris Mason BUG_ON(memcmp(parent_key, &node->ptrs[0].key, 146e2fa7227SChris Mason sizeof(struct btrfs_disk_key))); 1471d4f8a0cSChris Mason BUG_ON(btrfs_node_blockptr(parent, parent_slot) != 1487518a238SChris Mason btrfs_header_blocknr(&node->header)); 149aa5d6bedSChris Mason } 150123abc88SChris Mason BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root)); 1517518a238SChris Mason for (i = 0; nritems > 1 && i < nritems - 2; i++) { 152e2fa7227SChris Mason struct btrfs_key cpukey; 153123abc88SChris Mason btrfs_disk_key_to_cpu(&cpukey, &node->ptrs[i + 1].key); 154123abc88SChris Mason BUG_ON(comp_keys(&node->ptrs[i].key, &cpukey) >= 0); 155aa5d6bedSChris Mason } 156aa5d6bedSChris Mason return 0; 157aa5d6bedSChris Mason } 158aa5d6bedSChris Mason 159123abc88SChris Mason static int check_leaf(struct btrfs_root *root, struct btrfs_path *path, 160123abc88SChris Mason int level) 161aa5d6bedSChris Mason { 162aa5d6bedSChris Mason int i; 163e20d96d6SChris Mason struct btrfs_leaf *leaf = btrfs_buffer_leaf(path->nodes[level]); 164234b63a0SChris Mason struct btrfs_node *parent = NULL; 165aa5d6bedSChris Mason int parent_slot; 1667518a238SChris Mason u32 nritems = btrfs_header_nritems(&leaf->header); 167aa5d6bedSChris Mason 168aa5d6bedSChris Mason if (path->nodes[level + 1]) 169e20d96d6SChris Mason parent = btrfs_buffer_node(path->nodes[level + 1]); 170aa5d6bedSChris Mason parent_slot = path->slots[level + 1]; 171123abc88SChris Mason BUG_ON(btrfs_leaf_free_space(root, leaf) < 0); 1727518a238SChris Mason 1737518a238SChris Mason if (nritems == 0) 1747518a238SChris Mason return 0; 1757518a238SChris Mason 1767518a238SChris Mason if (parent) { 177e2fa7227SChris Mason struct btrfs_disk_key *parent_key; 178123abc88SChris Mason parent_key = &parent->ptrs[parent_slot].key; 179aa5d6bedSChris Mason BUG_ON(memcmp(parent_key, &leaf->items[0].key, 180e2fa7227SChris Mason sizeof(struct btrfs_disk_key))); 1811d4f8a0cSChris Mason BUG_ON(btrfs_node_blockptr(parent, parent_slot) != 1827518a238SChris Mason btrfs_header_blocknr(&leaf->header)); 183aa5d6bedSChris Mason } 1847518a238SChris Mason for (i = 0; nritems > 1 && i < nritems - 2; i++) { 185e2fa7227SChris Mason struct btrfs_key cpukey; 186e2fa7227SChris Mason btrfs_disk_key_to_cpu(&cpukey, &leaf->items[i + 1].key); 187aa5d6bedSChris Mason BUG_ON(comp_keys(&leaf->items[i].key, 188e2fa7227SChris Mason &cpukey) >= 0); 1890783fcfcSChris Mason BUG_ON(btrfs_item_offset(leaf->items + i) != 1900783fcfcSChris Mason btrfs_item_end(leaf->items + i + 1)); 191aa5d6bedSChris Mason if (i == 0) { 1920783fcfcSChris Mason BUG_ON(btrfs_item_offset(leaf->items + i) + 1930783fcfcSChris Mason btrfs_item_size(leaf->items + i) != 194123abc88SChris Mason BTRFS_LEAF_DATA_SIZE(root)); 195aa5d6bedSChris Mason } 196aa5d6bedSChris Mason } 197aa5d6bedSChris Mason return 0; 198aa5d6bedSChris Mason } 199aa5d6bedSChris Mason 200123abc88SChris Mason static int check_block(struct btrfs_root *root, struct btrfs_path *path, 201123abc88SChris Mason int level) 202aa5d6bedSChris Mason { 203aa5d6bedSChris Mason if (level == 0) 204123abc88SChris Mason return check_leaf(root, path, level); 205123abc88SChris Mason return check_node(root, path, level); 206aa5d6bedSChris Mason } 207aa5d6bedSChris Mason 20874123bd7SChris Mason /* 20974123bd7SChris Mason * search for key in the array p. items p are item_size apart 21074123bd7SChris Mason * and there are 'max' items in p 21174123bd7SChris Mason * the slot in the array is returned via slot, and it points to 21274123bd7SChris Mason * the place where you would insert key if it is not found in 21374123bd7SChris Mason * the array. 21474123bd7SChris Mason * 21574123bd7SChris Mason * slot may point to max if the key is bigger than all of the keys 21674123bd7SChris Mason */ 2179aca1d51SChris Mason static int generic_bin_search(char *p, int item_size, struct btrfs_key *key, 218be0e5c09SChris Mason int max, int *slot) 219be0e5c09SChris Mason { 220be0e5c09SChris Mason int low = 0; 221be0e5c09SChris Mason int high = max; 222be0e5c09SChris Mason int mid; 223be0e5c09SChris Mason int ret; 224e2fa7227SChris Mason struct btrfs_disk_key *tmp; 225be0e5c09SChris Mason 226be0e5c09SChris Mason while(low < high) { 227be0e5c09SChris Mason mid = (low + high) / 2; 228e2fa7227SChris Mason tmp = (struct btrfs_disk_key *)(p + mid * item_size); 229be0e5c09SChris Mason ret = comp_keys(tmp, key); 230be0e5c09SChris Mason 231be0e5c09SChris Mason if (ret < 0) 232be0e5c09SChris Mason low = mid + 1; 233be0e5c09SChris Mason else if (ret > 0) 234be0e5c09SChris Mason high = mid; 235be0e5c09SChris Mason else { 236be0e5c09SChris Mason *slot = mid; 237be0e5c09SChris Mason return 0; 238be0e5c09SChris Mason } 239be0e5c09SChris Mason } 240be0e5c09SChris Mason *slot = low; 241be0e5c09SChris Mason return 1; 242be0e5c09SChris Mason } 243be0e5c09SChris Mason 24497571fd0SChris Mason /* 24597571fd0SChris Mason * simple bin_search frontend that does the right thing for 24697571fd0SChris Mason * leaves vs nodes 24797571fd0SChris Mason */ 2489aca1d51SChris Mason static int bin_search(struct btrfs_node *c, struct btrfs_key *key, int *slot) 249be0e5c09SChris Mason { 2507518a238SChris Mason if (btrfs_is_leaf(c)) { 251234b63a0SChris Mason struct btrfs_leaf *l = (struct btrfs_leaf *)c; 2520783fcfcSChris Mason return generic_bin_search((void *)l->items, 2530783fcfcSChris Mason sizeof(struct btrfs_item), 2547518a238SChris Mason key, btrfs_header_nritems(&c->header), 2557518a238SChris Mason slot); 256be0e5c09SChris Mason } else { 257123abc88SChris Mason return generic_bin_search((void *)c->ptrs, 258123abc88SChris Mason sizeof(struct btrfs_key_ptr), 2597518a238SChris Mason key, btrfs_header_nritems(&c->header), 2607518a238SChris Mason slot); 261be0e5c09SChris Mason } 262be0e5c09SChris Mason return -1; 263be0e5c09SChris Mason } 264be0e5c09SChris Mason 265e20d96d6SChris Mason static struct buffer_head *read_node_slot(struct btrfs_root *root, 266e20d96d6SChris Mason struct buffer_head *parent_buf, 267bb803951SChris Mason int slot) 268bb803951SChris Mason { 269e20d96d6SChris Mason struct btrfs_node *node = btrfs_buffer_node(parent_buf); 270bb803951SChris Mason if (slot < 0) 271bb803951SChris Mason return NULL; 2727518a238SChris Mason if (slot >= btrfs_header_nritems(&node->header)) 273bb803951SChris Mason return NULL; 2741d4f8a0cSChris Mason return read_tree_block(root, btrfs_node_blockptr(node, slot)); 275bb803951SChris Mason } 276bb803951SChris Mason 277e089f05cSChris Mason static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root 278e089f05cSChris Mason *root, struct btrfs_path *path, int level) 279bb803951SChris Mason { 280e20d96d6SChris Mason struct buffer_head *right_buf; 281e20d96d6SChris Mason struct buffer_head *mid_buf; 282e20d96d6SChris Mason struct buffer_head *left_buf; 283e20d96d6SChris Mason struct buffer_head *parent_buf = NULL; 284234b63a0SChris Mason struct btrfs_node *right = NULL; 285234b63a0SChris Mason struct btrfs_node *mid; 286234b63a0SChris Mason struct btrfs_node *left = NULL; 287234b63a0SChris Mason struct btrfs_node *parent = NULL; 288bb803951SChris Mason int ret = 0; 289bb803951SChris Mason int wret; 290bb803951SChris Mason int pslot; 291bb803951SChris Mason int orig_slot = path->slots[level]; 29279f95c82SChris Mason u64 orig_ptr; 293bb803951SChris Mason 294bb803951SChris Mason if (level == 0) 295bb803951SChris Mason return 0; 296bb803951SChris Mason 297bb803951SChris Mason mid_buf = path->nodes[level]; 298e20d96d6SChris Mason mid = btrfs_buffer_node(mid_buf); 2991d4f8a0cSChris Mason orig_ptr = btrfs_node_blockptr(mid, orig_slot); 30079f95c82SChris Mason 301234b63a0SChris Mason if (level < BTRFS_MAX_LEVEL - 1) 302bb803951SChris Mason parent_buf = path->nodes[level + 1]; 303bb803951SChris Mason pslot = path->slots[level + 1]; 304bb803951SChris Mason 30540689478SChris Mason /* 30640689478SChris Mason * deal with the case where there is only one pointer in the root 30740689478SChris Mason * by promoting the node below to a root 30840689478SChris Mason */ 309bb803951SChris Mason if (!parent_buf) { 310e20d96d6SChris Mason struct buffer_head *child; 311e20d96d6SChris Mason u64 blocknr = mid_buf->b_blocknr; 312bb803951SChris Mason 3137518a238SChris Mason if (btrfs_header_nritems(&mid->header) != 1) 314bb803951SChris Mason return 0; 315bb803951SChris Mason 316bb803951SChris Mason /* promote the child to a root */ 317bb803951SChris Mason child = read_node_slot(root, mid_buf, 0); 318bb803951SChris Mason BUG_ON(!child); 319bb803951SChris Mason root->node = child; 320bb803951SChris Mason path->nodes[level] = NULL; 321d6025579SChris Mason clean_tree_block(trans, root, mid_buf); 322d6025579SChris Mason wait_on_buffer(mid_buf); 323bb803951SChris Mason /* once for the path */ 324234b63a0SChris Mason btrfs_block_release(root, mid_buf); 325bb803951SChris Mason /* once for the root ptr */ 326234b63a0SChris Mason btrfs_block_release(root, mid_buf); 327e089f05cSChris Mason return btrfs_free_extent(trans, root, blocknr, 1, 1); 328bb803951SChris Mason } 329e20d96d6SChris Mason parent = btrfs_buffer_node(parent_buf); 330bb803951SChris Mason 331123abc88SChris Mason if (btrfs_header_nritems(&mid->header) > 332123abc88SChris Mason BTRFS_NODEPTRS_PER_BLOCK(root) / 4) 333bb803951SChris Mason return 0; 334bb803951SChris Mason 335bb803951SChris Mason left_buf = read_node_slot(root, parent_buf, pslot - 1); 336bb803951SChris Mason right_buf = read_node_slot(root, parent_buf, pslot + 1); 33779f95c82SChris Mason 33879f95c82SChris Mason /* first, try to make some room in the middle buffer */ 339bb803951SChris Mason if (left_buf) { 340e089f05cSChris Mason btrfs_cow_block(trans, root, left_buf, parent_buf, pslot - 1, 341e089f05cSChris Mason &left_buf); 342e20d96d6SChris Mason left = btrfs_buffer_node(left_buf); 3437518a238SChris Mason orig_slot += btrfs_header_nritems(&left->header); 344e089f05cSChris Mason wret = push_node_left(trans, root, left_buf, mid_buf); 34579f95c82SChris Mason if (wret < 0) 34679f95c82SChris Mason ret = wret; 347bb803951SChris Mason } 34879f95c82SChris Mason 34979f95c82SChris Mason /* 35079f95c82SChris Mason * then try to empty the right most buffer into the middle 35179f95c82SChris Mason */ 352bb803951SChris Mason if (right_buf) { 353e089f05cSChris Mason btrfs_cow_block(trans, root, right_buf, parent_buf, pslot + 1, 354e089f05cSChris Mason &right_buf); 355e20d96d6SChris Mason right = btrfs_buffer_node(right_buf); 356e089f05cSChris Mason wret = push_node_left(trans, root, mid_buf, right_buf); 35779f95c82SChris Mason if (wret < 0) 35879f95c82SChris Mason ret = wret; 3597518a238SChris Mason if (btrfs_header_nritems(&right->header) == 0) { 360e20d96d6SChris Mason u64 blocknr = right_buf->b_blocknr; 361e089f05cSChris Mason clean_tree_block(trans, root, right_buf); 362d6025579SChris Mason wait_on_buffer(right_buf); 363d6025579SChris Mason btrfs_block_release(root, right_buf); 364bb803951SChris Mason right_buf = NULL; 365bb803951SChris Mason right = NULL; 366e089f05cSChris Mason wret = del_ptr(trans, root, path, level + 1, pslot + 367e089f05cSChris Mason 1); 368bb803951SChris Mason if (wret) 369bb803951SChris Mason ret = wret; 370e089f05cSChris Mason wret = btrfs_free_extent(trans, root, blocknr, 1, 1); 371bb803951SChris Mason if (wret) 372bb803951SChris Mason ret = wret; 373bb803951SChris Mason } else { 374d6025579SChris Mason btrfs_memcpy(root, parent, 375d6025579SChris Mason &parent->ptrs[pslot + 1].key, 376123abc88SChris Mason &right->ptrs[0].key, 377e2fa7227SChris Mason sizeof(struct btrfs_disk_key)); 378d6025579SChris Mason btrfs_mark_buffer_dirty(parent_buf); 379bb803951SChris Mason } 380bb803951SChris Mason } 3817518a238SChris Mason if (btrfs_header_nritems(&mid->header) == 1) { 38279f95c82SChris Mason /* 38379f95c82SChris Mason * we're not allowed to leave a node with one item in the 38479f95c82SChris Mason * tree during a delete. A deletion from lower in the tree 38579f95c82SChris Mason * could try to delete the only pointer in this node. 38679f95c82SChris Mason * So, pull some keys from the left. 38779f95c82SChris Mason * There has to be a left pointer at this point because 38879f95c82SChris Mason * otherwise we would have pulled some pointers from the 38979f95c82SChris Mason * right 39079f95c82SChris Mason */ 39179f95c82SChris Mason BUG_ON(!left_buf); 392e089f05cSChris Mason wret = balance_node_right(trans, root, mid_buf, left_buf); 39379f95c82SChris Mason if (wret < 0) 39479f95c82SChris Mason ret = wret; 39579f95c82SChris Mason BUG_ON(wret == 1); 39679f95c82SChris Mason } 3977518a238SChris Mason if (btrfs_header_nritems(&mid->header) == 0) { 39879f95c82SChris Mason /* we've managed to empty the middle node, drop it */ 399e20d96d6SChris Mason u64 blocknr = mid_buf->b_blocknr; 400e089f05cSChris Mason clean_tree_block(trans, root, mid_buf); 401d6025579SChris Mason wait_on_buffer(mid_buf); 402d6025579SChris Mason btrfs_block_release(root, mid_buf); 403bb803951SChris Mason mid_buf = NULL; 404bb803951SChris Mason mid = NULL; 405e089f05cSChris Mason wret = del_ptr(trans, root, path, level + 1, pslot); 406bb803951SChris Mason if (wret) 407bb803951SChris Mason ret = wret; 408e089f05cSChris Mason wret = btrfs_free_extent(trans, root, blocknr, 1, 1); 409bb803951SChris Mason if (wret) 410bb803951SChris Mason ret = wret; 41179f95c82SChris Mason } else { 41279f95c82SChris Mason /* update the parent key to reflect our changes */ 413d6025579SChris Mason btrfs_memcpy(root, parent, 414d6025579SChris Mason &parent->ptrs[pslot].key, &mid->ptrs[0].key, 415e2fa7227SChris Mason sizeof(struct btrfs_disk_key)); 416d6025579SChris Mason btrfs_mark_buffer_dirty(parent_buf); 41779f95c82SChris Mason } 418bb803951SChris Mason 41979f95c82SChris Mason /* update the path */ 420bb803951SChris Mason if (left_buf) { 4217518a238SChris Mason if (btrfs_header_nritems(&left->header) > orig_slot) { 422e20d96d6SChris Mason get_bh(left_buf); 423bb803951SChris Mason path->nodes[level] = left_buf; 424bb803951SChris Mason path->slots[level + 1] -= 1; 425bb803951SChris Mason path->slots[level] = orig_slot; 426bb803951SChris Mason if (mid_buf) 427234b63a0SChris Mason btrfs_block_release(root, mid_buf); 428bb803951SChris Mason } else { 4297518a238SChris Mason orig_slot -= btrfs_header_nritems(&left->header); 430bb803951SChris Mason path->slots[level] = orig_slot; 431bb803951SChris Mason } 432bb803951SChris Mason } 43379f95c82SChris Mason /* double check we haven't messed things up */ 434123abc88SChris Mason check_block(root, path, level); 435e20d96d6SChris Mason if (orig_ptr != 436e20d96d6SChris Mason btrfs_node_blockptr(btrfs_buffer_node(path->nodes[level]), 4371d4f8a0cSChris Mason path->slots[level])) 43879f95c82SChris Mason BUG(); 439bb803951SChris Mason 440bb803951SChris Mason if (right_buf) 441234b63a0SChris Mason btrfs_block_release(root, right_buf); 442bb803951SChris Mason if (left_buf) 443234b63a0SChris Mason btrfs_block_release(root, left_buf); 444bb803951SChris Mason return ret; 445bb803951SChris Mason } 446bb803951SChris Mason 44774123bd7SChris Mason /* 44874123bd7SChris Mason * look for key in the tree. path is filled in with nodes along the way 44974123bd7SChris Mason * if key is found, we return zero and you can find the item in the leaf 45074123bd7SChris Mason * level of the path (level 0) 45174123bd7SChris Mason * 45274123bd7SChris Mason * If the key isn't found, the path points to the slot where it should 453aa5d6bedSChris Mason * be inserted, and 1 is returned. If there are other errors during the 454aa5d6bedSChris Mason * search a negative error number is returned. 45597571fd0SChris Mason * 45697571fd0SChris Mason * if ins_len > 0, nodes and leaves will be split as we walk down the 45797571fd0SChris Mason * tree. if ins_len < 0, nodes will be merged as we walk down the tree (if 45897571fd0SChris Mason * possible) 45974123bd7SChris Mason */ 460e089f05cSChris Mason int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root 461e089f05cSChris Mason *root, struct btrfs_key *key, struct btrfs_path *p, int 462e089f05cSChris Mason ins_len, int cow) 463be0e5c09SChris Mason { 464e20d96d6SChris Mason struct buffer_head *b; 465e20d96d6SChris Mason struct buffer_head *cow_buf; 466234b63a0SChris Mason struct btrfs_node *c; 467be0e5c09SChris Mason int slot; 468be0e5c09SChris Mason int ret; 469be0e5c09SChris Mason int level; 4705c680ed6SChris Mason 47122b0ebdaSChris Mason WARN_ON(p->nodes[0] != NULL); 47222b0ebdaSChris Mason WARN_ON(!mutex_is_locked(&root->fs_info->fs_mutex)); 473bb803951SChris Mason again: 474bb803951SChris Mason b = root->node; 475e20d96d6SChris Mason get_bh(b); 476eb60ceacSChris Mason while (b) { 477e20d96d6SChris Mason c = btrfs_buffer_node(b); 478e20d96d6SChris Mason level = btrfs_header_level(&c->header); 47902217ed2SChris Mason if (cow) { 48002217ed2SChris Mason int wret; 481e20d96d6SChris Mason wret = btrfs_cow_block(trans, root, b, 482e20d96d6SChris Mason p->nodes[level + 1], 483e20d96d6SChris Mason p->slots[level + 1], 484e089f05cSChris Mason &cow_buf); 48502217ed2SChris Mason b = cow_buf; 4862c90e5d6SChris Mason c = btrfs_buffer_node(b); 48702217ed2SChris Mason } 48802217ed2SChris Mason BUG_ON(!cow && ins_len); 4892c90e5d6SChris Mason if (level != btrfs_header_level(&c->header)) 4902c90e5d6SChris Mason WARN_ON(1); 4912c90e5d6SChris Mason level = btrfs_header_level(&c->header); 492eb60ceacSChris Mason p->nodes[level] = b; 493123abc88SChris Mason ret = check_block(root, p, level); 494aa5d6bedSChris Mason if (ret) 495aa5d6bedSChris Mason return -1; 496be0e5c09SChris Mason ret = bin_search(c, key, &slot); 4977518a238SChris Mason if (!btrfs_is_leaf(c)) { 498be0e5c09SChris Mason if (ret && slot > 0) 499be0e5c09SChris Mason slot -= 1; 500be0e5c09SChris Mason p->slots[level] = slot; 501*d4dbff95SChris Mason if (ins_len > 0 && btrfs_header_nritems(&c->header) >= 502*d4dbff95SChris Mason BTRFS_NODEPTRS_PER_BLOCK(root) - 1) { 503e089f05cSChris Mason int sret = split_node(trans, root, p, level); 5045c680ed6SChris Mason BUG_ON(sret > 0); 5055c680ed6SChris Mason if (sret) 5065c680ed6SChris Mason return sret; 5075c680ed6SChris Mason b = p->nodes[level]; 508e20d96d6SChris Mason c = btrfs_buffer_node(b); 5095c680ed6SChris Mason slot = p->slots[level]; 510bb803951SChris Mason } else if (ins_len < 0) { 511e089f05cSChris Mason int sret = balance_level(trans, root, p, 512e089f05cSChris Mason level); 513bb803951SChris Mason if (sret) 514bb803951SChris Mason return sret; 515bb803951SChris Mason b = p->nodes[level]; 516bb803951SChris Mason if (!b) 517bb803951SChris Mason goto again; 518e20d96d6SChris Mason c = btrfs_buffer_node(b); 519bb803951SChris Mason slot = p->slots[level]; 5207518a238SChris Mason BUG_ON(btrfs_header_nritems(&c->header) == 1); 5215c680ed6SChris Mason } 5221d4f8a0cSChris Mason b = read_tree_block(root, btrfs_node_blockptr(c, slot)); 523be0e5c09SChris Mason } else { 524234b63a0SChris Mason struct btrfs_leaf *l = (struct btrfs_leaf *)c; 525be0e5c09SChris Mason p->slots[level] = slot; 526123abc88SChris Mason if (ins_len > 0 && btrfs_leaf_free_space(root, l) < 5270783fcfcSChris Mason sizeof(struct btrfs_item) + ins_len) { 528*d4dbff95SChris Mason int sret = split_leaf(trans, root, key, 529*d4dbff95SChris Mason p, ins_len); 5305c680ed6SChris Mason BUG_ON(sret > 0); 5315c680ed6SChris Mason if (sret) 5325c680ed6SChris Mason return sret; 5335c680ed6SChris Mason } 534be0e5c09SChris Mason return ret; 535be0e5c09SChris Mason } 536be0e5c09SChris Mason } 537aa5d6bedSChris Mason return 1; 538be0e5c09SChris Mason } 539be0e5c09SChris Mason 54074123bd7SChris Mason /* 54174123bd7SChris Mason * adjust the pointers going up the tree, starting at level 54274123bd7SChris Mason * making sure the right key of each node is points to 'key'. 54374123bd7SChris Mason * This is used after shifting pointers to the left, so it stops 54474123bd7SChris Mason * fixing up pointers when a given leaf/node is not in slot 0 of the 54574123bd7SChris Mason * higher levels 546aa5d6bedSChris Mason * 547aa5d6bedSChris Mason * If this fails to write a tree block, it returns -1, but continues 548aa5d6bedSChris Mason * fixing up the blocks in ram so the tree is consistent. 54974123bd7SChris Mason */ 550e089f05cSChris Mason static int fixup_low_keys(struct btrfs_trans_handle *trans, struct btrfs_root 551e089f05cSChris Mason *root, struct btrfs_path *path, struct btrfs_disk_key 552e089f05cSChris Mason *key, int level) 553be0e5c09SChris Mason { 554be0e5c09SChris Mason int i; 555aa5d6bedSChris Mason int ret = 0; 556234b63a0SChris Mason for (i = level; i < BTRFS_MAX_LEVEL; i++) { 557234b63a0SChris Mason struct btrfs_node *t; 558be0e5c09SChris Mason int tslot = path->slots[i]; 559eb60ceacSChris Mason if (!path->nodes[i]) 560be0e5c09SChris Mason break; 561e20d96d6SChris Mason t = btrfs_buffer_node(path->nodes[i]); 562d6025579SChris Mason btrfs_memcpy(root, t, &t->ptrs[tslot].key, key, sizeof(*key)); 563d6025579SChris Mason btrfs_mark_buffer_dirty(path->nodes[i]); 564be0e5c09SChris Mason if (tslot != 0) 565be0e5c09SChris Mason break; 566be0e5c09SChris Mason } 567aa5d6bedSChris Mason return ret; 568be0e5c09SChris Mason } 569be0e5c09SChris Mason 57074123bd7SChris Mason /* 57174123bd7SChris Mason * try to push data from one node into the next node left in the 57279f95c82SChris Mason * tree. 573aa5d6bedSChris Mason * 574aa5d6bedSChris Mason * returns 0 if some ptrs were pushed left, < 0 if there was some horrible 575aa5d6bedSChris Mason * error, and > 0 if there was no room in the left hand block. 57674123bd7SChris Mason */ 577e089f05cSChris Mason static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root 578e20d96d6SChris Mason *root, struct buffer_head *dst_buf, struct 579e20d96d6SChris Mason buffer_head *src_buf) 580be0e5c09SChris Mason { 581e20d96d6SChris Mason struct btrfs_node *src = btrfs_buffer_node(src_buf); 582e20d96d6SChris Mason struct btrfs_node *dst = btrfs_buffer_node(dst_buf); 583be0e5c09SChris Mason int push_items = 0; 584bb803951SChris Mason int src_nritems; 585bb803951SChris Mason int dst_nritems; 586aa5d6bedSChris Mason int ret = 0; 587be0e5c09SChris Mason 5887518a238SChris Mason src_nritems = btrfs_header_nritems(&src->header); 5897518a238SChris Mason dst_nritems = btrfs_header_nritems(&dst->header); 590123abc88SChris Mason push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems; 591eb60ceacSChris Mason if (push_items <= 0) { 592be0e5c09SChris Mason return 1; 593eb60ceacSChris Mason } 594be0e5c09SChris Mason 595be0e5c09SChris Mason if (src_nritems < push_items) 596be0e5c09SChris Mason push_items = src_nritems; 59779f95c82SChris Mason 598d6025579SChris Mason btrfs_memcpy(root, dst, dst->ptrs + dst_nritems, src->ptrs, 599123abc88SChris Mason push_items * sizeof(struct btrfs_key_ptr)); 600bb803951SChris Mason if (push_items < src_nritems) { 601d6025579SChris Mason btrfs_memmove(root, src, src->ptrs, src->ptrs + push_items, 602e2fa7227SChris Mason (src_nritems - push_items) * 603123abc88SChris Mason sizeof(struct btrfs_key_ptr)); 604bb803951SChris Mason } 6057518a238SChris Mason btrfs_set_header_nritems(&src->header, src_nritems - push_items); 6067518a238SChris Mason btrfs_set_header_nritems(&dst->header, dst_nritems + push_items); 607d6025579SChris Mason btrfs_mark_buffer_dirty(src_buf); 608d6025579SChris Mason btrfs_mark_buffer_dirty(dst_buf); 609bb803951SChris Mason return ret; 610be0e5c09SChris Mason } 611be0e5c09SChris Mason 61297571fd0SChris Mason /* 61379f95c82SChris Mason * try to push data from one node into the next node right in the 61479f95c82SChris Mason * tree. 61579f95c82SChris Mason * 61679f95c82SChris Mason * returns 0 if some ptrs were pushed, < 0 if there was some horrible 61779f95c82SChris Mason * error, and > 0 if there was no room in the right hand block. 61879f95c82SChris Mason * 61979f95c82SChris Mason * this will only push up to 1/2 the contents of the left node over 62079f95c82SChris Mason */ 621e089f05cSChris Mason static int balance_node_right(struct btrfs_trans_handle *trans, struct 622e20d96d6SChris Mason btrfs_root *root, struct buffer_head *dst_buf, 623e20d96d6SChris Mason struct buffer_head *src_buf) 62479f95c82SChris Mason { 625e20d96d6SChris Mason struct btrfs_node *src = btrfs_buffer_node(src_buf); 626e20d96d6SChris Mason struct btrfs_node *dst = btrfs_buffer_node(dst_buf); 62779f95c82SChris Mason int push_items = 0; 62879f95c82SChris Mason int max_push; 62979f95c82SChris Mason int src_nritems; 63079f95c82SChris Mason int dst_nritems; 63179f95c82SChris Mason int ret = 0; 63279f95c82SChris Mason 6337518a238SChris Mason src_nritems = btrfs_header_nritems(&src->header); 6347518a238SChris Mason dst_nritems = btrfs_header_nritems(&dst->header); 635123abc88SChris Mason push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems; 63679f95c82SChris Mason if (push_items <= 0) { 63779f95c82SChris Mason return 1; 63879f95c82SChris Mason } 63979f95c82SChris Mason 64079f95c82SChris Mason max_push = src_nritems / 2 + 1; 64179f95c82SChris Mason /* don't try to empty the node */ 64279f95c82SChris Mason if (max_push > src_nritems) 64379f95c82SChris Mason return 1; 64479f95c82SChris Mason if (max_push < push_items) 64579f95c82SChris Mason push_items = max_push; 64679f95c82SChris Mason 647d6025579SChris Mason btrfs_memmove(root, dst, dst->ptrs + push_items, dst->ptrs, 648123abc88SChris Mason dst_nritems * sizeof(struct btrfs_key_ptr)); 649d6025579SChris Mason 650d6025579SChris Mason btrfs_memcpy(root, dst, dst->ptrs, 651d6025579SChris Mason src->ptrs + src_nritems - push_items, 652123abc88SChris Mason push_items * sizeof(struct btrfs_key_ptr)); 65379f95c82SChris Mason 6547518a238SChris Mason btrfs_set_header_nritems(&src->header, src_nritems - push_items); 6557518a238SChris Mason btrfs_set_header_nritems(&dst->header, dst_nritems + push_items); 65679f95c82SChris Mason 657d6025579SChris Mason btrfs_mark_buffer_dirty(src_buf); 658d6025579SChris Mason btrfs_mark_buffer_dirty(dst_buf); 65979f95c82SChris Mason return ret; 66079f95c82SChris Mason } 66179f95c82SChris Mason 66279f95c82SChris Mason /* 66397571fd0SChris Mason * helper function to insert a new root level in the tree. 66497571fd0SChris Mason * A new node is allocated, and a single item is inserted to 66597571fd0SChris Mason * point to the existing root 666aa5d6bedSChris Mason * 667aa5d6bedSChris Mason * returns zero on success or < 0 on failure. 66897571fd0SChris Mason */ 669e089f05cSChris Mason static int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root 670e089f05cSChris Mason *root, struct btrfs_path *path, int level) 67174123bd7SChris Mason { 672e20d96d6SChris Mason struct buffer_head *t; 673234b63a0SChris Mason struct btrfs_node *lower; 674234b63a0SChris Mason struct btrfs_node *c; 675e2fa7227SChris Mason struct btrfs_disk_key *lower_key; 6765c680ed6SChris Mason 6775c680ed6SChris Mason BUG_ON(path->nodes[level]); 6785c680ed6SChris Mason BUG_ON(path->nodes[level-1] != root->node); 6795c680ed6SChris Mason 680e089f05cSChris Mason t = btrfs_alloc_free_block(trans, root); 681e20d96d6SChris Mason c = btrfs_buffer_node(t); 682123abc88SChris Mason memset(c, 0, root->blocksize); 6837518a238SChris Mason btrfs_set_header_nritems(&c->header, 1); 6847518a238SChris Mason btrfs_set_header_level(&c->header, level); 685e20d96d6SChris Mason btrfs_set_header_blocknr(&c->header, t->b_blocknr); 6867f5c1516SChris Mason btrfs_set_header_generation(&c->header, trans->transid); 6877518a238SChris Mason btrfs_set_header_parentid(&c->header, 688e20d96d6SChris Mason btrfs_header_parentid(btrfs_buffer_header(root->node))); 689e20d96d6SChris Mason lower = btrfs_buffer_node(path->nodes[level-1]); 6907518a238SChris Mason if (btrfs_is_leaf(lower)) 691234b63a0SChris Mason lower_key = &((struct btrfs_leaf *)lower)->items[0].key; 69274123bd7SChris Mason else 693123abc88SChris Mason lower_key = &lower->ptrs[0].key; 694d6025579SChris Mason btrfs_memcpy(root, c, &c->ptrs[0].key, lower_key, 695d6025579SChris Mason sizeof(struct btrfs_disk_key)); 696e20d96d6SChris Mason btrfs_set_node_blockptr(c, 0, path->nodes[level - 1]->b_blocknr); 697d5719762SChris Mason 698d6025579SChris Mason btrfs_mark_buffer_dirty(t); 699d5719762SChris Mason 700cfaa7295SChris Mason /* the super has an extra ref to root->node */ 701234b63a0SChris Mason btrfs_block_release(root, root->node); 70274123bd7SChris Mason root->node = t; 703e20d96d6SChris Mason get_bh(t); 70474123bd7SChris Mason path->nodes[level] = t; 70574123bd7SChris Mason path->slots[level] = 0; 70674123bd7SChris Mason return 0; 70774123bd7SChris Mason } 7085c680ed6SChris Mason 7095c680ed6SChris Mason /* 7105c680ed6SChris Mason * worker function to insert a single pointer in a node. 7115c680ed6SChris Mason * the node should have enough room for the pointer already 71297571fd0SChris Mason * 7135c680ed6SChris Mason * slot and level indicate where you want the key to go, and 7145c680ed6SChris Mason * blocknr is the block the key points to. 715aa5d6bedSChris Mason * 716aa5d6bedSChris Mason * returns zero on success and < 0 on any error 7175c680ed6SChris Mason */ 718e089f05cSChris Mason static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root 719e089f05cSChris Mason *root, struct btrfs_path *path, struct btrfs_disk_key 720e089f05cSChris Mason *key, u64 blocknr, int slot, int level) 7215c680ed6SChris Mason { 722234b63a0SChris Mason struct btrfs_node *lower; 7235c680ed6SChris Mason int nritems; 7245c680ed6SChris Mason 7255c680ed6SChris Mason BUG_ON(!path->nodes[level]); 726e20d96d6SChris Mason lower = btrfs_buffer_node(path->nodes[level]); 7277518a238SChris Mason nritems = btrfs_header_nritems(&lower->header); 72874123bd7SChris Mason if (slot > nritems) 72974123bd7SChris Mason BUG(); 730123abc88SChris Mason if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root)) 73174123bd7SChris Mason BUG(); 73274123bd7SChris Mason if (slot != nritems) { 733d6025579SChris Mason btrfs_memmove(root, lower, lower->ptrs + slot + 1, 734d6025579SChris Mason lower->ptrs + slot, 735123abc88SChris Mason (nritems - slot) * sizeof(struct btrfs_key_ptr)); 73674123bd7SChris Mason } 737d6025579SChris Mason btrfs_memcpy(root, lower, &lower->ptrs[slot].key, 738d6025579SChris Mason key, sizeof(struct btrfs_disk_key)); 7391d4f8a0cSChris Mason btrfs_set_node_blockptr(lower, slot, blocknr); 7407518a238SChris Mason btrfs_set_header_nritems(&lower->header, nritems + 1); 741d6025579SChris Mason btrfs_mark_buffer_dirty(path->nodes[level]); 74274123bd7SChris Mason return 0; 74374123bd7SChris Mason } 74474123bd7SChris Mason 74597571fd0SChris Mason /* 74697571fd0SChris Mason * split the node at the specified level in path in two. 74797571fd0SChris Mason * The path is corrected to point to the appropriate node after the split 74897571fd0SChris Mason * 74997571fd0SChris Mason * Before splitting this tries to make some room in the node by pushing 75097571fd0SChris Mason * left and right, if either one works, it returns right away. 751aa5d6bedSChris Mason * 752aa5d6bedSChris Mason * returns 0 on success and < 0 on failure 75397571fd0SChris Mason */ 754e089f05cSChris Mason static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root 755e089f05cSChris Mason *root, struct btrfs_path *path, int level) 756be0e5c09SChris Mason { 757e20d96d6SChris Mason struct buffer_head *t; 758234b63a0SChris Mason struct btrfs_node *c; 759e20d96d6SChris Mason struct buffer_head *split_buffer; 760234b63a0SChris Mason struct btrfs_node *split; 761be0e5c09SChris Mason int mid; 7625c680ed6SChris Mason int ret; 763aa5d6bedSChris Mason int wret; 7647518a238SChris Mason u32 c_nritems; 765be0e5c09SChris Mason 7665c680ed6SChris Mason t = path->nodes[level]; 767e20d96d6SChris Mason c = btrfs_buffer_node(t); 7685c680ed6SChris Mason if (t == root->node) { 7695c680ed6SChris Mason /* trying to split the root, lets make a new one */ 770e089f05cSChris Mason ret = insert_new_root(trans, root, path, level + 1); 7715c680ed6SChris Mason if (ret) 7725c680ed6SChris Mason return ret; 7735c680ed6SChris Mason } 7747518a238SChris Mason c_nritems = btrfs_header_nritems(&c->header); 775e089f05cSChris Mason split_buffer = btrfs_alloc_free_block(trans, root); 776e20d96d6SChris Mason split = btrfs_buffer_node(split_buffer); 7777518a238SChris Mason btrfs_set_header_flags(&split->header, btrfs_header_flags(&c->header)); 7789a6f11edSChris Mason btrfs_set_header_level(&split->header, btrfs_header_level(&c->header)); 779e20d96d6SChris Mason btrfs_set_header_blocknr(&split->header, split_buffer->b_blocknr); 7807f5c1516SChris Mason btrfs_set_header_generation(&split->header, trans->transid); 7817518a238SChris Mason btrfs_set_header_parentid(&split->header, 782e20d96d6SChris Mason btrfs_header_parentid(btrfs_buffer_header(root->node))); 7837518a238SChris Mason mid = (c_nritems + 1) / 2; 784d6025579SChris Mason btrfs_memcpy(root, split, split->ptrs, c->ptrs + mid, 785123abc88SChris Mason (c_nritems - mid) * sizeof(struct btrfs_key_ptr)); 7867518a238SChris Mason btrfs_set_header_nritems(&split->header, c_nritems - mid); 7877518a238SChris Mason btrfs_set_header_nritems(&c->header, mid); 788aa5d6bedSChris Mason ret = 0; 789aa5d6bedSChris Mason 790d6025579SChris Mason btrfs_mark_buffer_dirty(t); 791d6025579SChris Mason btrfs_mark_buffer_dirty(split_buffer); 792e089f05cSChris Mason wret = insert_ptr(trans, root, path, &split->ptrs[0].key, 793e20d96d6SChris Mason split_buffer->b_blocknr, path->slots[level + 1] + 1, 794123abc88SChris Mason level + 1); 795aa5d6bedSChris Mason if (wret) 796aa5d6bedSChris Mason ret = wret; 797aa5d6bedSChris Mason 7985de08d7dSChris Mason if (path->slots[level] >= mid) { 7995c680ed6SChris Mason path->slots[level] -= mid; 800234b63a0SChris Mason btrfs_block_release(root, t); 8015c680ed6SChris Mason path->nodes[level] = split_buffer; 8025c680ed6SChris Mason path->slots[level + 1] += 1; 803eb60ceacSChris Mason } else { 804234b63a0SChris Mason btrfs_block_release(root, split_buffer); 805be0e5c09SChris Mason } 806aa5d6bedSChris Mason return ret; 807be0e5c09SChris Mason } 808be0e5c09SChris Mason 80974123bd7SChris Mason /* 81074123bd7SChris Mason * how many bytes are required to store the items in a leaf. start 81174123bd7SChris Mason * and nr indicate which items in the leaf to check. This totals up the 81274123bd7SChris Mason * space used both by the item structs and the item data 81374123bd7SChris Mason */ 814234b63a0SChris Mason static int leaf_space_used(struct btrfs_leaf *l, int start, int nr) 815be0e5c09SChris Mason { 816be0e5c09SChris Mason int data_len; 817*d4dbff95SChris Mason int nritems = btrfs_header_nritems(&l->header); 818*d4dbff95SChris Mason int end = min(nritems, start + nr) - 1; 819be0e5c09SChris Mason 820be0e5c09SChris Mason if (!nr) 821be0e5c09SChris Mason return 0; 8220783fcfcSChris Mason data_len = btrfs_item_end(l->items + start); 8230783fcfcSChris Mason data_len = data_len - btrfs_item_offset(l->items + end); 8240783fcfcSChris Mason data_len += sizeof(struct btrfs_item) * nr; 825*d4dbff95SChris Mason WARN_ON(data_len < 0); 826be0e5c09SChris Mason return data_len; 827be0e5c09SChris Mason } 828be0e5c09SChris Mason 82974123bd7SChris Mason /* 830*d4dbff95SChris Mason * The space between the end of the leaf items and 831*d4dbff95SChris Mason * the start of the leaf data. IOW, how much room 832*d4dbff95SChris Mason * the leaf has left for both items and data 833*d4dbff95SChris Mason */ 834*d4dbff95SChris Mason int btrfs_leaf_free_space(struct btrfs_root *root, struct btrfs_leaf *leaf) 835*d4dbff95SChris Mason { 836*d4dbff95SChris Mason int nritems = btrfs_header_nritems(&leaf->header); 837*d4dbff95SChris Mason return BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems); 838*d4dbff95SChris Mason } 839*d4dbff95SChris Mason 840*d4dbff95SChris Mason /* 84100ec4c51SChris Mason * push some data in the path leaf to the right, trying to free up at 84200ec4c51SChris Mason * least data_size bytes. returns zero if the push worked, nonzero otherwise 843aa5d6bedSChris Mason * 844aa5d6bedSChris Mason * returns 1 if the push failed because the other node didn't have enough 845aa5d6bedSChris Mason * room, 0 if everything worked out and < 0 if there were major errors. 84600ec4c51SChris Mason */ 847e089f05cSChris Mason static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root 848e089f05cSChris Mason *root, struct btrfs_path *path, int data_size) 84900ec4c51SChris Mason { 850e20d96d6SChris Mason struct buffer_head *left_buf = path->nodes[0]; 851e20d96d6SChris Mason struct btrfs_leaf *left = btrfs_buffer_leaf(left_buf); 852234b63a0SChris Mason struct btrfs_leaf *right; 853e20d96d6SChris Mason struct buffer_head *right_buf; 854e20d96d6SChris Mason struct buffer_head *upper; 855e20d96d6SChris Mason struct btrfs_node *upper_node; 85600ec4c51SChris Mason int slot; 85700ec4c51SChris Mason int i; 85800ec4c51SChris Mason int free_space; 85900ec4c51SChris Mason int push_space = 0; 86000ec4c51SChris Mason int push_items = 0; 8610783fcfcSChris Mason struct btrfs_item *item; 8627518a238SChris Mason u32 left_nritems; 8637518a238SChris Mason u32 right_nritems; 86400ec4c51SChris Mason 86500ec4c51SChris Mason slot = path->slots[1]; 86600ec4c51SChris Mason if (!path->nodes[1]) { 86700ec4c51SChris Mason return 1; 86800ec4c51SChris Mason } 86900ec4c51SChris Mason upper = path->nodes[1]; 870e20d96d6SChris Mason upper_node = btrfs_buffer_node(upper); 871e20d96d6SChris Mason if (slot >= btrfs_header_nritems(&upper_node->header) - 1) { 87200ec4c51SChris Mason return 1; 87300ec4c51SChris Mason } 874e20d96d6SChris Mason right_buf = read_tree_block(root, 875e20d96d6SChris Mason btrfs_node_blockptr(btrfs_buffer_node(upper), slot + 1)); 876e20d96d6SChris Mason right = btrfs_buffer_leaf(right_buf); 877123abc88SChris Mason free_space = btrfs_leaf_free_space(root, right); 8780783fcfcSChris Mason if (free_space < data_size + sizeof(struct btrfs_item)) { 879234b63a0SChris Mason btrfs_block_release(root, right_buf); 88000ec4c51SChris Mason return 1; 88100ec4c51SChris Mason } 88202217ed2SChris Mason /* cow and double check */ 883e089f05cSChris Mason btrfs_cow_block(trans, root, right_buf, upper, slot + 1, &right_buf); 884e20d96d6SChris Mason right = btrfs_buffer_leaf(right_buf); 885123abc88SChris Mason free_space = btrfs_leaf_free_space(root, right); 8860783fcfcSChris Mason if (free_space < data_size + sizeof(struct btrfs_item)) { 887234b63a0SChris Mason btrfs_block_release(root, right_buf); 88802217ed2SChris Mason return 1; 88902217ed2SChris Mason } 89002217ed2SChris Mason 8917518a238SChris Mason left_nritems = btrfs_header_nritems(&left->header); 8927518a238SChris Mason for (i = left_nritems - 1; i >= 0; i--) { 89300ec4c51SChris Mason item = left->items + i; 89400ec4c51SChris Mason if (path->slots[0] == i) 89500ec4c51SChris Mason push_space += data_size + sizeof(*item); 8960783fcfcSChris Mason if (btrfs_item_size(item) + sizeof(*item) + push_space > 8970783fcfcSChris Mason free_space) 89800ec4c51SChris Mason break; 89900ec4c51SChris Mason push_items++; 9000783fcfcSChris Mason push_space += btrfs_item_size(item) + sizeof(*item); 90100ec4c51SChris Mason } 90200ec4c51SChris Mason if (push_items == 0) { 903234b63a0SChris Mason btrfs_block_release(root, right_buf); 90400ec4c51SChris Mason return 1; 90500ec4c51SChris Mason } 9067518a238SChris Mason right_nritems = btrfs_header_nritems(&right->header); 90700ec4c51SChris Mason /* push left to right */ 9080783fcfcSChris Mason push_space = btrfs_item_end(left->items + left_nritems - push_items); 909123abc88SChris Mason push_space -= leaf_data_end(root, left); 91000ec4c51SChris Mason /* make room in the right data area */ 911d6025579SChris Mason btrfs_memmove(root, right, btrfs_leaf_data(right) + 912d6025579SChris Mason leaf_data_end(root, right) - push_space, 913d6025579SChris Mason btrfs_leaf_data(right) + 914d6025579SChris Mason leaf_data_end(root, right), BTRFS_LEAF_DATA_SIZE(root) - 915d6025579SChris Mason leaf_data_end(root, right)); 91600ec4c51SChris Mason /* copy from the left data area */ 917d6025579SChris Mason btrfs_memcpy(root, right, btrfs_leaf_data(right) + 918d6025579SChris Mason BTRFS_LEAF_DATA_SIZE(root) - push_space, 919d6025579SChris Mason btrfs_leaf_data(left) + leaf_data_end(root, left), 920d6025579SChris Mason push_space); 921d6025579SChris Mason btrfs_memmove(root, right, right->items + push_items, right->items, 9220783fcfcSChris Mason right_nritems * sizeof(struct btrfs_item)); 92300ec4c51SChris Mason /* copy the items from left to right */ 924d6025579SChris Mason btrfs_memcpy(root, right, right->items, left->items + 925d6025579SChris Mason left_nritems - push_items, 9260783fcfcSChris Mason push_items * sizeof(struct btrfs_item)); 92700ec4c51SChris Mason 92800ec4c51SChris Mason /* update the item pointers */ 9297518a238SChris Mason right_nritems += push_items; 9307518a238SChris Mason btrfs_set_header_nritems(&right->header, right_nritems); 931123abc88SChris Mason push_space = BTRFS_LEAF_DATA_SIZE(root); 9327518a238SChris Mason for (i = 0; i < right_nritems; i++) { 9330783fcfcSChris Mason btrfs_set_item_offset(right->items + i, push_space - 9340783fcfcSChris Mason btrfs_item_size(right->items + i)); 9350783fcfcSChris Mason push_space = btrfs_item_offset(right->items + i); 93600ec4c51SChris Mason } 9377518a238SChris Mason left_nritems -= push_items; 9387518a238SChris Mason btrfs_set_header_nritems(&left->header, left_nritems); 93900ec4c51SChris Mason 940d6025579SChris Mason btrfs_mark_buffer_dirty(left_buf); 941d6025579SChris Mason btrfs_mark_buffer_dirty(right_buf); 942d6025579SChris Mason btrfs_memcpy(root, upper_node, &upper_node->ptrs[slot + 1].key, 943e2fa7227SChris Mason &right->items[0].key, sizeof(struct btrfs_disk_key)); 944d6025579SChris Mason btrfs_mark_buffer_dirty(upper); 94502217ed2SChris Mason 94600ec4c51SChris Mason /* then fixup the leaf pointer in the path */ 9477518a238SChris Mason if (path->slots[0] >= left_nritems) { 9487518a238SChris Mason path->slots[0] -= left_nritems; 949234b63a0SChris Mason btrfs_block_release(root, path->nodes[0]); 95000ec4c51SChris Mason path->nodes[0] = right_buf; 95100ec4c51SChris Mason path->slots[1] += 1; 95200ec4c51SChris Mason } else { 953234b63a0SChris Mason btrfs_block_release(root, right_buf); 95400ec4c51SChris Mason } 95500ec4c51SChris Mason return 0; 95600ec4c51SChris Mason } 95700ec4c51SChris Mason /* 95874123bd7SChris Mason * push some data in the path leaf to the left, trying to free up at 95974123bd7SChris Mason * least data_size bytes. returns zero if the push worked, nonzero otherwise 96074123bd7SChris Mason */ 961e089f05cSChris Mason static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root 962e089f05cSChris Mason *root, struct btrfs_path *path, int data_size) 963be0e5c09SChris Mason { 964e20d96d6SChris Mason struct buffer_head *right_buf = path->nodes[0]; 965e20d96d6SChris Mason struct btrfs_leaf *right = btrfs_buffer_leaf(right_buf); 966e20d96d6SChris Mason struct buffer_head *t; 967234b63a0SChris Mason struct btrfs_leaf *left; 968be0e5c09SChris Mason int slot; 969be0e5c09SChris Mason int i; 970be0e5c09SChris Mason int free_space; 971be0e5c09SChris Mason int push_space = 0; 972be0e5c09SChris Mason int push_items = 0; 9730783fcfcSChris Mason struct btrfs_item *item; 9747518a238SChris Mason u32 old_left_nritems; 975aa5d6bedSChris Mason int ret = 0; 976aa5d6bedSChris Mason int wret; 977be0e5c09SChris Mason 978be0e5c09SChris Mason slot = path->slots[1]; 979be0e5c09SChris Mason if (slot == 0) { 980be0e5c09SChris Mason return 1; 981be0e5c09SChris Mason } 982be0e5c09SChris Mason if (!path->nodes[1]) { 983be0e5c09SChris Mason return 1; 984be0e5c09SChris Mason } 985e20d96d6SChris Mason t = read_tree_block(root, 986e20d96d6SChris Mason btrfs_node_blockptr(btrfs_buffer_node(path->nodes[1]), slot - 1)); 987e20d96d6SChris Mason left = btrfs_buffer_leaf(t); 988123abc88SChris Mason free_space = btrfs_leaf_free_space(root, left); 9890783fcfcSChris Mason if (free_space < data_size + sizeof(struct btrfs_item)) { 990234b63a0SChris Mason btrfs_block_release(root, t); 991be0e5c09SChris Mason return 1; 992be0e5c09SChris Mason } 99302217ed2SChris Mason 99402217ed2SChris Mason /* cow and double check */ 995e089f05cSChris Mason btrfs_cow_block(trans, root, t, path->nodes[1], slot - 1, &t); 996e20d96d6SChris Mason left = btrfs_buffer_leaf(t); 997123abc88SChris Mason free_space = btrfs_leaf_free_space(root, left); 9980783fcfcSChris Mason if (free_space < data_size + sizeof(struct btrfs_item)) { 999234b63a0SChris Mason btrfs_block_release(root, t); 100002217ed2SChris Mason return 1; 100102217ed2SChris Mason } 100202217ed2SChris Mason 10037518a238SChris Mason for (i = 0; i < btrfs_header_nritems(&right->header); i++) { 1004be0e5c09SChris Mason item = right->items + i; 1005be0e5c09SChris Mason if (path->slots[0] == i) 1006be0e5c09SChris Mason push_space += data_size + sizeof(*item); 10070783fcfcSChris Mason if (btrfs_item_size(item) + sizeof(*item) + push_space > 10080783fcfcSChris Mason free_space) 1009be0e5c09SChris Mason break; 1010be0e5c09SChris Mason push_items++; 10110783fcfcSChris Mason push_space += btrfs_item_size(item) + sizeof(*item); 1012be0e5c09SChris Mason } 1013be0e5c09SChris Mason if (push_items == 0) { 1014234b63a0SChris Mason btrfs_block_release(root, t); 1015be0e5c09SChris Mason return 1; 1016be0e5c09SChris Mason } 1017be0e5c09SChris Mason /* push data from right to left */ 1018d6025579SChris Mason btrfs_memcpy(root, left, left->items + 1019d6025579SChris Mason btrfs_header_nritems(&left->header), 10200783fcfcSChris Mason right->items, push_items * sizeof(struct btrfs_item)); 1021123abc88SChris Mason push_space = BTRFS_LEAF_DATA_SIZE(root) - 10220783fcfcSChris Mason btrfs_item_offset(right->items + push_items -1); 1023d6025579SChris Mason btrfs_memcpy(root, left, btrfs_leaf_data(left) + 1024d6025579SChris Mason leaf_data_end(root, left) - push_space, 1025123abc88SChris Mason btrfs_leaf_data(right) + 1026123abc88SChris Mason btrfs_item_offset(right->items + push_items - 1), 1027be0e5c09SChris Mason push_space); 10287518a238SChris Mason old_left_nritems = btrfs_header_nritems(&left->header); 1029eb60ceacSChris Mason BUG_ON(old_left_nritems < 0); 1030eb60ceacSChris Mason 1031be0e5c09SChris Mason for (i = old_left_nritems; i < old_left_nritems + push_items; i++) { 1032123abc88SChris Mason u32 ioff = btrfs_item_offset(left->items + i); 1033123abc88SChris Mason btrfs_set_item_offset(left->items + i, ioff - 1034123abc88SChris Mason (BTRFS_LEAF_DATA_SIZE(root) - 10350783fcfcSChris Mason btrfs_item_offset(left->items + 10360783fcfcSChris Mason old_left_nritems - 1))); 1037be0e5c09SChris Mason } 10387518a238SChris Mason btrfs_set_header_nritems(&left->header, old_left_nritems + push_items); 1039be0e5c09SChris Mason 1040be0e5c09SChris Mason /* fixup right node */ 10410783fcfcSChris Mason push_space = btrfs_item_offset(right->items + push_items - 1) - 1042123abc88SChris Mason leaf_data_end(root, right); 1043d6025579SChris Mason btrfs_memmove(root, right, btrfs_leaf_data(right) + 1044d6025579SChris Mason BTRFS_LEAF_DATA_SIZE(root) - push_space, 1045d6025579SChris Mason btrfs_leaf_data(right) + 1046123abc88SChris Mason leaf_data_end(root, right), push_space); 1047d6025579SChris Mason btrfs_memmove(root, right, right->items, right->items + push_items, 10487518a238SChris Mason (btrfs_header_nritems(&right->header) - push_items) * 10490783fcfcSChris Mason sizeof(struct btrfs_item)); 10507518a238SChris Mason btrfs_set_header_nritems(&right->header, 10517518a238SChris Mason btrfs_header_nritems(&right->header) - 10527518a238SChris Mason push_items); 1053123abc88SChris Mason push_space = BTRFS_LEAF_DATA_SIZE(root); 1054eb60ceacSChris Mason 10557518a238SChris Mason for (i = 0; i < btrfs_header_nritems(&right->header); i++) { 10560783fcfcSChris Mason btrfs_set_item_offset(right->items + i, push_space - 10570783fcfcSChris Mason btrfs_item_size(right->items + i)); 10580783fcfcSChris Mason push_space = btrfs_item_offset(right->items + i); 1059be0e5c09SChris Mason } 1060eb60ceacSChris Mason 1061d6025579SChris Mason btrfs_mark_buffer_dirty(t); 1062d6025579SChris Mason btrfs_mark_buffer_dirty(right_buf); 1063eb60ceacSChris Mason 1064e089f05cSChris Mason wret = fixup_low_keys(trans, root, path, &right->items[0].key, 1); 1065aa5d6bedSChris Mason if (wret) 1066aa5d6bedSChris Mason ret = wret; 1067be0e5c09SChris Mason 1068be0e5c09SChris Mason /* then fixup the leaf pointer in the path */ 1069be0e5c09SChris Mason if (path->slots[0] < push_items) { 1070be0e5c09SChris Mason path->slots[0] += old_left_nritems; 1071234b63a0SChris Mason btrfs_block_release(root, path->nodes[0]); 1072eb60ceacSChris Mason path->nodes[0] = t; 1073be0e5c09SChris Mason path->slots[1] -= 1; 1074be0e5c09SChris Mason } else { 1075234b63a0SChris Mason btrfs_block_release(root, t); 1076be0e5c09SChris Mason path->slots[0] -= push_items; 1077be0e5c09SChris Mason } 1078eb60ceacSChris Mason BUG_ON(path->slots[0] < 0); 1079aa5d6bedSChris Mason return ret; 1080be0e5c09SChris Mason } 1081be0e5c09SChris Mason 108274123bd7SChris Mason /* 108374123bd7SChris Mason * split the path's leaf in two, making sure there is at least data_size 108474123bd7SChris Mason * available for the resulting leaf level of the path. 1085aa5d6bedSChris Mason * 1086aa5d6bedSChris Mason * returns 0 if all went well and < 0 on failure. 108774123bd7SChris Mason */ 1088e089f05cSChris Mason static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root 1089*d4dbff95SChris Mason *root, struct btrfs_key *ins_key, 1090*d4dbff95SChris Mason struct btrfs_path *path, int data_size) 1091be0e5c09SChris Mason { 1092e20d96d6SChris Mason struct buffer_head *l_buf; 1093234b63a0SChris Mason struct btrfs_leaf *l; 10947518a238SChris Mason u32 nritems; 1095eb60ceacSChris Mason int mid; 1096eb60ceacSChris Mason int slot; 1097234b63a0SChris Mason struct btrfs_leaf *right; 1098e20d96d6SChris Mason struct buffer_head *right_buffer; 10990783fcfcSChris Mason int space_needed = data_size + sizeof(struct btrfs_item); 1100be0e5c09SChris Mason int data_copy_size; 1101be0e5c09SChris Mason int rt_data_off; 1102be0e5c09SChris Mason int i; 1103*d4dbff95SChris Mason int ret = 0; 1104aa5d6bedSChris Mason int wret; 1105*d4dbff95SChris Mason int double_split = 0; 1106*d4dbff95SChris Mason struct btrfs_disk_key disk_key; 1107be0e5c09SChris Mason 110840689478SChris Mason /* first try to make some room by pushing left and right */ 1109e089f05cSChris Mason wret = push_leaf_left(trans, root, path, data_size); 1110eaee50e8SChris Mason if (wret < 0) 1111eaee50e8SChris Mason return wret; 1112eaee50e8SChris Mason if (wret) { 1113e089f05cSChris Mason wret = push_leaf_right(trans, root, path, data_size); 1114eaee50e8SChris Mason if (wret < 0) 1115eaee50e8SChris Mason return wret; 1116eaee50e8SChris Mason } 1117eb60ceacSChris Mason l_buf = path->nodes[0]; 1118e20d96d6SChris Mason l = btrfs_buffer_leaf(l_buf); 1119aa5d6bedSChris Mason 1120aa5d6bedSChris Mason /* did the pushes work? */ 1121123abc88SChris Mason if (btrfs_leaf_free_space(root, l) >= 1122123abc88SChris Mason sizeof(struct btrfs_item) + data_size) 1123be0e5c09SChris Mason return 0; 1124aa5d6bedSChris Mason 11255c680ed6SChris Mason if (!path->nodes[1]) { 1126e089f05cSChris Mason ret = insert_new_root(trans, root, path, 1); 11275c680ed6SChris Mason if (ret) 11285c680ed6SChris Mason return ret; 11295c680ed6SChris Mason } 1130eb60ceacSChris Mason slot = path->slots[0]; 11317518a238SChris Mason nritems = btrfs_header_nritems(&l->header); 1132eb60ceacSChris Mason mid = (nritems + 1)/ 2; 1133e089f05cSChris Mason right_buffer = btrfs_alloc_free_block(trans, root); 1134eb60ceacSChris Mason BUG_ON(!right_buffer); 1135e20d96d6SChris Mason right = btrfs_buffer_leaf(right_buffer); 1136123abc88SChris Mason memset(&right->header, 0, sizeof(right->header)); 1137e20d96d6SChris Mason btrfs_set_header_blocknr(&right->header, right_buffer->b_blocknr); 11387f5c1516SChris Mason btrfs_set_header_generation(&right->header, trans->transid); 11397518a238SChris Mason btrfs_set_header_level(&right->header, 0); 11407518a238SChris Mason btrfs_set_header_parentid(&right->header, 1141e20d96d6SChris Mason btrfs_header_parentid(btrfs_buffer_header(root->node))); 1142*d4dbff95SChris Mason if (mid <= slot) { 1143*d4dbff95SChris Mason if (nritems == 1 || 1144*d4dbff95SChris Mason leaf_space_used(l, mid, nritems - mid) + space_needed > 1145*d4dbff95SChris Mason BTRFS_LEAF_DATA_SIZE(root)) { 1146*d4dbff95SChris Mason if (slot >= nritems) { 1147*d4dbff95SChris Mason btrfs_cpu_key_to_disk(&disk_key, ins_key); 1148*d4dbff95SChris Mason btrfs_set_header_nritems(&right->header, 0); 1149*d4dbff95SChris Mason wret = insert_ptr(trans, root, path, 1150*d4dbff95SChris Mason &disk_key, 1151*d4dbff95SChris Mason right_buffer->b_blocknr, 1152*d4dbff95SChris Mason path->slots[1] + 1, 1); 1153*d4dbff95SChris Mason if (wret) 1154*d4dbff95SChris Mason ret = wret; 1155*d4dbff95SChris Mason btrfs_block_release(root, path->nodes[0]); 1156*d4dbff95SChris Mason path->nodes[0] = right_buffer; 1157*d4dbff95SChris Mason path->slots[0] = 0; 1158*d4dbff95SChris Mason path->slots[1] += 1; 1159*d4dbff95SChris Mason return ret; 1160*d4dbff95SChris Mason } 1161*d4dbff95SChris Mason mid = slot; 1162*d4dbff95SChris Mason double_split = 1; 1163*d4dbff95SChris Mason } 1164*d4dbff95SChris Mason } else { 1165*d4dbff95SChris Mason if (leaf_space_used(l, 0, mid + 1) + space_needed > 1166*d4dbff95SChris Mason BTRFS_LEAF_DATA_SIZE(root)) { 1167*d4dbff95SChris Mason if (slot == 0) { 1168*d4dbff95SChris Mason btrfs_cpu_key_to_disk(&disk_key, ins_key); 1169*d4dbff95SChris Mason btrfs_set_header_nritems(&right->header, 0); 1170*d4dbff95SChris Mason wret = insert_ptr(trans, root, path, 1171*d4dbff95SChris Mason &disk_key, 1172*d4dbff95SChris Mason right_buffer->b_blocknr, 1173*d4dbff95SChris Mason path->slots[1] - 1, 1); 1174*d4dbff95SChris Mason if (wret) 1175*d4dbff95SChris Mason ret = wret; 1176*d4dbff95SChris Mason btrfs_block_release(root, path->nodes[0]); 1177*d4dbff95SChris Mason path->nodes[0] = right_buffer; 1178*d4dbff95SChris Mason path->slots[0] = 0; 1179*d4dbff95SChris Mason path->slots[1] -= 1; 1180*d4dbff95SChris Mason return ret; 1181*d4dbff95SChris Mason } 1182*d4dbff95SChris Mason mid = slot; 1183*d4dbff95SChris Mason double_split = 1; 1184*d4dbff95SChris Mason } 1185*d4dbff95SChris Mason } 1186*d4dbff95SChris Mason btrfs_set_header_nritems(&right->header, nritems - mid); 1187123abc88SChris Mason data_copy_size = btrfs_item_end(l->items + mid) - 1188123abc88SChris Mason leaf_data_end(root, l); 1189d6025579SChris Mason btrfs_memcpy(root, right, right->items, l->items + mid, 11900783fcfcSChris Mason (nritems - mid) * sizeof(struct btrfs_item)); 1191d6025579SChris Mason btrfs_memcpy(root, right, 1192d6025579SChris Mason btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) - 1193123abc88SChris Mason data_copy_size, btrfs_leaf_data(l) + 1194123abc88SChris Mason leaf_data_end(root, l), data_copy_size); 1195123abc88SChris Mason rt_data_off = BTRFS_LEAF_DATA_SIZE(root) - 1196123abc88SChris Mason btrfs_item_end(l->items + mid); 119774123bd7SChris Mason 11980783fcfcSChris Mason for (i = 0; i < btrfs_header_nritems(&right->header); i++) { 1199123abc88SChris Mason u32 ioff = btrfs_item_offset(right->items + i); 12000783fcfcSChris Mason btrfs_set_item_offset(right->items + i, ioff + rt_data_off); 12010783fcfcSChris Mason } 120274123bd7SChris Mason 12037518a238SChris Mason btrfs_set_header_nritems(&l->header, mid); 1204aa5d6bedSChris Mason ret = 0; 1205e089f05cSChris Mason wret = insert_ptr(trans, root, path, &right->items[0].key, 1206e20d96d6SChris Mason right_buffer->b_blocknr, path->slots[1] + 1, 1); 1207aa5d6bedSChris Mason if (wret) 1208aa5d6bedSChris Mason ret = wret; 1209d6025579SChris Mason btrfs_mark_buffer_dirty(right_buffer); 1210d6025579SChris Mason btrfs_mark_buffer_dirty(l_buf); 1211eb60ceacSChris Mason BUG_ON(path->slots[0] != slot); 1212be0e5c09SChris Mason if (mid <= slot) { 1213234b63a0SChris Mason btrfs_block_release(root, path->nodes[0]); 1214eb60ceacSChris Mason path->nodes[0] = right_buffer; 1215be0e5c09SChris Mason path->slots[0] -= mid; 1216be0e5c09SChris Mason path->slots[1] += 1; 1217eb60ceacSChris Mason } else 1218234b63a0SChris Mason btrfs_block_release(root, right_buffer); 1219eb60ceacSChris Mason BUG_ON(path->slots[0] < 0); 1220*d4dbff95SChris Mason 1221*d4dbff95SChris Mason if (!double_split) 1222*d4dbff95SChris Mason return ret; 1223*d4dbff95SChris Mason right_buffer = btrfs_alloc_free_block(trans, root); 1224*d4dbff95SChris Mason BUG_ON(!right_buffer); 1225*d4dbff95SChris Mason right = btrfs_buffer_leaf(right_buffer); 1226*d4dbff95SChris Mason memset(&right->header, 0, sizeof(right->header)); 1227*d4dbff95SChris Mason btrfs_set_header_blocknr(&right->header, right_buffer->b_blocknr); 1228*d4dbff95SChris Mason btrfs_set_header_generation(&right->header, trans->transid); 1229*d4dbff95SChris Mason btrfs_set_header_level(&right->header, 0); 1230*d4dbff95SChris Mason btrfs_set_header_parentid(&right->header, 1231*d4dbff95SChris Mason btrfs_header_parentid(btrfs_buffer_header(root->node))); 1232*d4dbff95SChris Mason btrfs_cpu_key_to_disk(&disk_key, ins_key); 1233*d4dbff95SChris Mason btrfs_set_header_nritems(&right->header, 0); 1234*d4dbff95SChris Mason wret = insert_ptr(trans, root, path, 1235*d4dbff95SChris Mason &disk_key, 1236*d4dbff95SChris Mason right_buffer->b_blocknr, 1237*d4dbff95SChris Mason path->slots[1], 1); 1238*d4dbff95SChris Mason if (wret) 1239*d4dbff95SChris Mason ret = wret; 1240*d4dbff95SChris Mason btrfs_block_release(root, path->nodes[0]); 1241*d4dbff95SChris Mason path->nodes[0] = right_buffer; 1242*d4dbff95SChris Mason path->slots[0] = 0; 1243*d4dbff95SChris Mason check_node(root, path, 1); 1244*d4dbff95SChris Mason check_leaf(root, path, 0); 1245be0e5c09SChris Mason return ret; 1246be0e5c09SChris Mason } 1247be0e5c09SChris Mason 124874123bd7SChris Mason /* 124974123bd7SChris Mason * Given a key and some data, insert an item into the tree. 125074123bd7SChris Mason * This does all the path init required, making room in the tree if needed. 125174123bd7SChris Mason */ 1252e089f05cSChris Mason int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root 1253e089f05cSChris Mason *root, struct btrfs_path *path, struct btrfs_key 1254e089f05cSChris Mason *cpu_key, u32 data_size) 1255be0e5c09SChris Mason { 1256aa5d6bedSChris Mason int ret = 0; 1257be0e5c09SChris Mason int slot; 1258eb60ceacSChris Mason int slot_orig; 1259234b63a0SChris Mason struct btrfs_leaf *leaf; 1260e20d96d6SChris Mason struct buffer_head *leaf_buf; 12617518a238SChris Mason u32 nritems; 1262be0e5c09SChris Mason unsigned int data_end; 1263e2fa7227SChris Mason struct btrfs_disk_key disk_key; 1264e2fa7227SChris Mason 1265e2fa7227SChris Mason btrfs_cpu_key_to_disk(&disk_key, cpu_key); 1266be0e5c09SChris Mason 126774123bd7SChris Mason /* create a root if there isn't one */ 12685c680ed6SChris Mason if (!root->node) 1269cfaa7295SChris Mason BUG(); 1270e089f05cSChris Mason ret = btrfs_search_slot(trans, root, cpu_key, path, data_size, 1); 1271eb60ceacSChris Mason if (ret == 0) { 1272f0930a37SChris Mason return -EEXIST; 1273aa5d6bedSChris Mason } 1274ed2ff2cbSChris Mason if (ret < 0) 1275ed2ff2cbSChris Mason goto out; 1276be0e5c09SChris Mason 127762e2749eSChris Mason slot_orig = path->slots[0]; 127862e2749eSChris Mason leaf_buf = path->nodes[0]; 1279e20d96d6SChris Mason leaf = btrfs_buffer_leaf(leaf_buf); 128074123bd7SChris Mason 12817518a238SChris Mason nritems = btrfs_header_nritems(&leaf->header); 1282123abc88SChris Mason data_end = leaf_data_end(root, leaf); 1283eb60ceacSChris Mason 1284123abc88SChris Mason if (btrfs_leaf_free_space(root, leaf) < 1285*d4dbff95SChris Mason sizeof(struct btrfs_item) + data_size) { 1286be0e5c09SChris Mason BUG(); 1287*d4dbff95SChris Mason } 128862e2749eSChris Mason slot = path->slots[0]; 1289eb60ceacSChris Mason BUG_ON(slot < 0); 1290be0e5c09SChris Mason if (slot != nritems) { 1291be0e5c09SChris Mason int i; 12920783fcfcSChris Mason unsigned int old_data = btrfs_item_end(leaf->items + slot); 1293be0e5c09SChris Mason 1294be0e5c09SChris Mason /* 1295be0e5c09SChris Mason * item0..itemN ... dataN.offset..dataN.size .. data0.size 1296be0e5c09SChris Mason */ 1297be0e5c09SChris Mason /* first correct the data pointers */ 12980783fcfcSChris Mason for (i = slot; i < nritems; i++) { 1299123abc88SChris Mason u32 ioff = btrfs_item_offset(leaf->items + i); 13000783fcfcSChris Mason btrfs_set_item_offset(leaf->items + i, 13010783fcfcSChris Mason ioff - data_size); 13020783fcfcSChris Mason } 1303be0e5c09SChris Mason 1304be0e5c09SChris Mason /* shift the items */ 1305d6025579SChris Mason btrfs_memmove(root, leaf, leaf->items + slot + 1, 1306d6025579SChris Mason leaf->items + slot, 13070783fcfcSChris Mason (nritems - slot) * sizeof(struct btrfs_item)); 1308be0e5c09SChris Mason 1309be0e5c09SChris Mason /* shift the data */ 1310d6025579SChris Mason btrfs_memmove(root, leaf, btrfs_leaf_data(leaf) + 1311d6025579SChris Mason data_end - data_size, btrfs_leaf_data(leaf) + 1312be0e5c09SChris Mason data_end, old_data - data_end); 1313be0e5c09SChris Mason data_end = old_data; 1314be0e5c09SChris Mason } 131562e2749eSChris Mason /* setup the item for the new data */ 1316d6025579SChris Mason btrfs_memcpy(root, leaf, &leaf->items[slot].key, &disk_key, 1317e2fa7227SChris Mason sizeof(struct btrfs_disk_key)); 13180783fcfcSChris Mason btrfs_set_item_offset(leaf->items + slot, data_end - data_size); 13190783fcfcSChris Mason btrfs_set_item_size(leaf->items + slot, data_size); 13207518a238SChris Mason btrfs_set_header_nritems(&leaf->header, nritems + 1); 1321d6025579SChris Mason btrfs_mark_buffer_dirty(leaf_buf); 1322aa5d6bedSChris Mason 1323aa5d6bedSChris Mason ret = 0; 13248e19f2cdSChris Mason if (slot == 0) 1325e089f05cSChris Mason ret = fixup_low_keys(trans, root, path, &disk_key, 1); 1326aa5d6bedSChris Mason 1327123abc88SChris Mason if (btrfs_leaf_free_space(root, leaf) < 0) 1328be0e5c09SChris Mason BUG(); 132962e2749eSChris Mason check_leaf(root, path, 0); 1330ed2ff2cbSChris Mason out: 133162e2749eSChris Mason return ret; 133262e2749eSChris Mason } 133362e2749eSChris Mason 133462e2749eSChris Mason /* 133562e2749eSChris Mason * Given a key and some data, insert an item into the tree. 133662e2749eSChris Mason * This does all the path init required, making room in the tree if needed. 133762e2749eSChris Mason */ 1338e089f05cSChris Mason int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root 1339e089f05cSChris Mason *root, struct btrfs_key *cpu_key, void *data, u32 1340e089f05cSChris Mason data_size) 134162e2749eSChris Mason { 134262e2749eSChris Mason int ret = 0; 13432c90e5d6SChris Mason struct btrfs_path *path; 134462e2749eSChris Mason u8 *ptr; 134562e2749eSChris Mason 13462c90e5d6SChris Mason path = btrfs_alloc_path(); 13472c90e5d6SChris Mason BUG_ON(!path); 13482c90e5d6SChris Mason btrfs_init_path(path); 13492c90e5d6SChris Mason ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size); 135062e2749eSChris Mason if (!ret) { 13512c90e5d6SChris Mason ptr = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), 13522c90e5d6SChris Mason path->slots[0], u8); 13532c90e5d6SChris Mason btrfs_memcpy(root, path->nodes[0]->b_data, 1354d6025579SChris Mason ptr, data, data_size); 13552c90e5d6SChris Mason btrfs_mark_buffer_dirty(path->nodes[0]); 135662e2749eSChris Mason } 13572c90e5d6SChris Mason btrfs_release_path(root, path); 13582c90e5d6SChris Mason btrfs_free_path(path); 1359aa5d6bedSChris Mason return ret; 1360be0e5c09SChris Mason } 1361be0e5c09SChris Mason 136274123bd7SChris Mason /* 13635de08d7dSChris Mason * delete the pointer from a given node. 136474123bd7SChris Mason * 136574123bd7SChris Mason * If the delete empties a node, the node is removed from the tree, 136674123bd7SChris Mason * continuing all the way the root if required. The root is converted into 136774123bd7SChris Mason * a leaf if all the nodes are emptied. 136874123bd7SChris Mason */ 1369e089f05cSChris Mason static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, 1370e089f05cSChris Mason struct btrfs_path *path, int level, int slot) 1371be0e5c09SChris Mason { 1372234b63a0SChris Mason struct btrfs_node *node; 1373e20d96d6SChris Mason struct buffer_head *parent = path->nodes[level]; 13747518a238SChris Mason u32 nritems; 1375aa5d6bedSChris Mason int ret = 0; 1376bb803951SChris Mason int wret; 1377be0e5c09SChris Mason 1378e20d96d6SChris Mason node = btrfs_buffer_node(parent); 13797518a238SChris Mason nritems = btrfs_header_nritems(&node->header); 1380be0e5c09SChris Mason if (slot != nritems -1) { 1381d6025579SChris Mason btrfs_memmove(root, node, node->ptrs + slot, 1382d6025579SChris Mason node->ptrs + slot + 1, 1383d6025579SChris Mason sizeof(struct btrfs_key_ptr) * 1384d6025579SChris Mason (nritems - slot - 1)); 1385be0e5c09SChris Mason } 13867518a238SChris Mason nritems--; 13877518a238SChris Mason btrfs_set_header_nritems(&node->header, nritems); 13887518a238SChris Mason if (nritems == 0 && parent == root->node) { 1389e20d96d6SChris Mason struct btrfs_header *header = btrfs_buffer_header(root->node); 1390e20d96d6SChris Mason BUG_ON(btrfs_header_level(header) != 1); 1391eb60ceacSChris Mason /* just turn the root into a leaf and break */ 1392e20d96d6SChris Mason btrfs_set_header_level(header, 0); 1393bb803951SChris Mason } else if (slot == 0) { 1394e089f05cSChris Mason wret = fixup_low_keys(trans, root, path, &node->ptrs[0].key, 1395123abc88SChris Mason level + 1); 13960f70abe2SChris Mason if (wret) 13970f70abe2SChris Mason ret = wret; 1398be0e5c09SChris Mason } 1399d6025579SChris Mason btrfs_mark_buffer_dirty(parent); 1400aa5d6bedSChris Mason return ret; 1401be0e5c09SChris Mason } 1402be0e5c09SChris Mason 140374123bd7SChris Mason /* 140474123bd7SChris Mason * delete the item at the leaf level in path. If that empties 140574123bd7SChris Mason * the leaf, remove it from the tree 140674123bd7SChris Mason */ 1407e089f05cSChris Mason int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, 1408e089f05cSChris Mason struct btrfs_path *path) 1409be0e5c09SChris Mason { 1410be0e5c09SChris Mason int slot; 1411234b63a0SChris Mason struct btrfs_leaf *leaf; 1412e20d96d6SChris Mason struct buffer_head *leaf_buf; 1413be0e5c09SChris Mason int doff; 1414be0e5c09SChris Mason int dsize; 1415aa5d6bedSChris Mason int ret = 0; 1416aa5d6bedSChris Mason int wret; 14177518a238SChris Mason u32 nritems; 1418be0e5c09SChris Mason 1419eb60ceacSChris Mason leaf_buf = path->nodes[0]; 1420e20d96d6SChris Mason leaf = btrfs_buffer_leaf(leaf_buf); 14214920c9acSChris Mason slot = path->slots[0]; 14220783fcfcSChris Mason doff = btrfs_item_offset(leaf->items + slot); 14230783fcfcSChris Mason dsize = btrfs_item_size(leaf->items + slot); 14247518a238SChris Mason nritems = btrfs_header_nritems(&leaf->header); 1425be0e5c09SChris Mason 14267518a238SChris Mason if (slot != nritems - 1) { 1427be0e5c09SChris Mason int i; 1428123abc88SChris Mason int data_end = leaf_data_end(root, leaf); 1429d6025579SChris Mason btrfs_memmove(root, leaf, btrfs_leaf_data(leaf) + 1430d6025579SChris Mason data_end + dsize, 1431123abc88SChris Mason btrfs_leaf_data(leaf) + data_end, 1432be0e5c09SChris Mason doff - data_end); 14330783fcfcSChris Mason for (i = slot + 1; i < nritems; i++) { 1434123abc88SChris Mason u32 ioff = btrfs_item_offset(leaf->items + i); 14350783fcfcSChris Mason btrfs_set_item_offset(leaf->items + i, ioff + dsize); 14360783fcfcSChris Mason } 1437d6025579SChris Mason btrfs_memmove(root, leaf, leaf->items + slot, 1438d6025579SChris Mason leaf->items + slot + 1, 14390783fcfcSChris Mason sizeof(struct btrfs_item) * 14407518a238SChris Mason (nritems - slot - 1)); 1441be0e5c09SChris Mason } 14427518a238SChris Mason btrfs_set_header_nritems(&leaf->header, nritems - 1); 14437518a238SChris Mason nritems--; 144474123bd7SChris Mason /* delete the leaf if we've emptied it */ 14457518a238SChris Mason if (nritems == 0) { 1446eb60ceacSChris Mason if (leaf_buf == root->node) { 14477518a238SChris Mason btrfs_set_header_level(&leaf->header, 0); 14489a8dd150SChris Mason } else { 1449e089f05cSChris Mason clean_tree_block(trans, root, leaf_buf); 1450d6025579SChris Mason wait_on_buffer(leaf_buf); 1451e089f05cSChris Mason wret = del_ptr(trans, root, path, 1, path->slots[1]); 1452aa5d6bedSChris Mason if (wret) 1453aa5d6bedSChris Mason ret = wret; 1454e089f05cSChris Mason wret = btrfs_free_extent(trans, root, 1455e20d96d6SChris Mason leaf_buf->b_blocknr, 1, 1); 14560f70abe2SChris Mason if (wret) 14570f70abe2SChris Mason ret = wret; 14589a8dd150SChris Mason } 1459be0e5c09SChris Mason } else { 14607518a238SChris Mason int used = leaf_space_used(leaf, 0, nritems); 1461aa5d6bedSChris Mason if (slot == 0) { 1462e089f05cSChris Mason wret = fixup_low_keys(trans, root, path, 1463aa5d6bedSChris Mason &leaf->items[0].key, 1); 1464aa5d6bedSChris Mason if (wret) 1465aa5d6bedSChris Mason ret = wret; 1466aa5d6bedSChris Mason } 1467aa5d6bedSChris Mason 146874123bd7SChris Mason /* delete the leaf if it is mostly empty */ 1469123abc88SChris Mason if (used < BTRFS_LEAF_DATA_SIZE(root) / 3) { 1470be0e5c09SChris Mason /* push_leaf_left fixes the path. 1471be0e5c09SChris Mason * make sure the path still points to our leaf 1472be0e5c09SChris Mason * for possible call to del_ptr below 1473be0e5c09SChris Mason */ 14744920c9acSChris Mason slot = path->slots[1]; 1475e20d96d6SChris Mason get_bh(leaf_buf); 1476e089f05cSChris Mason wret = push_leaf_left(trans, root, path, 1); 1477aa5d6bedSChris Mason if (wret < 0) 1478aa5d6bedSChris Mason ret = wret; 1479f0930a37SChris Mason if (path->nodes[0] == leaf_buf && 14807518a238SChris Mason btrfs_header_nritems(&leaf->header)) { 1481e089f05cSChris Mason wret = push_leaf_right(trans, root, path, 1); 1482aa5d6bedSChris Mason if (wret < 0) 1483aa5d6bedSChris Mason ret = wret; 1484aa5d6bedSChris Mason } 14857518a238SChris Mason if (btrfs_header_nritems(&leaf->header) == 0) { 1486e20d96d6SChris Mason u64 blocknr = leaf_buf->b_blocknr; 1487e089f05cSChris Mason clean_tree_block(trans, root, leaf_buf); 1488d6025579SChris Mason wait_on_buffer(leaf_buf); 1489e089f05cSChris Mason wret = del_ptr(trans, root, path, 1, slot); 1490aa5d6bedSChris Mason if (wret) 1491aa5d6bedSChris Mason ret = wret; 1492234b63a0SChris Mason btrfs_block_release(root, leaf_buf); 1493e089f05cSChris Mason wret = btrfs_free_extent(trans, root, blocknr, 1494e089f05cSChris Mason 1, 1); 14950f70abe2SChris Mason if (wret) 14960f70abe2SChris Mason ret = wret; 14975de08d7dSChris Mason } else { 1498d6025579SChris Mason btrfs_mark_buffer_dirty(leaf_buf); 1499234b63a0SChris Mason btrfs_block_release(root, leaf_buf); 1500be0e5c09SChris Mason } 1501d5719762SChris Mason } else { 1502d6025579SChris Mason btrfs_mark_buffer_dirty(leaf_buf); 1503be0e5c09SChris Mason } 15049a8dd150SChris Mason } 1505aa5d6bedSChris Mason return ret; 15069a8dd150SChris Mason } 15079a8dd150SChris Mason 150897571fd0SChris Mason /* 150997571fd0SChris Mason * walk up the tree as far as required to find the next leaf. 15100f70abe2SChris Mason * returns 0 if it found something or 1 if there are no greater leaves. 15110f70abe2SChris Mason * returns < 0 on io errors. 151297571fd0SChris Mason */ 1513234b63a0SChris Mason int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) 1514d97e63b6SChris Mason { 1515d97e63b6SChris Mason int slot; 1516d97e63b6SChris Mason int level = 1; 1517d97e63b6SChris Mason u64 blocknr; 1518e20d96d6SChris Mason struct buffer_head *c; 1519e20d96d6SChris Mason struct btrfs_node *c_node; 1520e20d96d6SChris Mason struct buffer_head *next = NULL; 1521d97e63b6SChris Mason 1522234b63a0SChris Mason while(level < BTRFS_MAX_LEVEL) { 1523d97e63b6SChris Mason if (!path->nodes[level]) 15240f70abe2SChris Mason return 1; 1525d97e63b6SChris Mason slot = path->slots[level] + 1; 1526d97e63b6SChris Mason c = path->nodes[level]; 1527e20d96d6SChris Mason c_node = btrfs_buffer_node(c); 1528e20d96d6SChris Mason if (slot >= btrfs_header_nritems(&c_node->header)) { 1529d97e63b6SChris Mason level++; 1530d97e63b6SChris Mason continue; 1531d97e63b6SChris Mason } 1532e20d96d6SChris Mason blocknr = btrfs_node_blockptr(c_node, slot); 1533cfaa7295SChris Mason if (next) 1534234b63a0SChris Mason btrfs_block_release(root, next); 1535d97e63b6SChris Mason next = read_tree_block(root, blocknr); 1536d97e63b6SChris Mason break; 1537d97e63b6SChris Mason } 1538d97e63b6SChris Mason path->slots[level] = slot; 1539d97e63b6SChris Mason while(1) { 1540d97e63b6SChris Mason level--; 1541d97e63b6SChris Mason c = path->nodes[level]; 1542234b63a0SChris Mason btrfs_block_release(root, c); 1543d97e63b6SChris Mason path->nodes[level] = next; 1544d97e63b6SChris Mason path->slots[level] = 0; 1545d97e63b6SChris Mason if (!level) 1546d97e63b6SChris Mason break; 15471d4f8a0cSChris Mason next = read_tree_block(root, 1548e20d96d6SChris Mason btrfs_node_blockptr(btrfs_buffer_node(next), 0)); 1549d97e63b6SChris Mason } 1550d97e63b6SChris Mason return 0; 1551d97e63b6SChris Mason } 1552