1be0e5c09SChris Mason #include <stdio.h> 2be0e5c09SChris Mason #include <stdlib.h> 3be0e5c09SChris Mason #include "kerncompat.h" 4eb60ceacSChris Mason #include "radix-tree.h" 5eb60ceacSChris Mason #include "ctree.h" 6eb60ceacSChris Mason #include "disk-io.h" 7be0e5c09SChris Mason 85c680ed6SChris Mason #define SEARCH_READ 0 95c680ed6SChris Mason #define SEARCH_WRITE 1 105c680ed6SChris Mason 11d97e63b6SChris Mason static int refill_alloc_extent(struct ctree_root *root); 125c680ed6SChris Mason int split_node(struct ctree_root *root, struct ctree_path *path, int level); 135c680ed6SChris Mason int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size); 14d97e63b6SChris Mason 15be0e5c09SChris Mason static inline void init_path(struct ctree_path *p) 16be0e5c09SChris Mason { 17be0e5c09SChris Mason memset(p, 0, sizeof(*p)); 18be0e5c09SChris Mason } 19be0e5c09SChris Mason 20eb60ceacSChris Mason static void release_path(struct ctree_root *root, struct ctree_path *p) 21eb60ceacSChris Mason { 22eb60ceacSChris Mason int i; 23eb60ceacSChris Mason for (i = 0; i < MAX_LEVEL; i++) { 24eb60ceacSChris Mason if (!p->nodes[i]) 25eb60ceacSChris Mason break; 26eb60ceacSChris Mason tree_block_release(root, p->nodes[i]); 27eb60ceacSChris Mason } 28eb60ceacSChris Mason } 29eb60ceacSChris Mason 3074123bd7SChris Mason /* 3174123bd7SChris Mason * The leaf data grows from end-to-front in the node. 3274123bd7SChris Mason * this returns the address of the start of the last item, 3374123bd7SChris Mason * which is the stop of the leaf data stack 3474123bd7SChris Mason */ 35be0e5c09SChris Mason static inline unsigned int leaf_data_end(struct leaf *leaf) 36be0e5c09SChris Mason { 37be0e5c09SChris Mason unsigned int nr = leaf->header.nritems; 38be0e5c09SChris Mason if (nr == 0) 39d97e63b6SChris Mason return sizeof(leaf->data); 40be0e5c09SChris Mason return leaf->items[nr-1].offset; 41be0e5c09SChris Mason } 42be0e5c09SChris Mason 4374123bd7SChris Mason /* 4474123bd7SChris Mason * The space between the end of the leaf items and 4574123bd7SChris Mason * the start of the leaf data. IOW, how much room 4674123bd7SChris Mason * the leaf has left for both items and data 4774123bd7SChris Mason */ 48be0e5c09SChris Mason static inline int leaf_free_space(struct leaf *leaf) 49be0e5c09SChris Mason { 50be0e5c09SChris Mason int data_end = leaf_data_end(leaf); 51be0e5c09SChris Mason int nritems = leaf->header.nritems; 52be0e5c09SChris Mason char *items_end = (char *)(leaf->items + nritems + 1); 53be0e5c09SChris Mason return (char *)(leaf->data + data_end) - (char *)items_end; 54be0e5c09SChris Mason } 55be0e5c09SChris Mason 5674123bd7SChris Mason /* 5774123bd7SChris Mason * compare two keys in a memcmp fashion 5874123bd7SChris Mason */ 59be0e5c09SChris Mason int comp_keys(struct key *k1, struct key *k2) 60be0e5c09SChris Mason { 61be0e5c09SChris Mason if (k1->objectid > k2->objectid) 62be0e5c09SChris Mason return 1; 63be0e5c09SChris Mason if (k1->objectid < k2->objectid) 64be0e5c09SChris Mason return -1; 65be0e5c09SChris Mason if (k1->flags > k2->flags) 66be0e5c09SChris Mason return 1; 67be0e5c09SChris Mason if (k1->flags < k2->flags) 68be0e5c09SChris Mason return -1; 69be0e5c09SChris Mason if (k1->offset > k2->offset) 70be0e5c09SChris Mason return 1; 71be0e5c09SChris Mason if (k1->offset < k2->offset) 72be0e5c09SChris Mason return -1; 73be0e5c09SChris Mason return 0; 74be0e5c09SChris Mason } 7574123bd7SChris Mason 7674123bd7SChris Mason /* 7774123bd7SChris Mason * search for key in the array p. items p are item_size apart 7874123bd7SChris Mason * and there are 'max' items in p 7974123bd7SChris Mason * the slot in the array is returned via slot, and it points to 8074123bd7SChris Mason * the place where you would insert key if it is not found in 8174123bd7SChris Mason * the array. 8274123bd7SChris Mason * 8374123bd7SChris Mason * slot may point to max if the key is bigger than all of the keys 8474123bd7SChris Mason */ 85be0e5c09SChris Mason int generic_bin_search(char *p, int item_size, struct key *key, 86be0e5c09SChris Mason int max, int *slot) 87be0e5c09SChris Mason { 88be0e5c09SChris Mason int low = 0; 89be0e5c09SChris Mason int high = max; 90be0e5c09SChris Mason int mid; 91be0e5c09SChris Mason int ret; 92be0e5c09SChris Mason struct key *tmp; 93be0e5c09SChris Mason 94be0e5c09SChris Mason while(low < high) { 95be0e5c09SChris Mason mid = (low + high) / 2; 96be0e5c09SChris Mason tmp = (struct key *)(p + mid * item_size); 97be0e5c09SChris Mason ret = comp_keys(tmp, key); 98be0e5c09SChris Mason 99be0e5c09SChris Mason if (ret < 0) 100be0e5c09SChris Mason low = mid + 1; 101be0e5c09SChris Mason else if (ret > 0) 102be0e5c09SChris Mason high = mid; 103be0e5c09SChris Mason else { 104be0e5c09SChris Mason *slot = mid; 105be0e5c09SChris Mason return 0; 106be0e5c09SChris Mason } 107be0e5c09SChris Mason } 108be0e5c09SChris Mason *slot = low; 109be0e5c09SChris Mason return 1; 110be0e5c09SChris Mason } 111be0e5c09SChris Mason 112be0e5c09SChris Mason int bin_search(struct node *c, struct key *key, int *slot) 113be0e5c09SChris Mason { 114be0e5c09SChris Mason if (is_leaf(c->header.flags)) { 115be0e5c09SChris Mason struct leaf *l = (struct leaf *)c; 116be0e5c09SChris Mason return generic_bin_search((void *)l->items, sizeof(struct item), 117be0e5c09SChris Mason key, c->header.nritems, slot); 118be0e5c09SChris Mason } else { 119be0e5c09SChris Mason return generic_bin_search((void *)c->keys, sizeof(struct key), 120be0e5c09SChris Mason key, c->header.nritems, slot); 121be0e5c09SChris Mason } 122be0e5c09SChris Mason return -1; 123be0e5c09SChris Mason } 124be0e5c09SChris Mason 12574123bd7SChris Mason /* 12674123bd7SChris Mason * look for key in the tree. path is filled in with nodes along the way 12774123bd7SChris Mason * if key is found, we return zero and you can find the item in the leaf 12874123bd7SChris Mason * level of the path (level 0) 12974123bd7SChris Mason * 13074123bd7SChris Mason * If the key isn't found, the path points to the slot where it should 13174123bd7SChris Mason * be inserted. 13274123bd7SChris Mason */ 1335c680ed6SChris Mason int search_slot(struct ctree_root *root, struct key *key, struct ctree_path *p, int ins_len) 134be0e5c09SChris Mason { 135eb60ceacSChris Mason struct tree_buffer *b = root->node; 136eb60ceacSChris Mason struct node *c; 137be0e5c09SChris Mason int slot; 138be0e5c09SChris Mason int ret; 139be0e5c09SChris Mason int level; 1405c680ed6SChris Mason 141eb60ceacSChris Mason b->count++; 142eb60ceacSChris Mason while (b) { 143eb60ceacSChris Mason c = &b->node; 144be0e5c09SChris Mason level = node_level(c->header.flags); 145eb60ceacSChris Mason p->nodes[level] = b; 146be0e5c09SChris Mason ret = bin_search(c, key, &slot); 147be0e5c09SChris Mason if (!is_leaf(c->header.flags)) { 148be0e5c09SChris Mason if (ret && slot > 0) 149be0e5c09SChris Mason slot -= 1; 150be0e5c09SChris Mason p->slots[level] = slot; 1515c680ed6SChris Mason if (ins_len && c->header.nritems == NODEPTRS_PER_BLOCK) { 1525c680ed6SChris Mason int sret = split_node(root, p, level); 1535c680ed6SChris Mason BUG_ON(sret > 0); 1545c680ed6SChris Mason if (sret) 1555c680ed6SChris Mason return sret; 1565c680ed6SChris Mason b = p->nodes[level]; 1575c680ed6SChris Mason c = &b->node; 1585c680ed6SChris Mason slot = p->slots[level]; 1595c680ed6SChris Mason } 160eb60ceacSChris Mason b = read_tree_block(root, c->blockptrs[slot]); 161be0e5c09SChris Mason continue; 162be0e5c09SChris Mason } else { 1635c680ed6SChris Mason struct leaf *l = (struct leaf *)c; 164be0e5c09SChris Mason p->slots[level] = slot; 1655c680ed6SChris Mason if (ins_len && leaf_free_space(l) < sizeof(struct item) + ins_len) { 1665c680ed6SChris Mason int sret = split_leaf(root, p, ins_len); 1675c680ed6SChris Mason BUG_ON(sret > 0); 1685c680ed6SChris Mason if (sret) 1695c680ed6SChris Mason return sret; 1705c680ed6SChris Mason } 171be0e5c09SChris Mason return ret; 172be0e5c09SChris Mason } 173be0e5c09SChris Mason } 174be0e5c09SChris Mason return -1; 175be0e5c09SChris Mason } 176be0e5c09SChris Mason 17774123bd7SChris Mason /* 17874123bd7SChris Mason * adjust the pointers going up the tree, starting at level 17974123bd7SChris Mason * making sure the right key of each node is points to 'key'. 18074123bd7SChris Mason * This is used after shifting pointers to the left, so it stops 18174123bd7SChris Mason * fixing up pointers when a given leaf/node is not in slot 0 of the 18274123bd7SChris Mason * higher levels 18374123bd7SChris Mason */ 184eb60ceacSChris Mason static void fixup_low_keys(struct ctree_root *root, 185eb60ceacSChris Mason struct ctree_path *path, struct key *key, 186be0e5c09SChris Mason int level) 187be0e5c09SChris Mason { 188be0e5c09SChris Mason int i; 189be0e5c09SChris Mason for (i = level; i < MAX_LEVEL; i++) { 190eb60ceacSChris Mason struct node *t; 191be0e5c09SChris Mason int tslot = path->slots[i]; 192eb60ceacSChris Mason if (!path->nodes[i]) 193be0e5c09SChris Mason break; 194eb60ceacSChris Mason t = &path->nodes[i]->node; 195be0e5c09SChris Mason memcpy(t->keys + tslot, key, sizeof(*key)); 196eb60ceacSChris Mason write_tree_block(root, path->nodes[i]); 197be0e5c09SChris Mason if (tslot != 0) 198be0e5c09SChris Mason break; 199be0e5c09SChris Mason } 200be0e5c09SChris Mason } 201be0e5c09SChris Mason 20274123bd7SChris Mason /* 20374123bd7SChris Mason * try to push data from one node into the next node left in the 20474123bd7SChris Mason * tree. The src node is found at specified level in the path. 20574123bd7SChris Mason * If some bytes were pushed, return 0, otherwise return 1. 20674123bd7SChris Mason * 20774123bd7SChris Mason * Lower nodes/leaves in the path are not touched, higher nodes may 20874123bd7SChris Mason * be modified to reflect the push. 20974123bd7SChris Mason * 21074123bd7SChris Mason * The path is altered to reflect the push. 21174123bd7SChris Mason */ 212be0e5c09SChris Mason int push_node_left(struct ctree_root *root, struct ctree_path *path, int level) 213be0e5c09SChris Mason { 214be0e5c09SChris Mason int slot; 215be0e5c09SChris Mason struct node *left; 216be0e5c09SChris Mason struct node *right; 217be0e5c09SChris Mason int push_items = 0; 218be0e5c09SChris Mason int left_nritems; 219be0e5c09SChris Mason int right_nritems; 220eb60ceacSChris Mason struct tree_buffer *t; 221eb60ceacSChris Mason struct tree_buffer *right_buf; 222be0e5c09SChris Mason 223be0e5c09SChris Mason if (level == MAX_LEVEL - 1 || path->nodes[level + 1] == 0) 224be0e5c09SChris Mason return 1; 225be0e5c09SChris Mason slot = path->slots[level + 1]; 226be0e5c09SChris Mason if (slot == 0) 227be0e5c09SChris Mason return 1; 228be0e5c09SChris Mason 229eb60ceacSChris Mason t = read_tree_block(root, 230eb60ceacSChris Mason path->nodes[level + 1]->node.blockptrs[slot - 1]); 231eb60ceacSChris Mason left = &t->node; 232eb60ceacSChris Mason right_buf = path->nodes[level]; 233eb60ceacSChris Mason right = &right_buf->node; 234be0e5c09SChris Mason left_nritems = left->header.nritems; 235be0e5c09SChris Mason right_nritems = right->header.nritems; 236be0e5c09SChris Mason push_items = NODEPTRS_PER_BLOCK - (left_nritems + 1); 237eb60ceacSChris Mason if (push_items <= 0) { 238eb60ceacSChris Mason tree_block_release(root, t); 239be0e5c09SChris Mason return 1; 240eb60ceacSChris Mason } 241be0e5c09SChris Mason 242be0e5c09SChris Mason if (right_nritems < push_items) 243be0e5c09SChris Mason push_items = right_nritems; 244be0e5c09SChris Mason memcpy(left->keys + left_nritems, right->keys, 245be0e5c09SChris Mason push_items * sizeof(struct key)); 246be0e5c09SChris Mason memcpy(left->blockptrs + left_nritems, right->blockptrs, 247be0e5c09SChris Mason push_items * sizeof(u64)); 248be0e5c09SChris Mason memmove(right->keys, right->keys + push_items, 249be0e5c09SChris Mason (right_nritems - push_items) * sizeof(struct key)); 250be0e5c09SChris Mason memmove(right->blockptrs, right->blockptrs + push_items, 251be0e5c09SChris Mason (right_nritems - push_items) * sizeof(u64)); 252be0e5c09SChris Mason right->header.nritems -= push_items; 253be0e5c09SChris Mason left->header.nritems += push_items; 254be0e5c09SChris Mason 255be0e5c09SChris Mason /* adjust the pointers going up the tree */ 256eb60ceacSChris Mason fixup_low_keys(root, path, right->keys, level + 1); 257eb60ceacSChris Mason 258eb60ceacSChris Mason write_tree_block(root, t); 259eb60ceacSChris Mason write_tree_block(root, right_buf); 260be0e5c09SChris Mason 261be0e5c09SChris Mason /* then fixup the leaf pointer in the path */ 262be0e5c09SChris Mason if (path->slots[level] < push_items) { 263be0e5c09SChris Mason path->slots[level] += left_nritems; 264eb60ceacSChris Mason tree_block_release(root, path->nodes[level]); 265eb60ceacSChris Mason path->nodes[level] = t; 266be0e5c09SChris Mason path->slots[level + 1] -= 1; 267be0e5c09SChris Mason } else { 268be0e5c09SChris Mason path->slots[level] -= push_items; 269eb60ceacSChris Mason tree_block_release(root, t); 270be0e5c09SChris Mason } 271be0e5c09SChris Mason return 0; 272be0e5c09SChris Mason } 273be0e5c09SChris Mason 27474123bd7SChris Mason /* 27574123bd7SChris Mason * try to push data from one node into the next node right in the 27674123bd7SChris Mason * tree. The src node is found at specified level in the path. 27774123bd7SChris Mason * If some bytes were pushed, return 0, otherwise return 1. 27874123bd7SChris Mason * 27974123bd7SChris Mason * Lower nodes/leaves in the path are not touched, higher nodes may 28074123bd7SChris Mason * be modified to reflect the push. 28174123bd7SChris Mason * 28274123bd7SChris Mason * The path is altered to reflect the push. 28374123bd7SChris Mason */ 284be0e5c09SChris Mason int push_node_right(struct ctree_root *root, struct ctree_path *path, int level) 285be0e5c09SChris Mason { 286be0e5c09SChris Mason int slot; 287eb60ceacSChris Mason struct tree_buffer *t; 288eb60ceacSChris Mason struct tree_buffer *src_buffer; 289be0e5c09SChris Mason struct node *dst; 290be0e5c09SChris Mason struct node *src; 291be0e5c09SChris Mason int push_items = 0; 292be0e5c09SChris Mason int dst_nritems; 293be0e5c09SChris Mason int src_nritems; 294be0e5c09SChris Mason 29574123bd7SChris Mason /* can't push from the root */ 296be0e5c09SChris Mason if (level == MAX_LEVEL - 1 || path->nodes[level + 1] == 0) 297be0e5c09SChris Mason return 1; 29874123bd7SChris Mason 29974123bd7SChris Mason /* only try to push inside the node higher up */ 300be0e5c09SChris Mason slot = path->slots[level + 1]; 301be0e5c09SChris Mason if (slot == NODEPTRS_PER_BLOCK - 1) 302be0e5c09SChris Mason return 1; 303be0e5c09SChris Mason 304eb60ceacSChris Mason if (slot >= path->nodes[level + 1]->node.header.nritems -1) 305be0e5c09SChris Mason return 1; 306be0e5c09SChris Mason 307eb60ceacSChris Mason t = read_tree_block(root, 308eb60ceacSChris Mason path->nodes[level + 1]->node.blockptrs[slot + 1]); 309eb60ceacSChris Mason dst = &t->node; 310eb60ceacSChris Mason src_buffer = path->nodes[level]; 311eb60ceacSChris Mason src = &src_buffer->node; 312be0e5c09SChris Mason dst_nritems = dst->header.nritems; 313be0e5c09SChris Mason src_nritems = src->header.nritems; 314be0e5c09SChris Mason push_items = NODEPTRS_PER_BLOCK - (dst_nritems + 1); 315eb60ceacSChris Mason if (push_items <= 0) { 316eb60ceacSChris Mason tree_block_release(root, t); 317be0e5c09SChris Mason return 1; 318eb60ceacSChris Mason } 319be0e5c09SChris Mason 320be0e5c09SChris Mason if (src_nritems < push_items) 321be0e5c09SChris Mason push_items = src_nritems; 322be0e5c09SChris Mason memmove(dst->keys + push_items, dst->keys, 323be0e5c09SChris Mason dst_nritems * sizeof(struct key)); 324be0e5c09SChris Mason memcpy(dst->keys, src->keys + src_nritems - push_items, 325be0e5c09SChris Mason push_items * sizeof(struct key)); 326be0e5c09SChris Mason 327be0e5c09SChris Mason memmove(dst->blockptrs + push_items, dst->blockptrs, 328be0e5c09SChris Mason dst_nritems * sizeof(u64)); 329be0e5c09SChris Mason memcpy(dst->blockptrs, src->blockptrs + src_nritems - push_items, 330be0e5c09SChris Mason push_items * sizeof(u64)); 331be0e5c09SChris Mason 332be0e5c09SChris Mason src->header.nritems -= push_items; 333be0e5c09SChris Mason dst->header.nritems += push_items; 334be0e5c09SChris Mason 335be0e5c09SChris Mason /* adjust the pointers going up the tree */ 336eb60ceacSChris Mason memcpy(path->nodes[level + 1]->node.keys + path->slots[level + 1] + 1, 337be0e5c09SChris Mason dst->keys, sizeof(struct key)); 338eb60ceacSChris Mason 339eb60ceacSChris Mason write_tree_block(root, path->nodes[level + 1]); 340eb60ceacSChris Mason write_tree_block(root, t); 341eb60ceacSChris Mason write_tree_block(root, src_buffer); 342eb60ceacSChris Mason 34374123bd7SChris Mason /* then fixup the pointers in the path */ 344be0e5c09SChris Mason if (path->slots[level] >= src->header.nritems) { 345be0e5c09SChris Mason path->slots[level] -= src->header.nritems; 346eb60ceacSChris Mason tree_block_release(root, path->nodes[level]); 347eb60ceacSChris Mason path->nodes[level] = t; 348be0e5c09SChris Mason path->slots[level + 1] += 1; 349eb60ceacSChris Mason } else { 350eb60ceacSChris Mason tree_block_release(root, t); 351be0e5c09SChris Mason } 352be0e5c09SChris Mason return 0; 353be0e5c09SChris Mason } 354be0e5c09SChris Mason 3555c680ed6SChris Mason static int insert_new_root(struct ctree_root *root, struct ctree_path *path, int level) 35674123bd7SChris Mason { 35774123bd7SChris Mason struct tree_buffer *t; 3585c680ed6SChris Mason struct node *lower; 3595c680ed6SChris Mason struct node *c; 3605c680ed6SChris Mason struct key *lower_key; 3615c680ed6SChris Mason 3625c680ed6SChris Mason BUG_ON(path->nodes[level]); 3635c680ed6SChris Mason BUG_ON(path->nodes[level-1] != root->node); 3645c680ed6SChris Mason 36574123bd7SChris Mason t = alloc_free_block(root); 36674123bd7SChris Mason c = &t->node; 36774123bd7SChris Mason memset(c, 0, sizeof(c)); 3685c680ed6SChris Mason c->header.nritems = 1; 36974123bd7SChris Mason c->header.flags = node_level(level); 37074123bd7SChris Mason c->header.blocknr = t->blocknr; 371cfaa7295SChris Mason c->header.parentid = root->node->node.header.parentid; 37274123bd7SChris Mason lower = &path->nodes[level-1]->node; 37374123bd7SChris Mason if (is_leaf(lower->header.flags)) 37474123bd7SChris Mason lower_key = &((struct leaf *)lower)->items[0].key; 37574123bd7SChris Mason else 37674123bd7SChris Mason lower_key = lower->keys; 37774123bd7SChris Mason memcpy(c->keys, lower_key, sizeof(struct key)); 37874123bd7SChris Mason c->blockptrs[0] = path->nodes[level-1]->blocknr; 379cfaa7295SChris Mason /* the super has an extra ref to root->node */ 38074123bd7SChris Mason tree_block_release(root, root->node); 38174123bd7SChris Mason root->node = t; 38274123bd7SChris Mason t->count++; 38374123bd7SChris Mason write_tree_block(root, t); 38474123bd7SChris Mason path->nodes[level] = t; 38574123bd7SChris Mason path->slots[level] = 0; 38674123bd7SChris Mason return 0; 38774123bd7SChris Mason } 3885c680ed6SChris Mason 3895c680ed6SChris Mason /* 3905c680ed6SChris Mason * worker function to insert a single pointer in a node. 3915c680ed6SChris Mason * the node should have enough room for the pointer already 3925c680ed6SChris Mason * slot and level indicate where you want the key to go, and 3935c680ed6SChris Mason * blocknr is the block the key points to. 3945c680ed6SChris Mason */ 3955c680ed6SChris Mason int insert_ptr(struct ctree_root *root, 3965c680ed6SChris Mason struct ctree_path *path, struct key *key, 3975c680ed6SChris Mason u64 blocknr, int slot, int level) 3985c680ed6SChris Mason { 3995c680ed6SChris Mason struct node *lower; 4005c680ed6SChris Mason int nritems; 4015c680ed6SChris Mason 4025c680ed6SChris Mason BUG_ON(!path->nodes[level]); 40374123bd7SChris Mason lower = &path->nodes[level]->node; 40474123bd7SChris Mason nritems = lower->header.nritems; 40574123bd7SChris Mason if (slot > nritems) 40674123bd7SChris Mason BUG(); 40774123bd7SChris Mason if (nritems == NODEPTRS_PER_BLOCK) 40874123bd7SChris Mason BUG(); 40974123bd7SChris Mason if (slot != nritems) { 41074123bd7SChris Mason memmove(lower->keys + slot + 1, lower->keys + slot, 41174123bd7SChris Mason (nritems - slot) * sizeof(struct key)); 41274123bd7SChris Mason memmove(lower->blockptrs + slot + 1, lower->blockptrs + slot, 41374123bd7SChris Mason (nritems - slot) * sizeof(u64)); 41474123bd7SChris Mason } 41574123bd7SChris Mason memcpy(lower->keys + slot, key, sizeof(struct key)); 41674123bd7SChris Mason lower->blockptrs[slot] = blocknr; 41774123bd7SChris Mason lower->header.nritems++; 41874123bd7SChris Mason if (lower->keys[1].objectid == 0) 41974123bd7SChris Mason BUG(); 42074123bd7SChris Mason write_tree_block(root, path->nodes[level]); 42174123bd7SChris Mason return 0; 42274123bd7SChris Mason } 42374123bd7SChris Mason 4245c680ed6SChris Mason int split_node(struct ctree_root *root, struct ctree_path *path, int level) 425be0e5c09SChris Mason { 4265c680ed6SChris Mason struct tree_buffer *t; 4275c680ed6SChris Mason struct node *c; 4285c680ed6SChris Mason struct tree_buffer *split_buffer; 4295c680ed6SChris Mason struct node *split; 430be0e5c09SChris Mason int mid; 4315c680ed6SChris Mason int ret; 432be0e5c09SChris Mason 4335c680ed6SChris Mason ret = push_node_left(root, path, level); 4345c680ed6SChris Mason if (!ret) 4355c680ed6SChris Mason return 0; 4365c680ed6SChris Mason ret = push_node_right(root, path, level); 4375c680ed6SChris Mason if (!ret) 4385c680ed6SChris Mason return 0; 4395c680ed6SChris Mason t = path->nodes[level]; 440eb60ceacSChris Mason c = &t->node; 4415c680ed6SChris Mason if (t == root->node) { 4425c680ed6SChris Mason /* trying to split the root, lets make a new one */ 4435c680ed6SChris Mason ret = insert_new_root(root, path, level + 1); 4445c680ed6SChris Mason if (ret) 4455c680ed6SChris Mason return ret; 4465c680ed6SChris Mason } 4475c680ed6SChris Mason split_buffer = alloc_free_block(root); 4485c680ed6SChris Mason split = &split_buffer->node; 4495c680ed6SChris Mason split->header.flags = c->header.flags; 4505c680ed6SChris Mason split->header.blocknr = split_buffer->blocknr; 4515c680ed6SChris Mason split->header.parentid = root->node->node.header.parentid; 452be0e5c09SChris Mason mid = (c->header.nritems + 1) / 2; 4535c680ed6SChris Mason memcpy(split->keys, c->keys + mid, 454be0e5c09SChris Mason (c->header.nritems - mid) * sizeof(struct key)); 4555c680ed6SChris Mason memcpy(split->blockptrs, c->blockptrs + mid, 456be0e5c09SChris Mason (c->header.nritems - mid) * sizeof(u64)); 4575c680ed6SChris Mason split->header.nritems = c->header.nritems - mid; 458be0e5c09SChris Mason c->header.nritems = mid; 459eb60ceacSChris Mason write_tree_block(root, t); 4605c680ed6SChris Mason write_tree_block(root, split_buffer); 4615c680ed6SChris Mason insert_ptr(root, path, split->keys, split_buffer->blocknr, 4625c680ed6SChris Mason path->slots[level + 1] + 1, level + 1); 4635c680ed6SChris Mason if (path->slots[level] > mid) { 4645c680ed6SChris Mason path->slots[level] -= mid; 4655c680ed6SChris Mason tree_block_release(root, t); 4665c680ed6SChris Mason path->nodes[level] = split_buffer; 4675c680ed6SChris Mason path->slots[level + 1] += 1; 468eb60ceacSChris Mason } else { 4695c680ed6SChris Mason tree_block_release(root, split_buffer); 470be0e5c09SChris Mason } 4715c680ed6SChris Mason return 0; 472be0e5c09SChris Mason } 473be0e5c09SChris Mason 47474123bd7SChris Mason /* 47574123bd7SChris Mason * how many bytes are required to store the items in a leaf. start 47674123bd7SChris Mason * and nr indicate which items in the leaf to check. This totals up the 47774123bd7SChris Mason * space used both by the item structs and the item data 47874123bd7SChris Mason */ 479be0e5c09SChris Mason int leaf_space_used(struct leaf *l, int start, int nr) 480be0e5c09SChris Mason { 481be0e5c09SChris Mason int data_len; 482be0e5c09SChris Mason int end = start + nr - 1; 483be0e5c09SChris Mason 484be0e5c09SChris Mason if (!nr) 485be0e5c09SChris Mason return 0; 486be0e5c09SChris Mason data_len = l->items[start].offset + l->items[start].size; 487be0e5c09SChris Mason data_len = data_len - l->items[end].offset; 488be0e5c09SChris Mason data_len += sizeof(struct item) * nr; 489be0e5c09SChris Mason return data_len; 490be0e5c09SChris Mason } 491be0e5c09SChris Mason 49274123bd7SChris Mason /* 49374123bd7SChris Mason * push some data in the path leaf to the left, trying to free up at 49474123bd7SChris Mason * least data_size bytes. returns zero if the push worked, nonzero otherwise 49574123bd7SChris Mason */ 496be0e5c09SChris Mason int push_leaf_left(struct ctree_root *root, struct ctree_path *path, 497be0e5c09SChris Mason int data_size) 498be0e5c09SChris Mason { 499eb60ceacSChris Mason struct tree_buffer *right_buf = path->nodes[0]; 500eb60ceacSChris Mason struct leaf *right = &right_buf->leaf; 501eb60ceacSChris Mason struct tree_buffer *t; 502be0e5c09SChris Mason struct leaf *left; 503be0e5c09SChris Mason int slot; 504be0e5c09SChris Mason int i; 505be0e5c09SChris Mason int free_space; 506be0e5c09SChris Mason int push_space = 0; 507be0e5c09SChris Mason int push_items = 0; 508be0e5c09SChris Mason struct item *item; 509be0e5c09SChris Mason int old_left_nritems; 510be0e5c09SChris Mason 511be0e5c09SChris Mason slot = path->slots[1]; 512be0e5c09SChris Mason if (slot == 0) { 513be0e5c09SChris Mason return 1; 514be0e5c09SChris Mason } 515be0e5c09SChris Mason if (!path->nodes[1]) { 516be0e5c09SChris Mason return 1; 517be0e5c09SChris Mason } 518eb60ceacSChris Mason t = read_tree_block(root, path->nodes[1]->node.blockptrs[slot - 1]); 519eb60ceacSChris Mason left = &t->leaf; 520be0e5c09SChris Mason free_space = leaf_free_space(left); 521be0e5c09SChris Mason if (free_space < data_size + sizeof(struct item)) { 522eb60ceacSChris Mason tree_block_release(root, t); 523be0e5c09SChris Mason return 1; 524be0e5c09SChris Mason } 525be0e5c09SChris Mason for (i = 0; i < right->header.nritems; i++) { 526be0e5c09SChris Mason item = right->items + i; 527be0e5c09SChris Mason if (path->slots[0] == i) 528be0e5c09SChris Mason push_space += data_size + sizeof(*item); 529be0e5c09SChris Mason if (item->size + sizeof(*item) + push_space > free_space) 530be0e5c09SChris Mason break; 531be0e5c09SChris Mason push_items++; 532be0e5c09SChris Mason push_space += item->size + sizeof(*item); 533be0e5c09SChris Mason } 534be0e5c09SChris Mason if (push_items == 0) { 535eb60ceacSChris Mason tree_block_release(root, t); 536be0e5c09SChris Mason return 1; 537be0e5c09SChris Mason } 538be0e5c09SChris Mason /* push data from right to left */ 539be0e5c09SChris Mason memcpy(left->items + left->header.nritems, 540be0e5c09SChris Mason right->items, push_items * sizeof(struct item)); 541be0e5c09SChris Mason push_space = LEAF_DATA_SIZE - right->items[push_items -1].offset; 542be0e5c09SChris Mason memcpy(left->data + leaf_data_end(left) - push_space, 543be0e5c09SChris Mason right->data + right->items[push_items - 1].offset, 544be0e5c09SChris Mason push_space); 545be0e5c09SChris Mason old_left_nritems = left->header.nritems; 546eb60ceacSChris Mason BUG_ON(old_left_nritems < 0); 547eb60ceacSChris Mason 548be0e5c09SChris Mason for(i = old_left_nritems; i < old_left_nritems + push_items; i++) { 549be0e5c09SChris Mason left->items[i].offset -= LEAF_DATA_SIZE - 550be0e5c09SChris Mason left->items[old_left_nritems -1].offset; 551be0e5c09SChris Mason } 552be0e5c09SChris Mason left->header.nritems += push_items; 553be0e5c09SChris Mason 554be0e5c09SChris Mason /* fixup right node */ 555be0e5c09SChris Mason push_space = right->items[push_items-1].offset - leaf_data_end(right); 556be0e5c09SChris Mason memmove(right->data + LEAF_DATA_SIZE - push_space, right->data + 557be0e5c09SChris Mason leaf_data_end(right), push_space); 558be0e5c09SChris Mason memmove(right->items, right->items + push_items, 559be0e5c09SChris Mason (right->header.nritems - push_items) * sizeof(struct item)); 560be0e5c09SChris Mason right->header.nritems -= push_items; 561be0e5c09SChris Mason push_space = LEAF_DATA_SIZE; 562eb60ceacSChris Mason 563be0e5c09SChris Mason for (i = 0; i < right->header.nritems; i++) { 564be0e5c09SChris Mason right->items[i].offset = push_space - right->items[i].size; 565be0e5c09SChris Mason push_space = right->items[i].offset; 566be0e5c09SChris Mason } 567eb60ceacSChris Mason 568eb60ceacSChris Mason write_tree_block(root, t); 569eb60ceacSChris Mason write_tree_block(root, right_buf); 570eb60ceacSChris Mason 571eb60ceacSChris Mason fixup_low_keys(root, path, &right->items[0].key, 1); 572be0e5c09SChris Mason 573be0e5c09SChris Mason /* then fixup the leaf pointer in the path */ 574be0e5c09SChris Mason if (path->slots[0] < push_items) { 575be0e5c09SChris Mason path->slots[0] += old_left_nritems; 576eb60ceacSChris Mason tree_block_release(root, path->nodes[0]); 577eb60ceacSChris Mason path->nodes[0] = t; 578be0e5c09SChris Mason path->slots[1] -= 1; 579be0e5c09SChris Mason } else { 580eb60ceacSChris Mason tree_block_release(root, t); 581be0e5c09SChris Mason path->slots[0] -= push_items; 582be0e5c09SChris Mason } 583eb60ceacSChris Mason BUG_ON(path->slots[0] < 0); 584be0e5c09SChris Mason return 0; 585be0e5c09SChris Mason } 586be0e5c09SChris Mason 58774123bd7SChris Mason /* 58874123bd7SChris Mason * split the path's leaf in two, making sure there is at least data_size 58974123bd7SChris Mason * available for the resulting leaf level of the path. 59074123bd7SChris Mason */ 591be0e5c09SChris Mason int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size) 592be0e5c09SChris Mason { 593eb60ceacSChris Mason struct tree_buffer *l_buf = path->nodes[0]; 594eb60ceacSChris Mason struct leaf *l = &l_buf->leaf; 595eb60ceacSChris Mason int nritems; 596eb60ceacSChris Mason int mid; 597eb60ceacSChris Mason int slot; 598be0e5c09SChris Mason struct leaf *right; 599eb60ceacSChris Mason struct tree_buffer *right_buffer; 600be0e5c09SChris Mason int space_needed = data_size + sizeof(struct item); 601be0e5c09SChris Mason int data_copy_size; 602be0e5c09SChris Mason int rt_data_off; 603be0e5c09SChris Mason int i; 604be0e5c09SChris Mason int ret; 605be0e5c09SChris Mason 606be0e5c09SChris Mason if (push_leaf_left(root, path, data_size) == 0) { 607eb60ceacSChris Mason l_buf = path->nodes[0]; 608eb60ceacSChris Mason l = &l_buf->leaf; 609eb60ceacSChris Mason if (leaf_free_space(l) >= sizeof(struct item) + data_size) 610be0e5c09SChris Mason return 0; 611be0e5c09SChris Mason } 6125c680ed6SChris Mason if (!path->nodes[1]) { 6135c680ed6SChris Mason ret = insert_new_root(root, path, 1); 6145c680ed6SChris Mason if (ret) 6155c680ed6SChris Mason return ret; 6165c680ed6SChris Mason } 617eb60ceacSChris Mason slot = path->slots[0]; 618eb60ceacSChris Mason nritems = l->header.nritems; 619eb60ceacSChris Mason mid = (nritems + 1)/ 2; 620eb60ceacSChris Mason 621eb60ceacSChris Mason right_buffer = alloc_free_block(root); 622eb60ceacSChris Mason BUG_ON(!right_buffer); 623eb60ceacSChris Mason BUG_ON(mid == nritems); 624eb60ceacSChris Mason right = &right_buffer->leaf; 625be0e5c09SChris Mason memset(right, 0, sizeof(*right)); 626be0e5c09SChris Mason if (mid <= slot) { 627be0e5c09SChris Mason if (leaf_space_used(l, mid, nritems - mid) + space_needed > 628be0e5c09SChris Mason LEAF_DATA_SIZE) 629be0e5c09SChris Mason BUG(); 630be0e5c09SChris Mason } else { 631be0e5c09SChris Mason if (leaf_space_used(l, 0, mid + 1) + space_needed > 632be0e5c09SChris Mason LEAF_DATA_SIZE) 633be0e5c09SChris Mason BUG(); 634be0e5c09SChris Mason } 635be0e5c09SChris Mason right->header.nritems = nritems - mid; 636eb60ceacSChris Mason right->header.blocknr = right_buffer->blocknr; 637eb60ceacSChris Mason right->header.flags = node_level(0); 638cfaa7295SChris Mason right->header.parentid = root->node->node.header.parentid; 639be0e5c09SChris Mason data_copy_size = l->items[mid].offset + l->items[mid].size - 640be0e5c09SChris Mason leaf_data_end(l); 641be0e5c09SChris Mason memcpy(right->items, l->items + mid, 642be0e5c09SChris Mason (nritems - mid) * sizeof(struct item)); 643be0e5c09SChris Mason memcpy(right->data + LEAF_DATA_SIZE - data_copy_size, 644be0e5c09SChris Mason l->data + leaf_data_end(l), data_copy_size); 645be0e5c09SChris Mason rt_data_off = LEAF_DATA_SIZE - 646be0e5c09SChris Mason (l->items[mid].offset + l->items[mid].size); 64774123bd7SChris Mason 64874123bd7SChris Mason for (i = 0; i < right->header.nritems; i++) 649be0e5c09SChris Mason right->items[i].offset += rt_data_off; 65074123bd7SChris Mason 651be0e5c09SChris Mason l->header.nritems = mid; 652be0e5c09SChris Mason ret = insert_ptr(root, path, &right->items[0].key, 6535c680ed6SChris Mason right_buffer->blocknr, path->slots[1] + 1, 1); 654eb60ceacSChris Mason write_tree_block(root, right_buffer); 655eb60ceacSChris Mason write_tree_block(root, l_buf); 656eb60ceacSChris Mason 657eb60ceacSChris Mason BUG_ON(path->slots[0] != slot); 658be0e5c09SChris Mason if (mid <= slot) { 659eb60ceacSChris Mason tree_block_release(root, path->nodes[0]); 660eb60ceacSChris Mason path->nodes[0] = right_buffer; 661be0e5c09SChris Mason path->slots[0] -= mid; 662be0e5c09SChris Mason path->slots[1] += 1; 663eb60ceacSChris Mason } else 664eb60ceacSChris Mason tree_block_release(root, right_buffer); 665eb60ceacSChris Mason BUG_ON(path->slots[0] < 0); 666be0e5c09SChris Mason return ret; 667be0e5c09SChris Mason } 668be0e5c09SChris Mason 66974123bd7SChris Mason /* 67074123bd7SChris Mason * Given a key and some data, insert an item into the tree. 67174123bd7SChris Mason * This does all the path init required, making room in the tree if needed. 67274123bd7SChris Mason */ 673be0e5c09SChris Mason int insert_item(struct ctree_root *root, struct key *key, 674be0e5c09SChris Mason void *data, int data_size) 675be0e5c09SChris Mason { 676be0e5c09SChris Mason int ret; 677be0e5c09SChris Mason int slot; 678eb60ceacSChris Mason int slot_orig; 679be0e5c09SChris Mason struct leaf *leaf; 680eb60ceacSChris Mason struct tree_buffer *leaf_buf; 681be0e5c09SChris Mason unsigned int nritems; 682be0e5c09SChris Mason unsigned int data_end; 683be0e5c09SChris Mason struct ctree_path path; 684be0e5c09SChris Mason 685cfaa7295SChris Mason refill_alloc_extent(root); 686cfaa7295SChris Mason 68774123bd7SChris Mason /* create a root if there isn't one */ 6885c680ed6SChris Mason if (!root->node) 689cfaa7295SChris Mason BUG(); 690be0e5c09SChris Mason init_path(&path); 6915c680ed6SChris Mason ret = search_slot(root, key, &path, data_size); 692eb60ceacSChris Mason if (ret == 0) { 693eb60ceacSChris Mason release_path(root, &path); 694be0e5c09SChris Mason return -EEXIST; 695eb60ceacSChris Mason } 696be0e5c09SChris Mason 697eb60ceacSChris Mason slot_orig = path.slots[0]; 698eb60ceacSChris Mason leaf_buf = path.nodes[0]; 699eb60ceacSChris Mason leaf = &leaf_buf->leaf; 70074123bd7SChris Mason 701be0e5c09SChris Mason nritems = leaf->header.nritems; 702be0e5c09SChris Mason data_end = leaf_data_end(leaf); 703eb60ceacSChris Mason 704be0e5c09SChris Mason if (leaf_free_space(leaf) < sizeof(struct item) + data_size) 705be0e5c09SChris Mason BUG(); 706be0e5c09SChris Mason 707be0e5c09SChris Mason slot = path.slots[0]; 708eb60ceacSChris Mason BUG_ON(slot < 0); 709be0e5c09SChris Mason if (slot == 0) 710eb60ceacSChris Mason fixup_low_keys(root, &path, key, 1); 711be0e5c09SChris Mason if (slot != nritems) { 712be0e5c09SChris Mason int i; 713be0e5c09SChris Mason unsigned int old_data = leaf->items[slot].offset + 714be0e5c09SChris Mason leaf->items[slot].size; 715be0e5c09SChris Mason 716be0e5c09SChris Mason /* 717be0e5c09SChris Mason * item0..itemN ... dataN.offset..dataN.size .. data0.size 718be0e5c09SChris Mason */ 719be0e5c09SChris Mason /* first correct the data pointers */ 720be0e5c09SChris Mason for (i = slot; i < nritems; i++) 721be0e5c09SChris Mason leaf->items[i].offset -= data_size; 722be0e5c09SChris Mason 723be0e5c09SChris Mason /* shift the items */ 724be0e5c09SChris Mason memmove(leaf->items + slot + 1, leaf->items + slot, 725be0e5c09SChris Mason (nritems - slot) * sizeof(struct item)); 726be0e5c09SChris Mason 727be0e5c09SChris Mason /* shift the data */ 728be0e5c09SChris Mason memmove(leaf->data + data_end - data_size, leaf->data + 729be0e5c09SChris Mason data_end, old_data - data_end); 730be0e5c09SChris Mason data_end = old_data; 731be0e5c09SChris Mason } 73274123bd7SChris Mason /* copy the new data in */ 733be0e5c09SChris Mason memcpy(&leaf->items[slot].key, key, sizeof(struct key)); 734be0e5c09SChris Mason leaf->items[slot].offset = data_end - data_size; 735be0e5c09SChris Mason leaf->items[slot].size = data_size; 736be0e5c09SChris Mason memcpy(leaf->data + data_end - data_size, data, data_size); 737be0e5c09SChris Mason leaf->header.nritems += 1; 738eb60ceacSChris Mason write_tree_block(root, leaf_buf); 739be0e5c09SChris Mason if (leaf_free_space(leaf) < 0) 740be0e5c09SChris Mason BUG(); 741eb60ceacSChris Mason release_path(root, &path); 742be0e5c09SChris Mason return 0; 743be0e5c09SChris Mason } 744be0e5c09SChris Mason 74574123bd7SChris Mason /* 74674123bd7SChris Mason * delete the pointer from a given level in the path. The path is not 74774123bd7SChris Mason * fixed up, so after calling this it is not valid at that level. 74874123bd7SChris Mason * 74974123bd7SChris Mason * If the delete empties a node, the node is removed from the tree, 75074123bd7SChris Mason * continuing all the way the root if required. The root is converted into 75174123bd7SChris Mason * a leaf if all the nodes are emptied. 75274123bd7SChris Mason */ 753be0e5c09SChris Mason int del_ptr(struct ctree_root *root, struct ctree_path *path, int level) 754be0e5c09SChris Mason { 755be0e5c09SChris Mason int slot; 756eb60ceacSChris Mason struct tree_buffer *t; 757be0e5c09SChris Mason struct node *node; 758be0e5c09SChris Mason int nritems; 759be0e5c09SChris Mason 760be0e5c09SChris Mason while(1) { 761eb60ceacSChris Mason t = path->nodes[level]; 762eb60ceacSChris Mason if (!t) 763be0e5c09SChris Mason break; 764eb60ceacSChris Mason node = &t->node; 765be0e5c09SChris Mason slot = path->slots[level]; 766be0e5c09SChris Mason nritems = node->header.nritems; 767be0e5c09SChris Mason 768be0e5c09SChris Mason if (slot != nritems -1) { 769be0e5c09SChris Mason memmove(node->keys + slot, node->keys + slot + 1, 770be0e5c09SChris Mason sizeof(struct key) * (nritems - slot - 1)); 771be0e5c09SChris Mason memmove(node->blockptrs + slot, 772be0e5c09SChris Mason node->blockptrs + slot + 1, 773be0e5c09SChris Mason sizeof(u64) * (nritems - slot - 1)); 774be0e5c09SChris Mason } 775be0e5c09SChris Mason node->header.nritems--; 776eb60ceacSChris Mason write_tree_block(root, t); 777be0e5c09SChris Mason if (node->header.nritems != 0) { 778be0e5c09SChris Mason int tslot; 779be0e5c09SChris Mason if (slot == 0) 780eb60ceacSChris Mason fixup_low_keys(root, path, node->keys, 781eb60ceacSChris Mason level + 1); 782be0e5c09SChris Mason tslot = path->slots[level+1]; 783eb60ceacSChris Mason t->count++; 784be0e5c09SChris Mason push_node_left(root, path, level); 785be0e5c09SChris Mason if (node->header.nritems) { 786be0e5c09SChris Mason push_node_right(root, path, level); 787be0e5c09SChris Mason } 788eb60ceacSChris Mason if (node->header.nritems) { 789eb60ceacSChris Mason tree_block_release(root, t); 790be0e5c09SChris Mason break; 791eb60ceacSChris Mason } 792eb60ceacSChris Mason tree_block_release(root, t); 7934920c9acSChris Mason path->slots[level+1] = tslot; 794be0e5c09SChris Mason } 795eb60ceacSChris Mason if (t == root->node) { 796eb60ceacSChris Mason /* just turn the root into a leaf and break */ 797eb60ceacSChris Mason root->node->node.header.flags = node_level(0); 798eb60ceacSChris Mason write_tree_block(root, t); 799be0e5c09SChris Mason break; 800be0e5c09SChris Mason } 801be0e5c09SChris Mason level++; 802be0e5c09SChris Mason if (!path->nodes[level]) 803be0e5c09SChris Mason BUG(); 804be0e5c09SChris Mason } 805be0e5c09SChris Mason return 0; 806be0e5c09SChris Mason } 807be0e5c09SChris Mason 80874123bd7SChris Mason /* 80974123bd7SChris Mason * delete the item at the leaf level in path. If that empties 81074123bd7SChris Mason * the leaf, remove it from the tree 81174123bd7SChris Mason */ 8124920c9acSChris Mason int del_item(struct ctree_root *root, struct ctree_path *path) 813be0e5c09SChris Mason { 814be0e5c09SChris Mason int slot; 815be0e5c09SChris Mason struct leaf *leaf; 816eb60ceacSChris Mason struct tree_buffer *leaf_buf; 817be0e5c09SChris Mason int doff; 818be0e5c09SChris Mason int dsize; 819be0e5c09SChris Mason 820eb60ceacSChris Mason leaf_buf = path->nodes[0]; 821eb60ceacSChris Mason leaf = &leaf_buf->leaf; 8224920c9acSChris Mason slot = path->slots[0]; 823be0e5c09SChris Mason doff = leaf->items[slot].offset; 824be0e5c09SChris Mason dsize = leaf->items[slot].size; 825be0e5c09SChris Mason 826be0e5c09SChris Mason if (slot != leaf->header.nritems - 1) { 827be0e5c09SChris Mason int i; 828be0e5c09SChris Mason int data_end = leaf_data_end(leaf); 829be0e5c09SChris Mason memmove(leaf->data + data_end + dsize, 830be0e5c09SChris Mason leaf->data + data_end, 831be0e5c09SChris Mason doff - data_end); 832be0e5c09SChris Mason for (i = slot + 1; i < leaf->header.nritems; i++) 833be0e5c09SChris Mason leaf->items[i].offset += dsize; 834be0e5c09SChris Mason memmove(leaf->items + slot, leaf->items + slot + 1, 835be0e5c09SChris Mason sizeof(struct item) * 836be0e5c09SChris Mason (leaf->header.nritems - slot - 1)); 837be0e5c09SChris Mason } 838be0e5c09SChris Mason leaf->header.nritems -= 1; 83974123bd7SChris Mason /* delete the leaf if we've emptied it */ 840be0e5c09SChris Mason if (leaf->header.nritems == 0) { 841eb60ceacSChris Mason if (leaf_buf == root->node) { 842eb60ceacSChris Mason leaf->header.flags = node_level(0); 843eb60ceacSChris Mason write_tree_block(root, leaf_buf); 844eb60ceacSChris Mason } else 8454920c9acSChris Mason del_ptr(root, path, 1); 846be0e5c09SChris Mason } else { 847be0e5c09SChris Mason if (slot == 0) 848eb60ceacSChris Mason fixup_low_keys(root, path, &leaf->items[0].key, 1); 849eb60ceacSChris Mason write_tree_block(root, leaf_buf); 85074123bd7SChris Mason /* delete the leaf if it is mostly empty */ 851be0e5c09SChris Mason if (leaf_space_used(leaf, 0, leaf->header.nritems) < 852be0e5c09SChris Mason LEAF_DATA_SIZE / 4) { 853be0e5c09SChris Mason /* push_leaf_left fixes the path. 854be0e5c09SChris Mason * make sure the path still points to our leaf 855be0e5c09SChris Mason * for possible call to del_ptr below 856be0e5c09SChris Mason */ 8574920c9acSChris Mason slot = path->slots[1]; 858eb60ceacSChris Mason leaf_buf->count++; 8594920c9acSChris Mason push_leaf_left(root, path, 1); 860be0e5c09SChris Mason if (leaf->header.nritems == 0) { 8614920c9acSChris Mason path->slots[1] = slot; 8624920c9acSChris Mason del_ptr(root, path, 1); 863be0e5c09SChris Mason } 864eb60ceacSChris Mason tree_block_release(root, leaf_buf); 865be0e5c09SChris Mason } 866be0e5c09SChris Mason } 867be0e5c09SChris Mason return 0; 868be0e5c09SChris Mason } 869be0e5c09SChris Mason 870d97e63b6SChris Mason int next_leaf(struct ctree_root *root, struct ctree_path *path) 871d97e63b6SChris Mason { 872d97e63b6SChris Mason int slot; 873d97e63b6SChris Mason int level = 1; 874d97e63b6SChris Mason u64 blocknr; 875d97e63b6SChris Mason struct tree_buffer *c; 876cfaa7295SChris Mason struct tree_buffer *next = NULL; 877d97e63b6SChris Mason 878d97e63b6SChris Mason while(level < MAX_LEVEL) { 879d97e63b6SChris Mason if (!path->nodes[level]) 880d97e63b6SChris Mason return -1; 881d97e63b6SChris Mason slot = path->slots[level] + 1; 882d97e63b6SChris Mason c = path->nodes[level]; 883d97e63b6SChris Mason if (slot >= c->node.header.nritems) { 884d97e63b6SChris Mason level++; 885d97e63b6SChris Mason continue; 886d97e63b6SChris Mason } 887d97e63b6SChris Mason blocknr = c->node.blockptrs[slot]; 888cfaa7295SChris Mason if (next) 889cfaa7295SChris Mason tree_block_release(root, next); 890d97e63b6SChris Mason next = read_tree_block(root, blocknr); 891d97e63b6SChris Mason break; 892d97e63b6SChris Mason } 893d97e63b6SChris Mason path->slots[level] = slot; 894d97e63b6SChris Mason while(1) { 895d97e63b6SChris Mason level--; 896d97e63b6SChris Mason c = path->nodes[level]; 897d97e63b6SChris Mason tree_block_release(root, c); 898d97e63b6SChris Mason path->nodes[level] = next; 899d97e63b6SChris Mason path->slots[level] = 0; 900d97e63b6SChris Mason if (!level) 901d97e63b6SChris Mason break; 902d97e63b6SChris Mason next = read_tree_block(root, next->node.blockptrs[0]); 903d97e63b6SChris Mason } 904d97e63b6SChris Mason return 0; 905d97e63b6SChris Mason } 906d97e63b6SChris Mason 907cfaa7295SChris Mason int alloc_extent(struct ctree_root *orig_root, u64 num_blocks, u64 search_start, 908d97e63b6SChris Mason u64 search_end, u64 owner, struct key *ins) 909d97e63b6SChris Mason { 910d97e63b6SChris Mason struct ctree_path path; 911d97e63b6SChris Mason struct key *key; 912d97e63b6SChris Mason int ret; 913d97e63b6SChris Mason u64 hole_size = 0; 914d97e63b6SChris Mason int slot = 0; 915d97e63b6SChris Mason u64 last_block; 916d97e63b6SChris Mason int start_found = 0; 917d97e63b6SChris Mason struct leaf *l; 918d97e63b6SChris Mason struct extent_item extent_item; 919cfaa7295SChris Mason struct ctree_root * root = orig_root->extent_root; 920d97e63b6SChris Mason 921d97e63b6SChris Mason init_path(&path); 922d97e63b6SChris Mason ins->objectid = search_start; 923d97e63b6SChris Mason ins->offset = 0; 924d97e63b6SChris Mason ins->flags = 0; 925d97e63b6SChris Mason 9265c680ed6SChris Mason ret = search_slot(root, ins, &path, sizeof(struct extent_item)); 927d97e63b6SChris Mason while (1) { 928d97e63b6SChris Mason l = &path.nodes[0]->leaf; 929d97e63b6SChris Mason slot = path.slots[0]; 930d97e63b6SChris Mason if (!l) { 931d97e63b6SChris Mason // FIXME allocate root 932d97e63b6SChris Mason } 933d97e63b6SChris Mason if (slot >= l->header.nritems) { 934d97e63b6SChris Mason ret = next_leaf(root, &path); 935d97e63b6SChris Mason if (ret == 0) 936d97e63b6SChris Mason continue; 937d97e63b6SChris Mason if (!start_found) { 938d97e63b6SChris Mason ins->objectid = search_start; 939d97e63b6SChris Mason ins->offset = num_blocks; 940d97e63b6SChris Mason hole_size = search_end - search_start; 941d97e63b6SChris Mason goto insert; 942d97e63b6SChris Mason } 943d97e63b6SChris Mason ins->objectid = last_block; 944d97e63b6SChris Mason ins->offset = num_blocks; 945d97e63b6SChris Mason hole_size = search_end - last_block; 946d97e63b6SChris Mason goto insert; 947d97e63b6SChris Mason } 948d97e63b6SChris Mason key = &l->items[slot].key; 949d97e63b6SChris Mason if (start_found) { 950d97e63b6SChris Mason hole_size = key->objectid - last_block; 951d97e63b6SChris Mason if (hole_size > num_blocks) { 952d97e63b6SChris Mason ins->objectid = last_block; 953d97e63b6SChris Mason ins->offset = num_blocks; 954d97e63b6SChris Mason goto insert; 955d97e63b6SChris Mason } 956d97e63b6SChris Mason } else 957d97e63b6SChris Mason start_found = 1; 958d97e63b6SChris Mason last_block = key->objectid + key->offset; 959d97e63b6SChris Mason path.slots[0]++; 960d97e63b6SChris Mason } 961d97e63b6SChris Mason // FIXME -ENOSPC 962d97e63b6SChris Mason insert: 963cfaa7295SChris Mason release_path(root, &path); 964d97e63b6SChris Mason extent_item.refs = 1; 965d97e63b6SChris Mason extent_item.owner = owner; 966cfaa7295SChris Mason if (root == orig_root && root->reserve_extent->num_blocks == 0) { 967cfaa7295SChris Mason root->reserve_extent->blocknr = ins->objectid; 968cfaa7295SChris Mason root->reserve_extent->num_blocks = ins->offset; 969cfaa7295SChris Mason root->reserve_extent->num_used = 0; 970cfaa7295SChris Mason } 971cfaa7295SChris Mason ret = insert_item(root->extent_root, ins, &extent_item, sizeof(extent_item)); 972d97e63b6SChris Mason return ret; 973d97e63b6SChris Mason } 974d97e63b6SChris Mason 975d97e63b6SChris Mason static int refill_alloc_extent(struct ctree_root *root) 976d97e63b6SChris Mason { 977d97e63b6SChris Mason struct alloc_extent *ae = root->alloc_extent; 978d97e63b6SChris Mason struct key key; 979d97e63b6SChris Mason int ret; 980d97e63b6SChris Mason int min_blocks = MAX_LEVEL * 2; 981d97e63b6SChris Mason 982d97e63b6SChris Mason if (ae->num_blocks > ae->num_used && ae->num_blocks - ae->num_used > 983d97e63b6SChris Mason min_blocks) 984d97e63b6SChris Mason return 0; 985d97e63b6SChris Mason ae = root->reserve_extent; 986d97e63b6SChris Mason if (ae->num_blocks > ae->num_used) { 987d97e63b6SChris Mason if (root->alloc_extent->num_blocks == 0) { 988d97e63b6SChris Mason /* we should swap reserve/alloc_extent when alloc 989d97e63b6SChris Mason * fills up 990d97e63b6SChris Mason */ 991d97e63b6SChris Mason BUG(); 992d97e63b6SChris Mason } 993d97e63b6SChris Mason if (ae->num_blocks - ae->num_used < min_blocks) 994d97e63b6SChris Mason BUG(); 995d97e63b6SChris Mason return 0; 996d97e63b6SChris Mason } 997cfaa7295SChris Mason ret = alloc_extent(root, 998cfaa7295SChris Mason min_blocks * 2, 0, (unsigned long)-1, 999cfaa7295SChris Mason root->node->node.header.parentid, &key); 1000d97e63b6SChris Mason ae->blocknr = key.objectid; 1001d97e63b6SChris Mason ae->num_blocks = key.offset; 1002d97e63b6SChris Mason ae->num_used = 0; 1003d97e63b6SChris Mason return ret; 1004d97e63b6SChris Mason } 1005d97e63b6SChris Mason 1006be0e5c09SChris Mason void print_leaf(struct leaf *l) 1007be0e5c09SChris Mason { 1008be0e5c09SChris Mason int i; 1009be0e5c09SChris Mason int nr = l->header.nritems; 1010be0e5c09SChris Mason struct item *item; 1011cfaa7295SChris Mason struct extent_item *ei; 1012eb60ceacSChris Mason printf("leaf %lu total ptrs %d free space %d\n", l->header.blocknr, nr, 1013be0e5c09SChris Mason leaf_free_space(l)); 1014be0e5c09SChris Mason fflush(stdout); 1015be0e5c09SChris Mason for (i = 0 ; i < nr ; i++) { 1016be0e5c09SChris Mason item = l->items + i; 1017be0e5c09SChris Mason printf("\titem %d key (%lu %u %lu) itemoff %d itemsize %d\n", 1018be0e5c09SChris Mason i, 1019be0e5c09SChris Mason item->key.objectid, item->key.flags, item->key.offset, 1020be0e5c09SChris Mason item->offset, item->size); 1021be0e5c09SChris Mason fflush(stdout); 1022be0e5c09SChris Mason printf("\t\titem data %.*s\n", item->size, l->data+item->offset); 1023cfaa7295SChris Mason ei = (struct extent_item *)(l->data + item->offset); 1024cfaa7295SChris Mason printf("\t\textent data %u %lu\n", ei->refs, ei->owner); 1025be0e5c09SChris Mason fflush(stdout); 1026be0e5c09SChris Mason } 1027be0e5c09SChris Mason } 1028eb60ceacSChris Mason void print_tree(struct ctree_root *root, struct tree_buffer *t) 1029be0e5c09SChris Mason { 1030be0e5c09SChris Mason int i; 1031be0e5c09SChris Mason int nr; 1032eb60ceacSChris Mason struct node *c; 1033be0e5c09SChris Mason 1034eb60ceacSChris Mason if (!t) 1035be0e5c09SChris Mason return; 1036eb60ceacSChris Mason c = &t->node; 1037be0e5c09SChris Mason nr = c->header.nritems; 1038eb60ceacSChris Mason if (c->header.blocknr != t->blocknr) 1039eb60ceacSChris Mason BUG(); 1040be0e5c09SChris Mason if (is_leaf(c->header.flags)) { 1041be0e5c09SChris Mason print_leaf((struct leaf *)c); 1042be0e5c09SChris Mason return; 1043be0e5c09SChris Mason } 1044eb60ceacSChris Mason printf("node %lu level %d total ptrs %d free spc %lu\n", t->blocknr, 1045be0e5c09SChris Mason node_level(c->header.flags), c->header.nritems, 1046be0e5c09SChris Mason NODEPTRS_PER_BLOCK - c->header.nritems); 1047be0e5c09SChris Mason fflush(stdout); 1048be0e5c09SChris Mason for (i = 0; i < nr; i++) { 1049eb60ceacSChris Mason printf("\tkey %d (%lu %u %lu) block %lu\n", 1050be0e5c09SChris Mason i, 1051be0e5c09SChris Mason c->keys[i].objectid, c->keys[i].flags, c->keys[i].offset, 1052be0e5c09SChris Mason c->blockptrs[i]); 1053be0e5c09SChris Mason fflush(stdout); 1054be0e5c09SChris Mason } 1055be0e5c09SChris Mason for (i = 0; i < nr; i++) { 1056eb60ceacSChris Mason struct tree_buffer *next_buf = read_tree_block(root, 1057eb60ceacSChris Mason c->blockptrs[i]); 1058eb60ceacSChris Mason struct node *next = &next_buf->node; 1059be0e5c09SChris Mason if (is_leaf(next->header.flags) && 1060be0e5c09SChris Mason node_level(c->header.flags) != 1) 1061be0e5c09SChris Mason BUG(); 1062be0e5c09SChris Mason if (node_level(next->header.flags) != 1063be0e5c09SChris Mason node_level(c->header.flags) - 1) 1064be0e5c09SChris Mason BUG(); 1065eb60ceacSChris Mason print_tree(root, next_buf); 1066eb60ceacSChris Mason tree_block_release(root, next_buf); 1067be0e5c09SChris Mason } 1068be0e5c09SChris Mason 1069be0e5c09SChris Mason } 1070be0e5c09SChris Mason 1071be0e5c09SChris Mason /* for testing only */ 1072be0e5c09SChris Mason int next_key(int i, int max_key) { 10735c680ed6SChris Mason // return rand() % max_key; 10745c680ed6SChris Mason return i; 1075be0e5c09SChris Mason } 1076be0e5c09SChris Mason 1077be0e5c09SChris Mason int main() { 1078eb60ceacSChris Mason struct ctree_root *root; 1079be0e5c09SChris Mason struct key ins; 10804920c9acSChris Mason struct key last = { (u64)-1, 0, 0}; 1081be0e5c09SChris Mason char *buf; 1082be0e5c09SChris Mason int i; 1083be0e5c09SChris Mason int num; 1084be0e5c09SChris Mason int ret; 1085cfaa7295SChris Mason int run_size = 10000; 1086be0e5c09SChris Mason int max_key = 100000000; 1087be0e5c09SChris Mason int tree_size = 0; 1088be0e5c09SChris Mason struct ctree_path path; 1089cfaa7295SChris Mason struct ctree_super_block super; 1090be0e5c09SChris Mason 1091eb60ceacSChris Mason radix_tree_init(); 1092eb60ceacSChris Mason 1093eb60ceacSChris Mason 1094cfaa7295SChris Mason root = open_ctree("dbfile", &super); 1095cfaa7295SChris Mason printf("root tree\n"); 1096cfaa7295SChris Mason print_tree(root, root->node); 1097cfaa7295SChris Mason printf("map tree\n"); 1098cfaa7295SChris Mason print_tree(root->extent_root, root->extent_root->node); 1099be0e5c09SChris Mason 1100be0e5c09SChris Mason srand(55); 1101be0e5c09SChris Mason for (i = 0; i < run_size; i++) { 1102be0e5c09SChris Mason buf = malloc(64); 1103be0e5c09SChris Mason num = next_key(i, max_key); 1104be0e5c09SChris Mason // num = i; 1105be0e5c09SChris Mason sprintf(buf, "string-%d", num); 1106be0e5c09SChris Mason // printf("insert %d\n", num); 1107be0e5c09SChris Mason ins.objectid = num; 1108be0e5c09SChris Mason ins.offset = 0; 1109be0e5c09SChris Mason ins.flags = 0; 1110eb60ceacSChris Mason ret = insert_item(root, &ins, buf, strlen(buf)); 1111be0e5c09SChris Mason if (!ret) 1112be0e5c09SChris Mason tree_size++; 1113be0e5c09SChris Mason } 1114d97e63b6SChris Mason printf("root used: %lu\n", root->alloc_extent->num_used); 1115d97e63b6SChris Mason printf("root tree\n"); 1116cfaa7295SChris Mason // print_tree(root, root->node); 1117d97e63b6SChris Mason printf("map tree\n"); 1118d97e63b6SChris Mason printf("map used: %lu\n", root->extent_root->alloc_extent->num_used); 1119cfaa7295SChris Mason // print_tree(root->extent_root, root->extent_root->node); 1120cfaa7295SChris Mason write_ctree_super(root, &super); 1121eb60ceacSChris Mason close_ctree(root); 1122cfaa7295SChris Mason 1123cfaa7295SChris Mason root = open_ctree("dbfile", &super); 1124eb60ceacSChris Mason printf("starting search\n"); 1125be0e5c09SChris Mason srand(55); 1126be0e5c09SChris Mason for (i = 0; i < run_size; i++) { 1127be0e5c09SChris Mason num = next_key(i, max_key); 1128be0e5c09SChris Mason ins.objectid = num; 1129be0e5c09SChris Mason init_path(&path); 11305c680ed6SChris Mason ret = search_slot(root, &ins, &path, 0); 1131be0e5c09SChris Mason if (ret) { 1132eb60ceacSChris Mason print_tree(root, root->node); 1133be0e5c09SChris Mason printf("unable to find %d\n", num); 1134be0e5c09SChris Mason exit(1); 1135be0e5c09SChris Mason } 1136eb60ceacSChris Mason release_path(root, &path); 1137be0e5c09SChris Mason } 1138cfaa7295SChris Mason write_ctree_super(root, &super); 1139eb60ceacSChris Mason close_ctree(root); 1140cfaa7295SChris Mason root = open_ctree("dbfile", &super); 1141eb60ceacSChris Mason printf("node %p level %d total ptrs %d free spc %lu\n", root->node, 1142eb60ceacSChris Mason node_level(root->node->node.header.flags), 1143eb60ceacSChris Mason root->node->node.header.nritems, 1144eb60ceacSChris Mason NODEPTRS_PER_BLOCK - root->node->node.header.nritems); 1145eb60ceacSChris Mason printf("all searches good, deleting some items\n"); 1146be0e5c09SChris Mason i = 0; 1147be0e5c09SChris Mason srand(55); 11484920c9acSChris Mason for (i = 0 ; i < run_size/4; i++) { 1149be0e5c09SChris Mason num = next_key(i, max_key); 1150be0e5c09SChris Mason ins.objectid = num; 11514920c9acSChris Mason init_path(&path); 11525c680ed6SChris Mason ret = search_slot(root, &ins, &path, 0); 11534920c9acSChris Mason if (ret) 11544920c9acSChris Mason continue; 1155eb60ceacSChris Mason ret = del_item(root, &path); 11564920c9acSChris Mason if (ret != 0) 11574920c9acSChris Mason BUG(); 1158eb60ceacSChris Mason release_path(root, &path); 11594920c9acSChris Mason tree_size--; 11604920c9acSChris Mason } 11614920c9acSChris Mason srand(128); 11624920c9acSChris Mason for (i = 0; i < run_size; i++) { 11634920c9acSChris Mason buf = malloc(64); 11644920c9acSChris Mason num = next_key(i, max_key); 11654920c9acSChris Mason sprintf(buf, "string-%d", num); 11664920c9acSChris Mason ins.objectid = num; 1167eb60ceacSChris Mason ret = insert_item(root, &ins, buf, strlen(buf)); 11684920c9acSChris Mason if (!ret) 11694920c9acSChris Mason tree_size++; 11704920c9acSChris Mason } 1171cfaa7295SChris Mason write_ctree_super(root, &super); 1172eb60ceacSChris Mason close_ctree(root); 1173cfaa7295SChris Mason root = open_ctree("dbfile", &super); 1174eb60ceacSChris Mason printf("starting search2\n"); 1175eb60ceacSChris Mason srand(128); 1176eb60ceacSChris Mason for (i = 0; i < run_size; i++) { 1177eb60ceacSChris Mason num = next_key(i, max_key); 1178eb60ceacSChris Mason ins.objectid = num; 1179eb60ceacSChris Mason init_path(&path); 11805c680ed6SChris Mason ret = search_slot(root, &ins, &path, 0); 1181eb60ceacSChris Mason if (ret) { 1182eb60ceacSChris Mason print_tree(root, root->node); 1183eb60ceacSChris Mason printf("unable to find %d\n", num); 1184eb60ceacSChris Mason exit(1); 1185eb60ceacSChris Mason } 1186eb60ceacSChris Mason release_path(root, &path); 1187eb60ceacSChris Mason } 1188eb60ceacSChris Mason printf("starting big long delete run\n"); 1189eb60ceacSChris Mason while(root->node && root->node->node.header.nritems > 0) { 11904920c9acSChris Mason struct leaf *leaf; 11914920c9acSChris Mason int slot; 11924920c9acSChris Mason ins.objectid = (u64)-1; 11934920c9acSChris Mason init_path(&path); 11945c680ed6SChris Mason ret = search_slot(root, &ins, &path, 0); 11954920c9acSChris Mason if (ret == 0) 11964920c9acSChris Mason BUG(); 11974920c9acSChris Mason 1198eb60ceacSChris Mason leaf = &path.nodes[0]->leaf; 11994920c9acSChris Mason slot = path.slots[0]; 12004920c9acSChris Mason if (slot != leaf->header.nritems) 12014920c9acSChris Mason BUG(); 12024920c9acSChris Mason while(path.slots[0] > 0) { 12034920c9acSChris Mason path.slots[0] -= 1; 12044920c9acSChris Mason slot = path.slots[0]; 1205eb60ceacSChris Mason leaf = &path.nodes[0]->leaf; 12064920c9acSChris Mason 12074920c9acSChris Mason if (comp_keys(&last, &leaf->items[slot].key) <= 0) 12084920c9acSChris Mason BUG(); 12094920c9acSChris Mason memcpy(&last, &leaf->items[slot].key, sizeof(last)); 1210eb60ceacSChris Mason ret = del_item(root, &path); 1211eb60ceacSChris Mason if (ret != 0) { 1212eb60ceacSChris Mason printf("del_item returned %d\n", ret); 12134920c9acSChris Mason BUG(); 1214eb60ceacSChris Mason } 12154920c9acSChris Mason tree_size--; 12164920c9acSChris Mason } 1217eb60ceacSChris Mason release_path(root, &path); 1218be0e5c09SChris Mason } 1219cfaa7295SChris Mason write_ctree_super(root, &super); 1220eb60ceacSChris Mason close_ctree(root); 12214920c9acSChris Mason printf("tree size is now %d\n", tree_size); 1222be0e5c09SChris Mason return 0; 1223be0e5c09SChris Mason } 1224