1be0e5c09SChris Mason #include <stdio.h> 2be0e5c09SChris Mason #include <stdlib.h> 3be0e5c09SChris Mason #include "kerncompat.h" 4eb60ceacSChris Mason #include "radix-tree.h" 5eb60ceacSChris Mason #include "ctree.h" 6eb60ceacSChris Mason #include "disk-io.h" 7be0e5c09SChris Mason 8d97e63b6SChris Mason static int refill_alloc_extent(struct ctree_root *root); 9d97e63b6SChris Mason 10be0e5c09SChris Mason static inline void init_path(struct ctree_path *p) 11be0e5c09SChris Mason { 12be0e5c09SChris Mason memset(p, 0, sizeof(*p)); 13be0e5c09SChris Mason } 14be0e5c09SChris Mason 15eb60ceacSChris Mason static void release_path(struct ctree_root *root, struct ctree_path *p) 16eb60ceacSChris Mason { 17eb60ceacSChris Mason int i; 18eb60ceacSChris Mason for (i = 0; i < MAX_LEVEL; i++) { 19eb60ceacSChris Mason if (!p->nodes[i]) 20eb60ceacSChris Mason break; 21eb60ceacSChris Mason tree_block_release(root, p->nodes[i]); 22eb60ceacSChris Mason } 23eb60ceacSChris Mason } 24eb60ceacSChris Mason 2574123bd7SChris Mason /* 2674123bd7SChris Mason * The leaf data grows from end-to-front in the node. 2774123bd7SChris Mason * this returns the address of the start of the last item, 2874123bd7SChris Mason * which is the stop of the leaf data stack 2974123bd7SChris Mason */ 30be0e5c09SChris Mason static inline unsigned int leaf_data_end(struct leaf *leaf) 31be0e5c09SChris Mason { 32be0e5c09SChris Mason unsigned int nr = leaf->header.nritems; 33be0e5c09SChris Mason if (nr == 0) 34d97e63b6SChris Mason return sizeof(leaf->data); 35be0e5c09SChris Mason return leaf->items[nr-1].offset; 36be0e5c09SChris Mason } 37be0e5c09SChris Mason 3874123bd7SChris Mason /* 3974123bd7SChris Mason * The space between the end of the leaf items and 4074123bd7SChris Mason * the start of the leaf data. IOW, how much room 4174123bd7SChris Mason * the leaf has left for both items and data 4274123bd7SChris Mason */ 43be0e5c09SChris Mason static inline int leaf_free_space(struct leaf *leaf) 44be0e5c09SChris Mason { 45be0e5c09SChris Mason int data_end = leaf_data_end(leaf); 46be0e5c09SChris Mason int nritems = leaf->header.nritems; 47be0e5c09SChris Mason char *items_end = (char *)(leaf->items + nritems + 1); 48be0e5c09SChris Mason return (char *)(leaf->data + data_end) - (char *)items_end; 49be0e5c09SChris Mason } 50be0e5c09SChris Mason 5174123bd7SChris Mason /* 5274123bd7SChris Mason * compare two keys in a memcmp fashion 5374123bd7SChris Mason */ 54be0e5c09SChris Mason int comp_keys(struct key *k1, struct key *k2) 55be0e5c09SChris Mason { 56be0e5c09SChris Mason if (k1->objectid > k2->objectid) 57be0e5c09SChris Mason return 1; 58be0e5c09SChris Mason if (k1->objectid < k2->objectid) 59be0e5c09SChris Mason return -1; 60be0e5c09SChris Mason if (k1->flags > k2->flags) 61be0e5c09SChris Mason return 1; 62be0e5c09SChris Mason if (k1->flags < k2->flags) 63be0e5c09SChris Mason return -1; 64be0e5c09SChris Mason if (k1->offset > k2->offset) 65be0e5c09SChris Mason return 1; 66be0e5c09SChris Mason if (k1->offset < k2->offset) 67be0e5c09SChris Mason return -1; 68be0e5c09SChris Mason return 0; 69be0e5c09SChris Mason } 7074123bd7SChris Mason 7174123bd7SChris Mason /* 7274123bd7SChris Mason * search for key in the array p. items p are item_size apart 7374123bd7SChris Mason * and there are 'max' items in p 7474123bd7SChris Mason * the slot in the array is returned via slot, and it points to 7574123bd7SChris Mason * the place where you would insert key if it is not found in 7674123bd7SChris Mason * the array. 7774123bd7SChris Mason * 7874123bd7SChris Mason * slot may point to max if the key is bigger than all of the keys 7974123bd7SChris Mason */ 80be0e5c09SChris Mason int generic_bin_search(char *p, int item_size, struct key *key, 81be0e5c09SChris Mason int max, int *slot) 82be0e5c09SChris Mason { 83be0e5c09SChris Mason int low = 0; 84be0e5c09SChris Mason int high = max; 85be0e5c09SChris Mason int mid; 86be0e5c09SChris Mason int ret; 87be0e5c09SChris Mason struct key *tmp; 88be0e5c09SChris Mason 89be0e5c09SChris Mason while(low < high) { 90be0e5c09SChris Mason mid = (low + high) / 2; 91be0e5c09SChris Mason tmp = (struct key *)(p + mid * item_size); 92be0e5c09SChris Mason ret = comp_keys(tmp, key); 93be0e5c09SChris Mason 94be0e5c09SChris Mason if (ret < 0) 95be0e5c09SChris Mason low = mid + 1; 96be0e5c09SChris Mason else if (ret > 0) 97be0e5c09SChris Mason high = mid; 98be0e5c09SChris Mason else { 99be0e5c09SChris Mason *slot = mid; 100be0e5c09SChris Mason return 0; 101be0e5c09SChris Mason } 102be0e5c09SChris Mason } 103be0e5c09SChris Mason *slot = low; 104be0e5c09SChris Mason return 1; 105be0e5c09SChris Mason } 106be0e5c09SChris Mason 107be0e5c09SChris Mason int bin_search(struct node *c, struct key *key, int *slot) 108be0e5c09SChris Mason { 109be0e5c09SChris Mason if (is_leaf(c->header.flags)) { 110be0e5c09SChris Mason struct leaf *l = (struct leaf *)c; 111be0e5c09SChris Mason return generic_bin_search((void *)l->items, sizeof(struct item), 112be0e5c09SChris Mason key, c->header.nritems, slot); 113be0e5c09SChris Mason } else { 114be0e5c09SChris Mason return generic_bin_search((void *)c->keys, sizeof(struct key), 115be0e5c09SChris Mason key, c->header.nritems, slot); 116be0e5c09SChris Mason } 117be0e5c09SChris Mason return -1; 118be0e5c09SChris Mason } 119be0e5c09SChris Mason 12074123bd7SChris Mason /* 12174123bd7SChris Mason * look for key in the tree. path is filled in with nodes along the way 12274123bd7SChris Mason * if key is found, we return zero and you can find the item in the leaf 12374123bd7SChris Mason * level of the path (level 0) 12474123bd7SChris Mason * 12574123bd7SChris Mason * If the key isn't found, the path points to the slot where it should 12674123bd7SChris Mason * be inserted. 12774123bd7SChris Mason */ 128be0e5c09SChris Mason int search_slot(struct ctree_root *root, struct key *key, struct ctree_path *p) 129be0e5c09SChris Mason { 130eb60ceacSChris Mason struct tree_buffer *b = root->node; 131eb60ceacSChris Mason struct node *c; 132eb60ceacSChris Mason 133be0e5c09SChris Mason int slot; 134be0e5c09SChris Mason int ret; 135be0e5c09SChris Mason int level; 136eb60ceacSChris Mason b->count++; 137eb60ceacSChris Mason while (b) { 138eb60ceacSChris Mason c = &b->node; 139be0e5c09SChris Mason level = node_level(c->header.flags); 140eb60ceacSChris Mason p->nodes[level] = b; 141be0e5c09SChris Mason ret = bin_search(c, key, &slot); 142be0e5c09SChris Mason if (!is_leaf(c->header.flags)) { 143be0e5c09SChris Mason if (ret && slot > 0) 144be0e5c09SChris Mason slot -= 1; 145be0e5c09SChris Mason p->slots[level] = slot; 146eb60ceacSChris Mason b = read_tree_block(root, c->blockptrs[slot]); 147be0e5c09SChris Mason continue; 148be0e5c09SChris Mason } else { 149be0e5c09SChris Mason p->slots[level] = slot; 150be0e5c09SChris Mason return ret; 151be0e5c09SChris Mason } 152be0e5c09SChris Mason } 153be0e5c09SChris Mason return -1; 154be0e5c09SChris Mason } 155be0e5c09SChris Mason 15674123bd7SChris Mason /* 15774123bd7SChris Mason * adjust the pointers going up the tree, starting at level 15874123bd7SChris Mason * making sure the right key of each node is points to 'key'. 15974123bd7SChris Mason * This is used after shifting pointers to the left, so it stops 16074123bd7SChris Mason * fixing up pointers when a given leaf/node is not in slot 0 of the 16174123bd7SChris Mason * higher levels 16274123bd7SChris Mason */ 163eb60ceacSChris Mason static void fixup_low_keys(struct ctree_root *root, 164eb60ceacSChris Mason struct ctree_path *path, struct key *key, 165be0e5c09SChris Mason int level) 166be0e5c09SChris Mason { 167be0e5c09SChris Mason int i; 168be0e5c09SChris Mason for (i = level; i < MAX_LEVEL; i++) { 169eb60ceacSChris Mason struct node *t; 170be0e5c09SChris Mason int tslot = path->slots[i]; 171eb60ceacSChris Mason if (!path->nodes[i]) 172be0e5c09SChris Mason break; 173eb60ceacSChris Mason t = &path->nodes[i]->node; 174be0e5c09SChris Mason memcpy(t->keys + tslot, key, sizeof(*key)); 175eb60ceacSChris Mason write_tree_block(root, path->nodes[i]); 176be0e5c09SChris Mason if (tslot != 0) 177be0e5c09SChris Mason break; 178be0e5c09SChris Mason } 179be0e5c09SChris Mason } 180be0e5c09SChris Mason 18174123bd7SChris Mason /* 18274123bd7SChris Mason * try to push data from one node into the next node left in the 18374123bd7SChris Mason * tree. The src node is found at specified level in the path. 18474123bd7SChris Mason * If some bytes were pushed, return 0, otherwise return 1. 18574123bd7SChris Mason * 18674123bd7SChris Mason * Lower nodes/leaves in the path are not touched, higher nodes may 18774123bd7SChris Mason * be modified to reflect the push. 18874123bd7SChris Mason * 18974123bd7SChris Mason * The path is altered to reflect the push. 19074123bd7SChris Mason */ 191be0e5c09SChris Mason int push_node_left(struct ctree_root *root, struct ctree_path *path, int level) 192be0e5c09SChris Mason { 193be0e5c09SChris Mason int slot; 194be0e5c09SChris Mason struct node *left; 195be0e5c09SChris Mason struct node *right; 196be0e5c09SChris Mason int push_items = 0; 197be0e5c09SChris Mason int left_nritems; 198be0e5c09SChris Mason int right_nritems; 199eb60ceacSChris Mason struct tree_buffer *t; 200eb60ceacSChris Mason struct tree_buffer *right_buf; 201be0e5c09SChris Mason 202be0e5c09SChris Mason if (level == MAX_LEVEL - 1 || path->nodes[level + 1] == 0) 203be0e5c09SChris Mason return 1; 204be0e5c09SChris Mason slot = path->slots[level + 1]; 205be0e5c09SChris Mason if (slot == 0) 206be0e5c09SChris Mason return 1; 207be0e5c09SChris Mason 208eb60ceacSChris Mason t = read_tree_block(root, 209eb60ceacSChris Mason path->nodes[level + 1]->node.blockptrs[slot - 1]); 210eb60ceacSChris Mason left = &t->node; 211eb60ceacSChris Mason right_buf = path->nodes[level]; 212eb60ceacSChris Mason right = &right_buf->node; 213be0e5c09SChris Mason left_nritems = left->header.nritems; 214be0e5c09SChris Mason right_nritems = right->header.nritems; 215be0e5c09SChris Mason push_items = NODEPTRS_PER_BLOCK - (left_nritems + 1); 216eb60ceacSChris Mason if (push_items <= 0) { 217eb60ceacSChris Mason tree_block_release(root, t); 218be0e5c09SChris Mason return 1; 219eb60ceacSChris Mason } 220be0e5c09SChris Mason 221be0e5c09SChris Mason if (right_nritems < push_items) 222be0e5c09SChris Mason push_items = right_nritems; 223be0e5c09SChris Mason memcpy(left->keys + left_nritems, right->keys, 224be0e5c09SChris Mason push_items * sizeof(struct key)); 225be0e5c09SChris Mason memcpy(left->blockptrs + left_nritems, right->blockptrs, 226be0e5c09SChris Mason push_items * sizeof(u64)); 227be0e5c09SChris Mason memmove(right->keys, right->keys + push_items, 228be0e5c09SChris Mason (right_nritems - push_items) * sizeof(struct key)); 229be0e5c09SChris Mason memmove(right->blockptrs, right->blockptrs + push_items, 230be0e5c09SChris Mason (right_nritems - push_items) * sizeof(u64)); 231be0e5c09SChris Mason right->header.nritems -= push_items; 232be0e5c09SChris Mason left->header.nritems += push_items; 233be0e5c09SChris Mason 234be0e5c09SChris Mason /* adjust the pointers going up the tree */ 235eb60ceacSChris Mason fixup_low_keys(root, path, right->keys, level + 1); 236eb60ceacSChris Mason 237eb60ceacSChris Mason write_tree_block(root, t); 238eb60ceacSChris Mason write_tree_block(root, right_buf); 239be0e5c09SChris Mason 240be0e5c09SChris Mason /* then fixup the leaf pointer in the path */ 241be0e5c09SChris Mason if (path->slots[level] < push_items) { 242be0e5c09SChris Mason path->slots[level] += left_nritems; 243eb60ceacSChris Mason tree_block_release(root, path->nodes[level]); 244eb60ceacSChris Mason path->nodes[level] = t; 245be0e5c09SChris Mason path->slots[level + 1] -= 1; 246be0e5c09SChris Mason } else { 247be0e5c09SChris Mason path->slots[level] -= push_items; 248eb60ceacSChris Mason tree_block_release(root, t); 249be0e5c09SChris Mason } 250be0e5c09SChris Mason return 0; 251be0e5c09SChris Mason } 252be0e5c09SChris Mason 25374123bd7SChris Mason /* 25474123bd7SChris Mason * try to push data from one node into the next node right in the 25574123bd7SChris Mason * tree. The src node is found at specified level in the path. 25674123bd7SChris Mason * If some bytes were pushed, return 0, otherwise return 1. 25774123bd7SChris Mason * 25874123bd7SChris Mason * Lower nodes/leaves in the path are not touched, higher nodes may 25974123bd7SChris Mason * be modified to reflect the push. 26074123bd7SChris Mason * 26174123bd7SChris Mason * The path is altered to reflect the push. 26274123bd7SChris Mason */ 263be0e5c09SChris Mason int push_node_right(struct ctree_root *root, struct ctree_path *path, int level) 264be0e5c09SChris Mason { 265be0e5c09SChris Mason int slot; 266eb60ceacSChris Mason struct tree_buffer *t; 267eb60ceacSChris Mason struct tree_buffer *src_buffer; 268be0e5c09SChris Mason struct node *dst; 269be0e5c09SChris Mason struct node *src; 270be0e5c09SChris Mason int push_items = 0; 271be0e5c09SChris Mason int dst_nritems; 272be0e5c09SChris Mason int src_nritems; 273be0e5c09SChris Mason 27474123bd7SChris Mason /* can't push from the root */ 275be0e5c09SChris Mason if (level == MAX_LEVEL - 1 || path->nodes[level + 1] == 0) 276be0e5c09SChris Mason return 1; 27774123bd7SChris Mason 27874123bd7SChris Mason /* only try to push inside the node higher up */ 279be0e5c09SChris Mason slot = path->slots[level + 1]; 280be0e5c09SChris Mason if (slot == NODEPTRS_PER_BLOCK - 1) 281be0e5c09SChris Mason return 1; 282be0e5c09SChris Mason 283eb60ceacSChris Mason if (slot >= path->nodes[level + 1]->node.header.nritems -1) 284be0e5c09SChris Mason return 1; 285be0e5c09SChris Mason 286eb60ceacSChris Mason t = read_tree_block(root, 287eb60ceacSChris Mason path->nodes[level + 1]->node.blockptrs[slot + 1]); 288eb60ceacSChris Mason dst = &t->node; 289eb60ceacSChris Mason src_buffer = path->nodes[level]; 290eb60ceacSChris Mason src = &src_buffer->node; 291be0e5c09SChris Mason dst_nritems = dst->header.nritems; 292be0e5c09SChris Mason src_nritems = src->header.nritems; 293be0e5c09SChris Mason push_items = NODEPTRS_PER_BLOCK - (dst_nritems + 1); 294eb60ceacSChris Mason if (push_items <= 0) { 295eb60ceacSChris Mason tree_block_release(root, t); 296be0e5c09SChris Mason return 1; 297eb60ceacSChris Mason } 298be0e5c09SChris Mason 299be0e5c09SChris Mason if (src_nritems < push_items) 300be0e5c09SChris Mason push_items = src_nritems; 301be0e5c09SChris Mason memmove(dst->keys + push_items, dst->keys, 302be0e5c09SChris Mason dst_nritems * sizeof(struct key)); 303be0e5c09SChris Mason memcpy(dst->keys, src->keys + src_nritems - push_items, 304be0e5c09SChris Mason push_items * sizeof(struct key)); 305be0e5c09SChris Mason 306be0e5c09SChris Mason memmove(dst->blockptrs + push_items, dst->blockptrs, 307be0e5c09SChris Mason dst_nritems * sizeof(u64)); 308be0e5c09SChris Mason memcpy(dst->blockptrs, src->blockptrs + src_nritems - push_items, 309be0e5c09SChris Mason push_items * sizeof(u64)); 310be0e5c09SChris Mason 311be0e5c09SChris Mason src->header.nritems -= push_items; 312be0e5c09SChris Mason dst->header.nritems += push_items; 313be0e5c09SChris Mason 314be0e5c09SChris Mason /* adjust the pointers going up the tree */ 315eb60ceacSChris Mason memcpy(path->nodes[level + 1]->node.keys + path->slots[level + 1] + 1, 316be0e5c09SChris Mason dst->keys, sizeof(struct key)); 317eb60ceacSChris Mason 318eb60ceacSChris Mason write_tree_block(root, path->nodes[level + 1]); 319eb60ceacSChris Mason write_tree_block(root, t); 320eb60ceacSChris Mason write_tree_block(root, src_buffer); 321eb60ceacSChris Mason 32274123bd7SChris Mason /* then fixup the pointers in the path */ 323be0e5c09SChris Mason if (path->slots[level] >= src->header.nritems) { 324be0e5c09SChris Mason path->slots[level] -= src->header.nritems; 325eb60ceacSChris Mason tree_block_release(root, path->nodes[level]); 326eb60ceacSChris Mason path->nodes[level] = t; 327be0e5c09SChris Mason path->slots[level + 1] += 1; 328eb60ceacSChris Mason } else { 329eb60ceacSChris Mason tree_block_release(root, t); 330be0e5c09SChris Mason } 331be0e5c09SChris Mason return 0; 332be0e5c09SChris Mason } 333be0e5c09SChris Mason 33474123bd7SChris Mason /* 33574123bd7SChris Mason * worker function to insert a single pointer in a node. 33674123bd7SChris Mason * the node should have enough room for the pointer already 33774123bd7SChris Mason * slot and level indicate where you want the key to go, and 33874123bd7SChris Mason * blocknr is the block the key points to. 33974123bd7SChris Mason */ 34074123bd7SChris Mason int __insert_ptr(struct ctree_root *root, 34174123bd7SChris Mason struct ctree_path *path, struct key *key, 34274123bd7SChris Mason u64 blocknr, int slot, int level) 34374123bd7SChris Mason { 34474123bd7SChris Mason struct node *c; 34574123bd7SChris Mason struct node *lower; 34674123bd7SChris Mason struct key *lower_key; 34774123bd7SChris Mason int nritems; 34874123bd7SChris Mason /* need a new root */ 34974123bd7SChris Mason if (!path->nodes[level]) { 35074123bd7SChris Mason struct tree_buffer *t; 35174123bd7SChris Mason t = alloc_free_block(root); 35274123bd7SChris Mason c = &t->node; 35374123bd7SChris Mason memset(c, 0, sizeof(c)); 35474123bd7SChris Mason c->header.nritems = 2; 35574123bd7SChris Mason c->header.flags = node_level(level); 35674123bd7SChris Mason c->header.blocknr = t->blocknr; 35774123bd7SChris Mason lower = &path->nodes[level-1]->node; 35874123bd7SChris Mason if (is_leaf(lower->header.flags)) 35974123bd7SChris Mason lower_key = &((struct leaf *)lower)->items[0].key; 36074123bd7SChris Mason else 36174123bd7SChris Mason lower_key = lower->keys; 36274123bd7SChris Mason memcpy(c->keys, lower_key, sizeof(struct key)); 36374123bd7SChris Mason memcpy(c->keys + 1, key, sizeof(struct key)); 36474123bd7SChris Mason c->blockptrs[0] = path->nodes[level-1]->blocknr; 36574123bd7SChris Mason c->blockptrs[1] = blocknr; 36674123bd7SChris Mason /* the path has an extra ref to root->node */ 36774123bd7SChris Mason tree_block_release(root, root->node); 36874123bd7SChris Mason root->node = t; 36974123bd7SChris Mason t->count++; 37074123bd7SChris Mason write_tree_block(root, t); 37174123bd7SChris Mason path->nodes[level] = t; 37274123bd7SChris Mason path->slots[level] = 0; 37374123bd7SChris Mason if (c->keys[1].objectid == 0) 37474123bd7SChris Mason BUG(); 37574123bd7SChris Mason return 0; 37674123bd7SChris Mason } 37774123bd7SChris Mason lower = &path->nodes[level]->node; 37874123bd7SChris Mason nritems = lower->header.nritems; 37974123bd7SChris Mason if (slot > nritems) 38074123bd7SChris Mason BUG(); 38174123bd7SChris Mason if (nritems == NODEPTRS_PER_BLOCK) 38274123bd7SChris Mason BUG(); 38374123bd7SChris Mason if (slot != nritems) { 38474123bd7SChris Mason memmove(lower->keys + slot + 1, lower->keys + slot, 38574123bd7SChris Mason (nritems - slot) * sizeof(struct key)); 38674123bd7SChris Mason memmove(lower->blockptrs + slot + 1, lower->blockptrs + slot, 38774123bd7SChris Mason (nritems - slot) * sizeof(u64)); 38874123bd7SChris Mason } 38974123bd7SChris Mason memcpy(lower->keys + slot, key, sizeof(struct key)); 39074123bd7SChris Mason lower->blockptrs[slot] = blocknr; 39174123bd7SChris Mason lower->header.nritems++; 39274123bd7SChris Mason if (lower->keys[1].objectid == 0) 39374123bd7SChris Mason BUG(); 39474123bd7SChris Mason write_tree_block(root, path->nodes[level]); 39574123bd7SChris Mason return 0; 39674123bd7SChris Mason } 39774123bd7SChris Mason 39874123bd7SChris Mason 39974123bd7SChris Mason /* 40074123bd7SChris Mason * insert a key,blocknr pair into the tree at a given level 40174123bd7SChris Mason * If the node at that level in the path doesn't have room, 40274123bd7SChris Mason * it is split or shifted as appropriate. 40374123bd7SChris Mason */ 404be0e5c09SChris Mason int insert_ptr(struct ctree_root *root, 405be0e5c09SChris Mason struct ctree_path *path, struct key *key, 406be0e5c09SChris Mason u64 blocknr, int level) 407be0e5c09SChris Mason { 408eb60ceacSChris Mason struct tree_buffer *t = path->nodes[level]; 409eb60ceacSChris Mason struct node *c = &path->nodes[level]->node; 410be0e5c09SChris Mason struct node *b; 411eb60ceacSChris Mason struct tree_buffer *b_buffer; 412eb60ceacSChris Mason struct tree_buffer *bal[MAX_LEVEL]; 413be0e5c09SChris Mason int bal_level = level; 414be0e5c09SChris Mason int mid; 415be0e5c09SChris Mason int bal_start = -1; 416be0e5c09SChris Mason 41774123bd7SChris Mason /* 41874123bd7SChris Mason * check to see if we need to make room in the node for this 41974123bd7SChris Mason * pointer. If we do, keep walking the tree, making sure there 42074123bd7SChris Mason * is enough room in each level for the required insertions. 42174123bd7SChris Mason * 42274123bd7SChris Mason * The bal array is filled in with any nodes to be inserted 42374123bd7SChris Mason * due to splitting. Once we've done all the splitting required 42474123bd7SChris Mason * do the inserts based on the data in the bal array. 42574123bd7SChris Mason */ 426d97e63b6SChris Mason memset(bal, 0, sizeof(bal)); 427eb60ceacSChris Mason while(t && t->node.header.nritems == NODEPTRS_PER_BLOCK) { 428eb60ceacSChris Mason c = &t->node; 429be0e5c09SChris Mason if (push_node_left(root, path, 430be0e5c09SChris Mason node_level(c->header.flags)) == 0) 431be0e5c09SChris Mason break; 432be0e5c09SChris Mason if (push_node_right(root, path, 433be0e5c09SChris Mason node_level(c->header.flags)) == 0) 434be0e5c09SChris Mason break; 435be0e5c09SChris Mason bal_start = bal_level; 436be0e5c09SChris Mason if (bal_level == MAX_LEVEL - 1) 437be0e5c09SChris Mason BUG(); 438eb60ceacSChris Mason b_buffer = alloc_free_block(root); 439eb60ceacSChris Mason b = &b_buffer->node; 440be0e5c09SChris Mason b->header.flags = c->header.flags; 441eb60ceacSChris Mason b->header.blocknr = b_buffer->blocknr; 442be0e5c09SChris Mason mid = (c->header.nritems + 1) / 2; 443be0e5c09SChris Mason memcpy(b->keys, c->keys + mid, 444be0e5c09SChris Mason (c->header.nritems - mid) * sizeof(struct key)); 445be0e5c09SChris Mason memcpy(b->blockptrs, c->blockptrs + mid, 446be0e5c09SChris Mason (c->header.nritems - mid) * sizeof(u64)); 447be0e5c09SChris Mason b->header.nritems = c->header.nritems - mid; 448be0e5c09SChris Mason c->header.nritems = mid; 449eb60ceacSChris Mason 450eb60ceacSChris Mason write_tree_block(root, t); 451eb60ceacSChris Mason write_tree_block(root, b_buffer); 452eb60ceacSChris Mason 453eb60ceacSChris Mason bal[bal_level] = b_buffer; 454be0e5c09SChris Mason if (bal_level == MAX_LEVEL - 1) 455be0e5c09SChris Mason break; 456be0e5c09SChris Mason bal_level += 1; 457eb60ceacSChris Mason t = path->nodes[bal_level]; 458be0e5c09SChris Mason } 45974123bd7SChris Mason /* 46074123bd7SChris Mason * bal_start tells us the first level in the tree that needed to 46174123bd7SChris Mason * be split. Go through the bal array inserting the new nodes 46274123bd7SChris Mason * as needed. The path is fixed as we go. 46374123bd7SChris Mason */ 464be0e5c09SChris Mason while(bal_start > 0) { 465eb60ceacSChris Mason b_buffer = bal[bal_start]; 466eb60ceacSChris Mason c = &path->nodes[bal_start]->node; 467eb60ceacSChris Mason __insert_ptr(root, path, b_buffer->node.keys, b_buffer->blocknr, 468be0e5c09SChris Mason path->slots[bal_start + 1] + 1, bal_start + 1); 469be0e5c09SChris Mason if (path->slots[bal_start] >= c->header.nritems) { 470be0e5c09SChris Mason path->slots[bal_start] -= c->header.nritems; 471eb60ceacSChris Mason tree_block_release(root, path->nodes[bal_start]); 472eb60ceacSChris Mason path->nodes[bal_start] = b_buffer; 473be0e5c09SChris Mason path->slots[bal_start + 1] += 1; 474eb60ceacSChris Mason } else { 475eb60ceacSChris Mason tree_block_release(root, b_buffer); 476be0e5c09SChris Mason } 477be0e5c09SChris Mason bal_start--; 478be0e5c09SChris Mason if (!bal[bal_start]) 479be0e5c09SChris Mason break; 480be0e5c09SChris Mason } 48174123bd7SChris Mason /* Now that the tree has room, insert the requested pointer */ 482be0e5c09SChris Mason return __insert_ptr(root, path, key, blocknr, path->slots[level] + 1, 483be0e5c09SChris Mason level); 484be0e5c09SChris Mason } 485be0e5c09SChris Mason 48674123bd7SChris Mason /* 48774123bd7SChris Mason * how many bytes are required to store the items in a leaf. start 48874123bd7SChris Mason * and nr indicate which items in the leaf to check. This totals up the 48974123bd7SChris Mason * space used both by the item structs and the item data 49074123bd7SChris Mason */ 491be0e5c09SChris Mason int leaf_space_used(struct leaf *l, int start, int nr) 492be0e5c09SChris Mason { 493be0e5c09SChris Mason int data_len; 494be0e5c09SChris Mason int end = start + nr - 1; 495be0e5c09SChris Mason 496be0e5c09SChris Mason if (!nr) 497be0e5c09SChris Mason return 0; 498be0e5c09SChris Mason data_len = l->items[start].offset + l->items[start].size; 499be0e5c09SChris Mason data_len = data_len - l->items[end].offset; 500be0e5c09SChris Mason data_len += sizeof(struct item) * nr; 501be0e5c09SChris Mason return data_len; 502be0e5c09SChris Mason } 503be0e5c09SChris Mason 50474123bd7SChris Mason /* 50574123bd7SChris Mason * push some data in the path leaf to the left, trying to free up at 50674123bd7SChris Mason * least data_size bytes. returns zero if the push worked, nonzero otherwise 50774123bd7SChris Mason */ 508be0e5c09SChris Mason int push_leaf_left(struct ctree_root *root, struct ctree_path *path, 509be0e5c09SChris Mason int data_size) 510be0e5c09SChris Mason { 511eb60ceacSChris Mason struct tree_buffer *right_buf = path->nodes[0]; 512eb60ceacSChris Mason struct leaf *right = &right_buf->leaf; 513eb60ceacSChris Mason struct tree_buffer *t; 514be0e5c09SChris Mason struct leaf *left; 515be0e5c09SChris Mason int slot; 516be0e5c09SChris Mason int i; 517be0e5c09SChris Mason int free_space; 518be0e5c09SChris Mason int push_space = 0; 519be0e5c09SChris Mason int push_items = 0; 520be0e5c09SChris Mason struct item *item; 521be0e5c09SChris Mason int old_left_nritems; 522be0e5c09SChris Mason 523be0e5c09SChris Mason slot = path->slots[1]; 524be0e5c09SChris Mason if (slot == 0) { 525be0e5c09SChris Mason return 1; 526be0e5c09SChris Mason } 527be0e5c09SChris Mason if (!path->nodes[1]) { 528be0e5c09SChris Mason return 1; 529be0e5c09SChris Mason } 530eb60ceacSChris Mason t = read_tree_block(root, path->nodes[1]->node.blockptrs[slot - 1]); 531eb60ceacSChris Mason left = &t->leaf; 532be0e5c09SChris Mason free_space = leaf_free_space(left); 533be0e5c09SChris Mason if (free_space < data_size + sizeof(struct item)) { 534eb60ceacSChris Mason tree_block_release(root, t); 535be0e5c09SChris Mason return 1; 536be0e5c09SChris Mason } 537be0e5c09SChris Mason for (i = 0; i < right->header.nritems; i++) { 538be0e5c09SChris Mason item = right->items + i; 539be0e5c09SChris Mason if (path->slots[0] == i) 540be0e5c09SChris Mason push_space += data_size + sizeof(*item); 541be0e5c09SChris Mason if (item->size + sizeof(*item) + push_space > free_space) 542be0e5c09SChris Mason break; 543be0e5c09SChris Mason push_items++; 544be0e5c09SChris Mason push_space += item->size + sizeof(*item); 545be0e5c09SChris Mason } 546be0e5c09SChris Mason if (push_items == 0) { 547eb60ceacSChris Mason tree_block_release(root, t); 548be0e5c09SChris Mason return 1; 549be0e5c09SChris Mason } 550be0e5c09SChris Mason /* push data from right to left */ 551be0e5c09SChris Mason memcpy(left->items + left->header.nritems, 552be0e5c09SChris Mason right->items, push_items * sizeof(struct item)); 553be0e5c09SChris Mason push_space = LEAF_DATA_SIZE - right->items[push_items -1].offset; 554be0e5c09SChris Mason memcpy(left->data + leaf_data_end(left) - push_space, 555be0e5c09SChris Mason right->data + right->items[push_items - 1].offset, 556be0e5c09SChris Mason push_space); 557be0e5c09SChris Mason old_left_nritems = left->header.nritems; 558eb60ceacSChris Mason BUG_ON(old_left_nritems < 0); 559eb60ceacSChris Mason 560be0e5c09SChris Mason for(i = old_left_nritems; i < old_left_nritems + push_items; i++) { 561be0e5c09SChris Mason left->items[i].offset -= LEAF_DATA_SIZE - 562be0e5c09SChris Mason left->items[old_left_nritems -1].offset; 563be0e5c09SChris Mason } 564be0e5c09SChris Mason left->header.nritems += push_items; 565be0e5c09SChris Mason 566be0e5c09SChris Mason /* fixup right node */ 567be0e5c09SChris Mason push_space = right->items[push_items-1].offset - leaf_data_end(right); 568be0e5c09SChris Mason memmove(right->data + LEAF_DATA_SIZE - push_space, right->data + 569be0e5c09SChris Mason leaf_data_end(right), push_space); 570be0e5c09SChris Mason memmove(right->items, right->items + push_items, 571be0e5c09SChris Mason (right->header.nritems - push_items) * sizeof(struct item)); 572be0e5c09SChris Mason right->header.nritems -= push_items; 573be0e5c09SChris Mason push_space = LEAF_DATA_SIZE; 574eb60ceacSChris Mason 575be0e5c09SChris Mason for (i = 0; i < right->header.nritems; i++) { 576be0e5c09SChris Mason right->items[i].offset = push_space - right->items[i].size; 577be0e5c09SChris Mason push_space = right->items[i].offset; 578be0e5c09SChris Mason } 579eb60ceacSChris Mason 580eb60ceacSChris Mason write_tree_block(root, t); 581eb60ceacSChris Mason write_tree_block(root, right_buf); 582eb60ceacSChris Mason 583eb60ceacSChris Mason fixup_low_keys(root, path, &right->items[0].key, 1); 584be0e5c09SChris Mason 585be0e5c09SChris Mason /* then fixup the leaf pointer in the path */ 586be0e5c09SChris Mason if (path->slots[0] < push_items) { 587be0e5c09SChris Mason path->slots[0] += old_left_nritems; 588eb60ceacSChris Mason tree_block_release(root, path->nodes[0]); 589eb60ceacSChris Mason path->nodes[0] = t; 590be0e5c09SChris Mason path->slots[1] -= 1; 591be0e5c09SChris Mason } else { 592eb60ceacSChris Mason tree_block_release(root, t); 593be0e5c09SChris Mason path->slots[0] -= push_items; 594be0e5c09SChris Mason } 595eb60ceacSChris Mason BUG_ON(path->slots[0] < 0); 596be0e5c09SChris Mason return 0; 597be0e5c09SChris Mason } 598be0e5c09SChris Mason 59974123bd7SChris Mason /* 60074123bd7SChris Mason * split the path's leaf in two, making sure there is at least data_size 60174123bd7SChris Mason * available for the resulting leaf level of the path. 60274123bd7SChris Mason */ 603be0e5c09SChris Mason int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size) 604be0e5c09SChris Mason { 605eb60ceacSChris Mason struct tree_buffer *l_buf = path->nodes[0]; 606eb60ceacSChris Mason struct leaf *l = &l_buf->leaf; 607eb60ceacSChris Mason int nritems; 608eb60ceacSChris Mason int mid; 609eb60ceacSChris Mason int slot; 610be0e5c09SChris Mason struct leaf *right; 611eb60ceacSChris Mason struct tree_buffer *right_buffer; 612be0e5c09SChris Mason int space_needed = data_size + sizeof(struct item); 613be0e5c09SChris Mason int data_copy_size; 614be0e5c09SChris Mason int rt_data_off; 615be0e5c09SChris Mason int i; 616be0e5c09SChris Mason int ret; 617be0e5c09SChris Mason 618be0e5c09SChris Mason if (push_leaf_left(root, path, data_size) == 0) { 619eb60ceacSChris Mason l_buf = path->nodes[0]; 620eb60ceacSChris Mason l = &l_buf->leaf; 621eb60ceacSChris Mason if (leaf_free_space(l) >= sizeof(struct item) + data_size) 622be0e5c09SChris Mason return 0; 623be0e5c09SChris Mason } 624eb60ceacSChris Mason slot = path->slots[0]; 625eb60ceacSChris Mason nritems = l->header.nritems; 626eb60ceacSChris Mason mid = (nritems + 1)/ 2; 627eb60ceacSChris Mason 628eb60ceacSChris Mason right_buffer = alloc_free_block(root); 629eb60ceacSChris Mason BUG_ON(!right_buffer); 630eb60ceacSChris Mason BUG_ON(mid == nritems); 631eb60ceacSChris Mason right = &right_buffer->leaf; 632be0e5c09SChris Mason memset(right, 0, sizeof(*right)); 633be0e5c09SChris Mason if (mid <= slot) { 634be0e5c09SChris Mason if (leaf_space_used(l, mid, nritems - mid) + space_needed > 635be0e5c09SChris Mason LEAF_DATA_SIZE) 636be0e5c09SChris Mason BUG(); 637be0e5c09SChris Mason } else { 638be0e5c09SChris Mason if (leaf_space_used(l, 0, mid + 1) + space_needed > 639be0e5c09SChris Mason LEAF_DATA_SIZE) 640be0e5c09SChris Mason BUG(); 641be0e5c09SChris Mason } 642be0e5c09SChris Mason right->header.nritems = nritems - mid; 643eb60ceacSChris Mason right->header.blocknr = right_buffer->blocknr; 644eb60ceacSChris Mason right->header.flags = node_level(0); 645be0e5c09SChris Mason data_copy_size = l->items[mid].offset + l->items[mid].size - 646be0e5c09SChris Mason leaf_data_end(l); 647be0e5c09SChris Mason memcpy(right->items, l->items + mid, 648be0e5c09SChris Mason (nritems - mid) * sizeof(struct item)); 649be0e5c09SChris Mason memcpy(right->data + LEAF_DATA_SIZE - data_copy_size, 650be0e5c09SChris Mason l->data + leaf_data_end(l), data_copy_size); 651be0e5c09SChris Mason rt_data_off = LEAF_DATA_SIZE - 652be0e5c09SChris Mason (l->items[mid].offset + l->items[mid].size); 65374123bd7SChris Mason 65474123bd7SChris Mason for (i = 0; i < right->header.nritems; i++) 655be0e5c09SChris Mason right->items[i].offset += rt_data_off; 65674123bd7SChris Mason 657be0e5c09SChris Mason l->header.nritems = mid; 658be0e5c09SChris Mason ret = insert_ptr(root, path, &right->items[0].key, 659eb60ceacSChris Mason right_buffer->blocknr, 1); 660eb60ceacSChris Mason 661eb60ceacSChris Mason write_tree_block(root, right_buffer); 662eb60ceacSChris Mason write_tree_block(root, l_buf); 663eb60ceacSChris Mason 664eb60ceacSChris Mason BUG_ON(path->slots[0] != slot); 665be0e5c09SChris Mason if (mid <= slot) { 666eb60ceacSChris Mason tree_block_release(root, path->nodes[0]); 667eb60ceacSChris Mason path->nodes[0] = right_buffer; 668be0e5c09SChris Mason path->slots[0] -= mid; 669be0e5c09SChris Mason path->slots[1] += 1; 670eb60ceacSChris Mason } else 671eb60ceacSChris Mason tree_block_release(root, right_buffer); 672eb60ceacSChris Mason BUG_ON(path->slots[0] < 0); 673be0e5c09SChris Mason return ret; 674be0e5c09SChris Mason } 675be0e5c09SChris Mason 67674123bd7SChris Mason /* 67774123bd7SChris Mason * Given a key and some data, insert an item into the tree. 67874123bd7SChris Mason * This does all the path init required, making room in the tree if needed. 67974123bd7SChris Mason */ 680be0e5c09SChris Mason int insert_item(struct ctree_root *root, struct key *key, 681be0e5c09SChris Mason void *data, int data_size) 682be0e5c09SChris Mason { 683be0e5c09SChris Mason int ret; 684be0e5c09SChris Mason int slot; 685eb60ceacSChris Mason int slot_orig; 686be0e5c09SChris Mason struct leaf *leaf; 687eb60ceacSChris Mason struct tree_buffer *leaf_buf; 688be0e5c09SChris Mason unsigned int nritems; 689be0e5c09SChris Mason unsigned int data_end; 690be0e5c09SChris Mason struct ctree_path path; 691be0e5c09SChris Mason 69274123bd7SChris Mason /* create a root if there isn't one */ 693eb60ceacSChris Mason if (!root->node) { 694eb60ceacSChris Mason struct tree_buffer *t; 695eb60ceacSChris Mason t = alloc_free_block(root); 696eb60ceacSChris Mason BUG_ON(!t); 697eb60ceacSChris Mason t->node.header.nritems = 0; 698eb60ceacSChris Mason t->node.header.flags = node_level(0); 699eb60ceacSChris Mason t->node.header.blocknr = t->blocknr; 700eb60ceacSChris Mason root->node = t; 701eb60ceacSChris Mason write_tree_block(root, t); 702eb60ceacSChris Mason } 703be0e5c09SChris Mason init_path(&path); 704be0e5c09SChris Mason ret = search_slot(root, key, &path); 705eb60ceacSChris Mason if (ret == 0) { 706eb60ceacSChris Mason release_path(root, &path); 707be0e5c09SChris Mason return -EEXIST; 708eb60ceacSChris Mason } 709be0e5c09SChris Mason 710eb60ceacSChris Mason slot_orig = path.slots[0]; 711eb60ceacSChris Mason leaf_buf = path.nodes[0]; 712eb60ceacSChris Mason leaf = &leaf_buf->leaf; 71374123bd7SChris Mason 71474123bd7SChris Mason /* make room if needed */ 715eb60ceacSChris Mason if (leaf_free_space(leaf) < sizeof(struct item) + data_size) { 716be0e5c09SChris Mason split_leaf(root, &path, data_size); 717eb60ceacSChris Mason leaf_buf = path.nodes[0]; 718eb60ceacSChris Mason leaf = &path.nodes[0]->leaf; 719eb60ceacSChris Mason } 720be0e5c09SChris Mason nritems = leaf->header.nritems; 721be0e5c09SChris Mason data_end = leaf_data_end(leaf); 722eb60ceacSChris Mason 723be0e5c09SChris Mason if (leaf_free_space(leaf) < sizeof(struct item) + data_size) 724be0e5c09SChris Mason BUG(); 725be0e5c09SChris Mason 726be0e5c09SChris Mason slot = path.slots[0]; 727eb60ceacSChris Mason BUG_ON(slot < 0); 728be0e5c09SChris Mason if (slot == 0) 729eb60ceacSChris Mason fixup_low_keys(root, &path, key, 1); 730be0e5c09SChris Mason if (slot != nritems) { 731be0e5c09SChris Mason int i; 732be0e5c09SChris Mason unsigned int old_data = leaf->items[slot].offset + 733be0e5c09SChris Mason leaf->items[slot].size; 734be0e5c09SChris Mason 735be0e5c09SChris Mason /* 736be0e5c09SChris Mason * item0..itemN ... dataN.offset..dataN.size .. data0.size 737be0e5c09SChris Mason */ 738be0e5c09SChris Mason /* first correct the data pointers */ 739be0e5c09SChris Mason for (i = slot; i < nritems; i++) 740be0e5c09SChris Mason leaf->items[i].offset -= data_size; 741be0e5c09SChris Mason 742be0e5c09SChris Mason /* shift the items */ 743be0e5c09SChris Mason memmove(leaf->items + slot + 1, leaf->items + slot, 744be0e5c09SChris Mason (nritems - slot) * sizeof(struct item)); 745be0e5c09SChris Mason 746be0e5c09SChris Mason /* shift the data */ 747be0e5c09SChris Mason memmove(leaf->data + data_end - data_size, leaf->data + 748be0e5c09SChris Mason data_end, old_data - data_end); 749be0e5c09SChris Mason data_end = old_data; 750be0e5c09SChris Mason } 75174123bd7SChris Mason /* copy the new data in */ 752be0e5c09SChris Mason memcpy(&leaf->items[slot].key, key, sizeof(struct key)); 753be0e5c09SChris Mason leaf->items[slot].offset = data_end - data_size; 754be0e5c09SChris Mason leaf->items[slot].size = data_size; 755be0e5c09SChris Mason memcpy(leaf->data + data_end - data_size, data, data_size); 756be0e5c09SChris Mason leaf->header.nritems += 1; 757eb60ceacSChris Mason write_tree_block(root, leaf_buf); 758be0e5c09SChris Mason if (leaf_free_space(leaf) < 0) 759be0e5c09SChris Mason BUG(); 760eb60ceacSChris Mason release_path(root, &path); 761d97e63b6SChris Mason refill_alloc_extent(root); 762be0e5c09SChris Mason return 0; 763be0e5c09SChris Mason } 764be0e5c09SChris Mason 76574123bd7SChris Mason /* 76674123bd7SChris Mason * delete the pointer from a given level in the path. The path is not 76774123bd7SChris Mason * fixed up, so after calling this it is not valid at that level. 76874123bd7SChris Mason * 76974123bd7SChris Mason * If the delete empties a node, the node is removed from the tree, 77074123bd7SChris Mason * continuing all the way the root if required. The root is converted into 77174123bd7SChris Mason * a leaf if all the nodes are emptied. 77274123bd7SChris Mason */ 773be0e5c09SChris Mason int del_ptr(struct ctree_root *root, struct ctree_path *path, int level) 774be0e5c09SChris Mason { 775be0e5c09SChris Mason int slot; 776eb60ceacSChris Mason struct tree_buffer *t; 777be0e5c09SChris Mason struct node *node; 778be0e5c09SChris Mason int nritems; 779be0e5c09SChris Mason 780be0e5c09SChris Mason while(1) { 781eb60ceacSChris Mason t = path->nodes[level]; 782eb60ceacSChris Mason if (!t) 783be0e5c09SChris Mason break; 784eb60ceacSChris Mason node = &t->node; 785be0e5c09SChris Mason slot = path->slots[level]; 786be0e5c09SChris Mason nritems = node->header.nritems; 787be0e5c09SChris Mason 788be0e5c09SChris Mason if (slot != nritems -1) { 789be0e5c09SChris Mason memmove(node->keys + slot, node->keys + slot + 1, 790be0e5c09SChris Mason sizeof(struct key) * (nritems - slot - 1)); 791be0e5c09SChris Mason memmove(node->blockptrs + slot, 792be0e5c09SChris Mason node->blockptrs + slot + 1, 793be0e5c09SChris Mason sizeof(u64) * (nritems - slot - 1)); 794be0e5c09SChris Mason } 795be0e5c09SChris Mason node->header.nritems--; 796eb60ceacSChris Mason write_tree_block(root, t); 797be0e5c09SChris Mason if (node->header.nritems != 0) { 798be0e5c09SChris Mason int tslot; 799be0e5c09SChris Mason if (slot == 0) 800eb60ceacSChris Mason fixup_low_keys(root, path, node->keys, 801eb60ceacSChris Mason level + 1); 802be0e5c09SChris Mason tslot = path->slots[level+1]; 803eb60ceacSChris Mason t->count++; 804be0e5c09SChris Mason push_node_left(root, path, level); 805be0e5c09SChris Mason if (node->header.nritems) { 806be0e5c09SChris Mason push_node_right(root, path, level); 807be0e5c09SChris Mason } 808eb60ceacSChris Mason if (node->header.nritems) { 809eb60ceacSChris Mason tree_block_release(root, t); 810be0e5c09SChris Mason break; 811eb60ceacSChris Mason } 812eb60ceacSChris Mason tree_block_release(root, t); 8134920c9acSChris Mason path->slots[level+1] = tslot; 814be0e5c09SChris Mason } 815eb60ceacSChris Mason if (t == root->node) { 816eb60ceacSChris Mason /* just turn the root into a leaf and break */ 817eb60ceacSChris Mason root->node->node.header.flags = node_level(0); 818eb60ceacSChris Mason write_tree_block(root, t); 819be0e5c09SChris Mason break; 820be0e5c09SChris Mason } 821be0e5c09SChris Mason level++; 822be0e5c09SChris Mason if (!path->nodes[level]) 823be0e5c09SChris Mason BUG(); 824be0e5c09SChris Mason } 825be0e5c09SChris Mason return 0; 826be0e5c09SChris Mason } 827be0e5c09SChris Mason 82874123bd7SChris Mason /* 82974123bd7SChris Mason * delete the item at the leaf level in path. If that empties 83074123bd7SChris Mason * the leaf, remove it from the tree 83174123bd7SChris Mason */ 8324920c9acSChris Mason int del_item(struct ctree_root *root, struct ctree_path *path) 833be0e5c09SChris Mason { 834be0e5c09SChris Mason int slot; 835be0e5c09SChris Mason struct leaf *leaf; 836eb60ceacSChris Mason struct tree_buffer *leaf_buf; 837be0e5c09SChris Mason int doff; 838be0e5c09SChris Mason int dsize; 839be0e5c09SChris Mason 840eb60ceacSChris Mason leaf_buf = path->nodes[0]; 841eb60ceacSChris Mason leaf = &leaf_buf->leaf; 8424920c9acSChris Mason slot = path->slots[0]; 843be0e5c09SChris Mason doff = leaf->items[slot].offset; 844be0e5c09SChris Mason dsize = leaf->items[slot].size; 845be0e5c09SChris Mason 846be0e5c09SChris Mason if (slot != leaf->header.nritems - 1) { 847be0e5c09SChris Mason int i; 848be0e5c09SChris Mason int data_end = leaf_data_end(leaf); 849be0e5c09SChris Mason memmove(leaf->data + data_end + dsize, 850be0e5c09SChris Mason leaf->data + data_end, 851be0e5c09SChris Mason doff - data_end); 852be0e5c09SChris Mason for (i = slot + 1; i < leaf->header.nritems; i++) 853be0e5c09SChris Mason leaf->items[i].offset += dsize; 854be0e5c09SChris Mason memmove(leaf->items + slot, leaf->items + slot + 1, 855be0e5c09SChris Mason sizeof(struct item) * 856be0e5c09SChris Mason (leaf->header.nritems - slot - 1)); 857be0e5c09SChris Mason } 858be0e5c09SChris Mason leaf->header.nritems -= 1; 85974123bd7SChris Mason /* delete the leaf if we've emptied it */ 860be0e5c09SChris Mason if (leaf->header.nritems == 0) { 861eb60ceacSChris Mason if (leaf_buf == root->node) { 862eb60ceacSChris Mason leaf->header.flags = node_level(0); 863eb60ceacSChris Mason write_tree_block(root, leaf_buf); 864eb60ceacSChris Mason } else 8654920c9acSChris Mason del_ptr(root, path, 1); 866be0e5c09SChris Mason } else { 867be0e5c09SChris Mason if (slot == 0) 868eb60ceacSChris Mason fixup_low_keys(root, path, &leaf->items[0].key, 1); 869eb60ceacSChris Mason write_tree_block(root, leaf_buf); 87074123bd7SChris Mason /* delete the leaf if it is mostly empty */ 871be0e5c09SChris Mason if (leaf_space_used(leaf, 0, leaf->header.nritems) < 872be0e5c09SChris Mason LEAF_DATA_SIZE / 4) { 873be0e5c09SChris Mason /* push_leaf_left fixes the path. 874be0e5c09SChris Mason * make sure the path still points to our leaf 875be0e5c09SChris Mason * for possible call to del_ptr below 876be0e5c09SChris Mason */ 8774920c9acSChris Mason slot = path->slots[1]; 878eb60ceacSChris Mason leaf_buf->count++; 8794920c9acSChris Mason push_leaf_left(root, path, 1); 880be0e5c09SChris Mason if (leaf->header.nritems == 0) { 8814920c9acSChris Mason path->slots[1] = slot; 8824920c9acSChris Mason del_ptr(root, path, 1); 883be0e5c09SChris Mason } 884eb60ceacSChris Mason tree_block_release(root, leaf_buf); 885be0e5c09SChris Mason } 886be0e5c09SChris Mason } 887be0e5c09SChris Mason return 0; 888be0e5c09SChris Mason } 889be0e5c09SChris Mason 890d97e63b6SChris Mason int next_leaf(struct ctree_root *root, struct ctree_path *path) 891d97e63b6SChris Mason { 892d97e63b6SChris Mason int slot; 893d97e63b6SChris Mason int level = 1; 894d97e63b6SChris Mason u64 blocknr; 895d97e63b6SChris Mason struct tree_buffer *c; 896d97e63b6SChris Mason struct tree_buffer *next; 897d97e63b6SChris Mason 898d97e63b6SChris Mason while(level < MAX_LEVEL) { 899d97e63b6SChris Mason if (!path->nodes[level]) 900d97e63b6SChris Mason return -1; 901d97e63b6SChris Mason slot = path->slots[level] + 1; 902d97e63b6SChris Mason c = path->nodes[level]; 903d97e63b6SChris Mason if (slot >= c->node.header.nritems) { 904d97e63b6SChris Mason level++; 905d97e63b6SChris Mason continue; 906d97e63b6SChris Mason } 907d97e63b6SChris Mason blocknr = c->node.blockptrs[slot]; 908d97e63b6SChris Mason next = read_tree_block(root, blocknr); 909d97e63b6SChris Mason break; 910d97e63b6SChris Mason } 911d97e63b6SChris Mason path->slots[level] = slot; 912d97e63b6SChris Mason while(1) { 913d97e63b6SChris Mason level--; 914d97e63b6SChris Mason c = path->nodes[level]; 915d97e63b6SChris Mason tree_block_release(root, c); 916d97e63b6SChris Mason path->nodes[level] = next; 917d97e63b6SChris Mason path->slots[level] = 0; 918d97e63b6SChris Mason if (!level) 919d97e63b6SChris Mason break; 920d97e63b6SChris Mason next = read_tree_block(root, next->node.blockptrs[0]); 921d97e63b6SChris Mason } 922d97e63b6SChris Mason return 0; 923d97e63b6SChris Mason } 924d97e63b6SChris Mason 925d97e63b6SChris Mason int alloc_extent(struct ctree_root *root, u64 num_blocks, u64 search_start, 926d97e63b6SChris Mason u64 search_end, u64 owner, struct key *ins) 927d97e63b6SChris Mason { 928d97e63b6SChris Mason struct ctree_path path; 929d97e63b6SChris Mason struct key *key; 930d97e63b6SChris Mason int ret; 931d97e63b6SChris Mason u64 hole_size = 0; 932d97e63b6SChris Mason int slot = 0; 933d97e63b6SChris Mason u64 last_block; 934d97e63b6SChris Mason int start_found = 0; 935d97e63b6SChris Mason struct leaf *l; 936d97e63b6SChris Mason struct extent_item extent_item; 937d97e63b6SChris Mason 938d97e63b6SChris Mason init_path(&path); 939d97e63b6SChris Mason ins->objectid = search_start; 940d97e63b6SChris Mason ins->offset = 0; 941d97e63b6SChris Mason ins->flags = 0; 942d97e63b6SChris Mason 943d97e63b6SChris Mason ret = search_slot(root, ins, &path); 944d97e63b6SChris Mason while (1) { 945d97e63b6SChris Mason l = &path.nodes[0]->leaf; 946d97e63b6SChris Mason slot = path.slots[0]; 947d97e63b6SChris Mason if (!l) { 948d97e63b6SChris Mason // FIXME allocate root 949d97e63b6SChris Mason } 950d97e63b6SChris Mason if (slot >= l->header.nritems) { 951d97e63b6SChris Mason ret = next_leaf(root, &path); 952d97e63b6SChris Mason if (ret == 0) 953d97e63b6SChris Mason continue; 954d97e63b6SChris Mason if (!start_found) { 955d97e63b6SChris Mason ins->objectid = search_start; 956d97e63b6SChris Mason ins->offset = num_blocks; 957d97e63b6SChris Mason hole_size = search_end - search_start; 958d97e63b6SChris Mason goto insert; 959d97e63b6SChris Mason } 960d97e63b6SChris Mason ins->objectid = last_block; 961d97e63b6SChris Mason ins->offset = num_blocks; 962d97e63b6SChris Mason hole_size = search_end - last_block; 963d97e63b6SChris Mason goto insert; 964d97e63b6SChris Mason } 965d97e63b6SChris Mason key = &l->items[slot].key; 966d97e63b6SChris Mason if (start_found) { 967d97e63b6SChris Mason hole_size = key->objectid - last_block; 968d97e63b6SChris Mason if (hole_size > num_blocks) { 969d97e63b6SChris Mason ins->objectid = last_block; 970d97e63b6SChris Mason ins->offset = num_blocks; 971d97e63b6SChris Mason goto insert; 972d97e63b6SChris Mason } 973d97e63b6SChris Mason } else 974d97e63b6SChris Mason start_found = 1; 975d97e63b6SChris Mason last_block = key->objectid + key->offset; 976d97e63b6SChris Mason path.slots[0]++; 977d97e63b6SChris Mason printf("last block is not %lu\n", last_block); 978d97e63b6SChris Mason } 979d97e63b6SChris Mason // FIXME -ENOSPC 980d97e63b6SChris Mason insert: 981d97e63b6SChris Mason extent_item.refs = 1; 982d97e63b6SChris Mason extent_item.owner = owner; 983d97e63b6SChris Mason ret = insert_item(root, ins, &extent_item, sizeof(extent_item)); 984d97e63b6SChris Mason return ret; 985d97e63b6SChris Mason } 986d97e63b6SChris Mason 987d97e63b6SChris Mason static int refill_alloc_extent(struct ctree_root *root) 988d97e63b6SChris Mason { 989d97e63b6SChris Mason struct alloc_extent *ae = root->alloc_extent; 990d97e63b6SChris Mason struct key key; 991d97e63b6SChris Mason int ret; 992d97e63b6SChris Mason int min_blocks = MAX_LEVEL * 2; 993d97e63b6SChris Mason 994d97e63b6SChris Mason printf("refill alloc root %p, numused %lu total %lu\n", root, ae->num_used, ae->num_blocks); 995d97e63b6SChris Mason if (ae->num_blocks > ae->num_used && ae->num_blocks - ae->num_used > 996d97e63b6SChris Mason min_blocks) 997d97e63b6SChris Mason return 0; 998d97e63b6SChris Mason ae = root->reserve_extent; 999d97e63b6SChris Mason if (ae->num_blocks > ae->num_used) { 1000d97e63b6SChris Mason if (root->alloc_extent->num_blocks == 0) { 1001d97e63b6SChris Mason /* we should swap reserve/alloc_extent when alloc 1002d97e63b6SChris Mason * fills up 1003d97e63b6SChris Mason */ 1004d97e63b6SChris Mason BUG(); 1005d97e63b6SChris Mason } 1006d97e63b6SChris Mason if (ae->num_blocks - ae->num_used < min_blocks) 1007d97e63b6SChris Mason BUG(); 1008d97e63b6SChris Mason return 0; 1009d97e63b6SChris Mason } 1010d97e63b6SChris Mason // FIXME, this recurses 1011d97e63b6SChris Mason ret = alloc_extent(root->extent_root, 1012d97e63b6SChris Mason min_blocks * 2, 0, (unsigned long)-1, 0, &key); 1013d97e63b6SChris Mason ae->blocknr = key.objectid; 1014d97e63b6SChris Mason ae->num_blocks = key.offset; 1015d97e63b6SChris Mason ae->num_used = 0; 1016d97e63b6SChris Mason return ret; 1017d97e63b6SChris Mason } 1018d97e63b6SChris Mason 1019be0e5c09SChris Mason void print_leaf(struct leaf *l) 1020be0e5c09SChris Mason { 1021be0e5c09SChris Mason int i; 1022be0e5c09SChris Mason int nr = l->header.nritems; 1023be0e5c09SChris Mason struct item *item; 1024eb60ceacSChris Mason printf("leaf %lu total ptrs %d free space %d\n", l->header.blocknr, nr, 1025be0e5c09SChris Mason leaf_free_space(l)); 1026be0e5c09SChris Mason fflush(stdout); 1027be0e5c09SChris Mason for (i = 0 ; i < nr ; i++) { 1028be0e5c09SChris Mason item = l->items + i; 1029be0e5c09SChris Mason printf("\titem %d key (%lu %u %lu) itemoff %d itemsize %d\n", 1030be0e5c09SChris Mason i, 1031be0e5c09SChris Mason item->key.objectid, item->key.flags, item->key.offset, 1032be0e5c09SChris Mason item->offset, item->size); 1033be0e5c09SChris Mason fflush(stdout); 1034be0e5c09SChris Mason printf("\t\titem data %.*s\n", item->size, l->data+item->offset); 1035be0e5c09SChris Mason fflush(stdout); 1036be0e5c09SChris Mason } 1037be0e5c09SChris Mason } 1038eb60ceacSChris Mason void print_tree(struct ctree_root *root, struct tree_buffer *t) 1039be0e5c09SChris Mason { 1040be0e5c09SChris Mason int i; 1041be0e5c09SChris Mason int nr; 1042eb60ceacSChris Mason struct node *c; 1043be0e5c09SChris Mason 1044eb60ceacSChris Mason if (!t) 1045be0e5c09SChris Mason return; 1046eb60ceacSChris Mason c = &t->node; 1047be0e5c09SChris Mason nr = c->header.nritems; 1048eb60ceacSChris Mason if (c->header.blocknr != t->blocknr) 1049eb60ceacSChris Mason BUG(); 1050be0e5c09SChris Mason if (is_leaf(c->header.flags)) { 1051be0e5c09SChris Mason print_leaf((struct leaf *)c); 1052be0e5c09SChris Mason return; 1053be0e5c09SChris Mason } 1054eb60ceacSChris Mason printf("node %lu level %d total ptrs %d free spc %lu\n", t->blocknr, 1055be0e5c09SChris Mason node_level(c->header.flags), c->header.nritems, 1056be0e5c09SChris Mason NODEPTRS_PER_BLOCK - c->header.nritems); 1057be0e5c09SChris Mason fflush(stdout); 1058be0e5c09SChris Mason for (i = 0; i < nr; i++) { 1059eb60ceacSChris Mason printf("\tkey %d (%lu %u %lu) block %lu\n", 1060be0e5c09SChris Mason i, 1061be0e5c09SChris Mason c->keys[i].objectid, c->keys[i].flags, c->keys[i].offset, 1062be0e5c09SChris Mason c->blockptrs[i]); 1063be0e5c09SChris Mason fflush(stdout); 1064be0e5c09SChris Mason } 1065be0e5c09SChris Mason for (i = 0; i < nr; i++) { 1066eb60ceacSChris Mason struct tree_buffer *next_buf = read_tree_block(root, 1067eb60ceacSChris Mason c->blockptrs[i]); 1068eb60ceacSChris Mason struct node *next = &next_buf->node; 1069be0e5c09SChris Mason if (is_leaf(next->header.flags) && 1070be0e5c09SChris Mason node_level(c->header.flags) != 1) 1071be0e5c09SChris Mason BUG(); 1072be0e5c09SChris Mason if (node_level(next->header.flags) != 1073be0e5c09SChris Mason node_level(c->header.flags) - 1) 1074be0e5c09SChris Mason BUG(); 1075eb60ceacSChris Mason print_tree(root, next_buf); 1076eb60ceacSChris Mason tree_block_release(root, next_buf); 1077be0e5c09SChris Mason } 1078be0e5c09SChris Mason 1079be0e5c09SChris Mason } 1080be0e5c09SChris Mason 1081be0e5c09SChris Mason /* for testing only */ 1082be0e5c09SChris Mason int next_key(int i, int max_key) { 1083d97e63b6SChris Mason // return rand() % max_key; 1084d97e63b6SChris Mason return i; 1085be0e5c09SChris Mason } 1086be0e5c09SChris Mason 1087be0e5c09SChris Mason int main() { 1088eb60ceacSChris Mason struct ctree_root *root; 1089be0e5c09SChris Mason struct key ins; 10904920c9acSChris Mason struct key last = { (u64)-1, 0, 0}; 1091be0e5c09SChris Mason char *buf; 1092be0e5c09SChris Mason int i; 1093be0e5c09SChris Mason int num; 1094be0e5c09SChris Mason int ret; 1095d97e63b6SChris Mason int run_size = 256; 1096be0e5c09SChris Mason int max_key = 100000000; 1097be0e5c09SChris Mason int tree_size = 0; 1098be0e5c09SChris Mason struct ctree_path path; 1099be0e5c09SChris Mason 1100eb60ceacSChris Mason radix_tree_init(); 1101eb60ceacSChris Mason 1102eb60ceacSChris Mason 1103eb60ceacSChris Mason root = open_ctree("dbfile"); 1104be0e5c09SChris Mason 1105be0e5c09SChris Mason srand(55); 1106be0e5c09SChris Mason for (i = 0; i < run_size; i++) { 1107be0e5c09SChris Mason buf = malloc(64); 1108be0e5c09SChris Mason num = next_key(i, max_key); 1109be0e5c09SChris Mason // num = i; 1110be0e5c09SChris Mason sprintf(buf, "string-%d", num); 1111be0e5c09SChris Mason // printf("insert %d\n", num); 1112be0e5c09SChris Mason ins.objectid = num; 1113be0e5c09SChris Mason ins.offset = 0; 1114be0e5c09SChris Mason ins.flags = 0; 1115d97e63b6SChris Mason printf("insert %d\n", i); 1116eb60ceacSChris Mason ret = insert_item(root, &ins, buf, strlen(buf)); 1117be0e5c09SChris Mason if (!ret) 1118be0e5c09SChris Mason tree_size++; 1119d97e63b6SChris Mason printf("done insert %d\n", i); 1120be0e5c09SChris Mason } 1121d97e63b6SChris Mason printf("root used: %lu\n", root->alloc_extent->num_used); 1122d97e63b6SChris Mason printf("root tree\n"); 1123d97e63b6SChris Mason print_tree(root, root->node); 1124d97e63b6SChris Mason printf("map tree\n"); 1125d97e63b6SChris Mason printf("map used: %lu\n", root->extent_root->alloc_extent->num_used); 1126d97e63b6SChris Mason print_tree(root->extent_root, root->extent_root->node); 1127d97e63b6SChris Mason exit(1); 1128d97e63b6SChris Mason 1129eb60ceacSChris Mason close_ctree(root); 1130eb60ceacSChris Mason root = open_ctree("dbfile"); 1131eb60ceacSChris Mason printf("starting search\n"); 1132be0e5c09SChris Mason srand(55); 1133be0e5c09SChris Mason for (i = 0; i < run_size; i++) { 1134be0e5c09SChris Mason num = next_key(i, max_key); 1135be0e5c09SChris Mason ins.objectid = num; 1136be0e5c09SChris Mason init_path(&path); 1137eb60ceacSChris Mason ret = search_slot(root, &ins, &path); 1138be0e5c09SChris Mason if (ret) { 1139eb60ceacSChris Mason print_tree(root, root->node); 1140be0e5c09SChris Mason printf("unable to find %d\n", num); 1141be0e5c09SChris Mason exit(1); 1142be0e5c09SChris Mason } 1143eb60ceacSChris Mason release_path(root, &path); 1144be0e5c09SChris Mason } 1145eb60ceacSChris Mason close_ctree(root); 1146eb60ceacSChris Mason root = open_ctree("dbfile"); 1147eb60ceacSChris Mason printf("node %p level %d total ptrs %d free spc %lu\n", root->node, 1148eb60ceacSChris Mason node_level(root->node->node.header.flags), 1149eb60ceacSChris Mason root->node->node.header.nritems, 1150eb60ceacSChris Mason NODEPTRS_PER_BLOCK - root->node->node.header.nritems); 1151eb60ceacSChris Mason printf("all searches good, deleting some items\n"); 1152be0e5c09SChris Mason i = 0; 1153be0e5c09SChris Mason srand(55); 11544920c9acSChris Mason for (i = 0 ; i < run_size/4; i++) { 1155be0e5c09SChris Mason num = next_key(i, max_key); 1156be0e5c09SChris Mason ins.objectid = num; 11574920c9acSChris Mason init_path(&path); 1158eb60ceacSChris Mason ret = search_slot(root, &ins, &path); 11594920c9acSChris Mason if (ret) 11604920c9acSChris Mason continue; 1161eb60ceacSChris Mason ret = del_item(root, &path); 11624920c9acSChris Mason if (ret != 0) 11634920c9acSChris Mason BUG(); 1164eb60ceacSChris Mason release_path(root, &path); 11654920c9acSChris Mason tree_size--; 11664920c9acSChris Mason } 11674920c9acSChris Mason srand(128); 11684920c9acSChris Mason for (i = 0; i < run_size; i++) { 11694920c9acSChris Mason buf = malloc(64); 11704920c9acSChris Mason num = next_key(i, max_key); 11714920c9acSChris Mason sprintf(buf, "string-%d", num); 11724920c9acSChris Mason ins.objectid = num; 1173eb60ceacSChris Mason ret = insert_item(root, &ins, buf, strlen(buf)); 11744920c9acSChris Mason if (!ret) 11754920c9acSChris Mason tree_size++; 11764920c9acSChris Mason } 1177eb60ceacSChris Mason close_ctree(root); 1178eb60ceacSChris Mason root = open_ctree("dbfile"); 1179eb60ceacSChris Mason printf("starting search2\n"); 1180eb60ceacSChris Mason srand(128); 1181eb60ceacSChris Mason for (i = 0; i < run_size; i++) { 1182eb60ceacSChris Mason num = next_key(i, max_key); 1183eb60ceacSChris Mason ins.objectid = num; 1184eb60ceacSChris Mason init_path(&path); 1185eb60ceacSChris Mason ret = search_slot(root, &ins, &path); 1186eb60ceacSChris Mason if (ret) { 1187eb60ceacSChris Mason print_tree(root, root->node); 1188eb60ceacSChris Mason printf("unable to find %d\n", num); 1189eb60ceacSChris Mason exit(1); 1190eb60ceacSChris Mason } 1191eb60ceacSChris Mason release_path(root, &path); 1192eb60ceacSChris Mason } 1193eb60ceacSChris Mason printf("starting big long delete run\n"); 1194eb60ceacSChris Mason while(root->node && root->node->node.header.nritems > 0) { 11954920c9acSChris Mason struct leaf *leaf; 11964920c9acSChris Mason int slot; 11974920c9acSChris Mason ins.objectid = (u64)-1; 11984920c9acSChris Mason init_path(&path); 1199eb60ceacSChris Mason ret = search_slot(root, &ins, &path); 12004920c9acSChris Mason if (ret == 0) 12014920c9acSChris Mason BUG(); 12024920c9acSChris Mason 1203eb60ceacSChris Mason leaf = &path.nodes[0]->leaf; 12044920c9acSChris Mason slot = path.slots[0]; 12054920c9acSChris Mason if (slot != leaf->header.nritems) 12064920c9acSChris Mason BUG(); 12074920c9acSChris Mason while(path.slots[0] > 0) { 12084920c9acSChris Mason path.slots[0] -= 1; 12094920c9acSChris Mason slot = path.slots[0]; 1210eb60ceacSChris Mason leaf = &path.nodes[0]->leaf; 12114920c9acSChris Mason 12124920c9acSChris Mason if (comp_keys(&last, &leaf->items[slot].key) <= 0) 12134920c9acSChris Mason BUG(); 12144920c9acSChris Mason memcpy(&last, &leaf->items[slot].key, sizeof(last)); 1215eb60ceacSChris Mason ret = del_item(root, &path); 1216eb60ceacSChris Mason if (ret != 0) { 1217eb60ceacSChris Mason printf("del_item returned %d\n", ret); 12184920c9acSChris Mason BUG(); 1219eb60ceacSChris Mason } 12204920c9acSChris Mason tree_size--; 12214920c9acSChris Mason } 1222eb60ceacSChris Mason release_path(root, &path); 1223be0e5c09SChris Mason } 1224eb60ceacSChris Mason close_ctree(root); 12254920c9acSChris Mason printf("tree size is now %d\n", tree_size); 1226be0e5c09SChris Mason return 0; 1227be0e5c09SChris Mason } 1228