1be0e5c09SChris Mason #include <stdio.h> 2be0e5c09SChris Mason #include <stdlib.h> 3be0e5c09SChris Mason #include "kerncompat.h" 4be0e5c09SChris Mason 5be0e5c09SChris Mason #define BLOCKSIZE 4096 6be0e5c09SChris Mason 7be0e5c09SChris Mason struct key { 8be0e5c09SChris Mason u64 objectid; 9be0e5c09SChris Mason u32 flags; 10be0e5c09SChris Mason u64 offset; 11be0e5c09SChris Mason } __attribute__ ((__packed__)); 12be0e5c09SChris Mason 13be0e5c09SChris Mason struct header { 14be0e5c09SChris Mason u64 fsid[2]; /* FS specific uuid */ 15be0e5c09SChris Mason u64 blocknum; 16be0e5c09SChris Mason u64 parentid; 17be0e5c09SChris Mason u32 csum; 18be0e5c09SChris Mason u32 ham; 19be0e5c09SChris Mason u16 nritems; 20be0e5c09SChris Mason u16 flags; 21be0e5c09SChris Mason } __attribute__ ((__packed__)); 22be0e5c09SChris Mason 23be0e5c09SChris Mason #define NODEPTRS_PER_BLOCK ((BLOCKSIZE - sizeof(struct header)) / \ 24be0e5c09SChris Mason (sizeof(struct key) + sizeof(u64))) 25be0e5c09SChris Mason 26be0e5c09SChris Mason #define LEVEL_BITS 3 27be0e5c09SChris Mason #define MAX_LEVEL (1 << LEVEL_BITS) 28be0e5c09SChris Mason #define node_level(f) ((f) & (MAX_LEVEL-1)) 29be0e5c09SChris Mason #define is_leaf(f) (node_level(f) == 0) 30be0e5c09SChris Mason 31be0e5c09SChris Mason struct ctree_root { 32be0e5c09SChris Mason struct node *node; 33be0e5c09SChris Mason }; 34be0e5c09SChris Mason 35be0e5c09SChris Mason struct item { 36be0e5c09SChris Mason struct key key; 37be0e5c09SChris Mason u16 offset; 38be0e5c09SChris Mason u16 size; 39be0e5c09SChris Mason } __attribute__ ((__packed__)); 40be0e5c09SChris Mason 41be0e5c09SChris Mason #define LEAF_DATA_SIZE (BLOCKSIZE - sizeof(struct header)) 42be0e5c09SChris Mason struct leaf { 43be0e5c09SChris Mason struct header header; 44be0e5c09SChris Mason union { 45be0e5c09SChris Mason struct item items[LEAF_DATA_SIZE/sizeof(struct item)]; 46be0e5c09SChris Mason u8 data[BLOCKSIZE-sizeof(struct header)]; 47be0e5c09SChris Mason }; 48be0e5c09SChris Mason } __attribute__ ((__packed__)); 49be0e5c09SChris Mason 50be0e5c09SChris Mason struct node { 51be0e5c09SChris Mason struct header header; 52be0e5c09SChris Mason struct key keys[NODEPTRS_PER_BLOCK]; 53be0e5c09SChris Mason u64 blockptrs[NODEPTRS_PER_BLOCK]; 54be0e5c09SChris Mason } __attribute__ ((__packed__)); 55be0e5c09SChris Mason 56be0e5c09SChris Mason struct ctree_path { 57be0e5c09SChris Mason struct node *nodes[MAX_LEVEL]; 58be0e5c09SChris Mason int slots[MAX_LEVEL]; 59be0e5c09SChris Mason }; 60be0e5c09SChris Mason 61be0e5c09SChris Mason static inline void init_path(struct ctree_path *p) 62be0e5c09SChris Mason { 63be0e5c09SChris Mason memset(p, 0, sizeof(*p)); 64be0e5c09SChris Mason } 65be0e5c09SChris Mason 66be0e5c09SChris Mason static inline unsigned int leaf_data_end(struct leaf *leaf) 67be0e5c09SChris Mason { 68be0e5c09SChris Mason unsigned int nr = leaf->header.nritems; 69be0e5c09SChris Mason if (nr == 0) 70be0e5c09SChris Mason return ARRAY_SIZE(leaf->data); 71be0e5c09SChris Mason return leaf->items[nr-1].offset; 72be0e5c09SChris Mason } 73be0e5c09SChris Mason 74be0e5c09SChris Mason static inline int leaf_free_space(struct leaf *leaf) 75be0e5c09SChris Mason { 76be0e5c09SChris Mason int data_end = leaf_data_end(leaf); 77be0e5c09SChris Mason int nritems = leaf->header.nritems; 78be0e5c09SChris Mason char *items_end = (char *)(leaf->items + nritems + 1); 79be0e5c09SChris Mason return (char *)(leaf->data + data_end) - (char *)items_end; 80be0e5c09SChris Mason } 81be0e5c09SChris Mason 82be0e5c09SChris Mason int comp_keys(struct key *k1, struct key *k2) 83be0e5c09SChris Mason { 84be0e5c09SChris Mason if (k1->objectid > k2->objectid) 85be0e5c09SChris Mason return 1; 86be0e5c09SChris Mason if (k1->objectid < k2->objectid) 87be0e5c09SChris Mason return -1; 88be0e5c09SChris Mason if (k1->flags > k2->flags) 89be0e5c09SChris Mason return 1; 90be0e5c09SChris Mason if (k1->flags < k2->flags) 91be0e5c09SChris Mason return -1; 92be0e5c09SChris Mason if (k1->offset > k2->offset) 93be0e5c09SChris Mason return 1; 94be0e5c09SChris Mason if (k1->offset < k2->offset) 95be0e5c09SChris Mason return -1; 96be0e5c09SChris Mason return 0; 97be0e5c09SChris Mason } 98be0e5c09SChris Mason int generic_bin_search(char *p, int item_size, struct key *key, 99be0e5c09SChris Mason int max, int *slot) 100be0e5c09SChris Mason { 101be0e5c09SChris Mason int low = 0; 102be0e5c09SChris Mason int high = max; 103be0e5c09SChris Mason int mid; 104be0e5c09SChris Mason int ret; 105be0e5c09SChris Mason struct key *tmp; 106be0e5c09SChris Mason 107be0e5c09SChris Mason while(low < high) { 108be0e5c09SChris Mason mid = (low + high) / 2; 109be0e5c09SChris Mason tmp = (struct key *)(p + mid * item_size); 110be0e5c09SChris Mason ret = comp_keys(tmp, key); 111be0e5c09SChris Mason 112be0e5c09SChris Mason if (ret < 0) 113be0e5c09SChris Mason low = mid + 1; 114be0e5c09SChris Mason else if (ret > 0) 115be0e5c09SChris Mason high = mid; 116be0e5c09SChris Mason else { 117be0e5c09SChris Mason *slot = mid; 118be0e5c09SChris Mason return 0; 119be0e5c09SChris Mason } 120be0e5c09SChris Mason } 121be0e5c09SChris Mason *slot = low; 122be0e5c09SChris Mason return 1; 123be0e5c09SChris Mason } 124be0e5c09SChris Mason 125be0e5c09SChris Mason int bin_search(struct node *c, struct key *key, int *slot) 126be0e5c09SChris Mason { 127be0e5c09SChris Mason if (is_leaf(c->header.flags)) { 128be0e5c09SChris Mason struct leaf *l = (struct leaf *)c; 129be0e5c09SChris Mason return generic_bin_search((void *)l->items, sizeof(struct item), 130be0e5c09SChris Mason key, c->header.nritems, slot); 131be0e5c09SChris Mason } else { 132be0e5c09SChris Mason return generic_bin_search((void *)c->keys, sizeof(struct key), 133be0e5c09SChris Mason key, c->header.nritems, slot); 134be0e5c09SChris Mason } 135be0e5c09SChris Mason return -1; 136be0e5c09SChris Mason } 137be0e5c09SChris Mason 138be0e5c09SChris Mason void *read_block(u64 blocknum) 139be0e5c09SChris Mason { 140be0e5c09SChris Mason return (void *)blocknum; 141be0e5c09SChris Mason } 142be0e5c09SChris Mason 143be0e5c09SChris Mason int search_slot(struct ctree_root *root, struct key *key, struct ctree_path *p) 144be0e5c09SChris Mason { 145be0e5c09SChris Mason struct node *c = root->node; 146be0e5c09SChris Mason int slot; 147be0e5c09SChris Mason int ret; 148be0e5c09SChris Mason int level; 149be0e5c09SChris Mason while (c) { 150be0e5c09SChris Mason level = node_level(c->header.flags); 151be0e5c09SChris Mason p->nodes[level] = c; 152be0e5c09SChris Mason ret = bin_search(c, key, &slot); 153be0e5c09SChris Mason if (!is_leaf(c->header.flags)) { 154be0e5c09SChris Mason if (ret && slot > 0) 155be0e5c09SChris Mason slot -= 1; 156be0e5c09SChris Mason p->slots[level] = slot; 157be0e5c09SChris Mason c = read_block(c->blockptrs[slot]); 158be0e5c09SChris Mason continue; 159be0e5c09SChris Mason } else { 160be0e5c09SChris Mason p->slots[level] = slot; 161be0e5c09SChris Mason return ret; 162be0e5c09SChris Mason } 163be0e5c09SChris Mason } 164be0e5c09SChris Mason return -1; 165be0e5c09SChris Mason } 166be0e5c09SChris Mason 167be0e5c09SChris Mason static void fixup_low_keys(struct ctree_path *path, struct key *key, 168be0e5c09SChris Mason int level) 169be0e5c09SChris Mason { 170be0e5c09SChris Mason int i; 171be0e5c09SChris Mason /* adjust the pointers going up the tree */ 172be0e5c09SChris Mason for (i = level; i < MAX_LEVEL; i++) { 173be0e5c09SChris Mason struct node *t = path->nodes[i]; 174be0e5c09SChris Mason int tslot = path->slots[i]; 175be0e5c09SChris Mason if (!t) 176be0e5c09SChris Mason break; 177be0e5c09SChris Mason memcpy(t->keys + tslot, key, sizeof(*key)); 178be0e5c09SChris Mason if (tslot != 0) 179be0e5c09SChris Mason break; 180be0e5c09SChris Mason } 181be0e5c09SChris Mason } 182be0e5c09SChris Mason 183be0e5c09SChris Mason int __insert_ptr(struct ctree_root *root, 184be0e5c09SChris Mason struct ctree_path *path, struct key *key, 185be0e5c09SChris Mason u64 blocknr, int slot, int level) 186be0e5c09SChris Mason { 187be0e5c09SChris Mason struct node *c; 188be0e5c09SChris Mason struct node *lower; 189be0e5c09SChris Mason struct key *lower_key; 190be0e5c09SChris Mason int nritems; 191be0e5c09SChris Mason /* need a new root */ 192be0e5c09SChris Mason if (!path->nodes[level]) { 193be0e5c09SChris Mason c = malloc(sizeof(struct node)); 194be0e5c09SChris Mason memset(c, 0, sizeof(c)); 195be0e5c09SChris Mason c->header.nritems = 2; 196be0e5c09SChris Mason c->header.flags = node_level(level); 197be0e5c09SChris Mason lower = path->nodes[level-1]; 198be0e5c09SChris Mason if (is_leaf(lower->header.flags)) 199be0e5c09SChris Mason lower_key = &((struct leaf *)lower)->items[0].key; 200be0e5c09SChris Mason else 201be0e5c09SChris Mason lower_key = lower->keys; 202be0e5c09SChris Mason memcpy(c->keys, lower_key, sizeof(struct key)); 203be0e5c09SChris Mason memcpy(c->keys + 1, key, sizeof(struct key)); 204be0e5c09SChris Mason c->blockptrs[0] = (u64)lower; 205be0e5c09SChris Mason c->blockptrs[1] = blocknr; 206be0e5c09SChris Mason root->node = c; 207be0e5c09SChris Mason path->nodes[level] = c; 208be0e5c09SChris Mason path->slots[level] = 0; 209be0e5c09SChris Mason if (c->keys[1].objectid == 0) 210be0e5c09SChris Mason BUG(); 211be0e5c09SChris Mason return 0; 212be0e5c09SChris Mason } 213be0e5c09SChris Mason lower = path->nodes[level]; 214be0e5c09SChris Mason nritems = lower->header.nritems; 215be0e5c09SChris Mason if (slot > nritems) 216be0e5c09SChris Mason BUG(); 217be0e5c09SChris Mason if (nritems == NODEPTRS_PER_BLOCK) 218be0e5c09SChris Mason BUG(); 219be0e5c09SChris Mason if (slot != nritems) { 220be0e5c09SChris Mason memmove(lower->keys + slot + 1, lower->keys + slot, 221be0e5c09SChris Mason (nritems - slot) * sizeof(struct key)); 222be0e5c09SChris Mason memmove(lower->blockptrs + slot + 1, lower->blockptrs + slot, 223be0e5c09SChris Mason (nritems - slot) * sizeof(u64)); 224be0e5c09SChris Mason } 225be0e5c09SChris Mason memcpy(lower->keys + slot, key, sizeof(struct key)); 226be0e5c09SChris Mason lower->blockptrs[slot] = blocknr; 227be0e5c09SChris Mason lower->header.nritems++; 228be0e5c09SChris Mason if (lower->keys[1].objectid == 0) 229be0e5c09SChris Mason BUG(); 230be0e5c09SChris Mason return 0; 231be0e5c09SChris Mason } 232be0e5c09SChris Mason 233be0e5c09SChris Mason int push_node_left(struct ctree_root *root, struct ctree_path *path, int level) 234be0e5c09SChris Mason { 235be0e5c09SChris Mason int slot; 236be0e5c09SChris Mason struct node *left; 237be0e5c09SChris Mason struct node *right; 238be0e5c09SChris Mason int push_items = 0; 239be0e5c09SChris Mason int left_nritems; 240be0e5c09SChris Mason int right_nritems; 241be0e5c09SChris Mason 242be0e5c09SChris Mason if (level == MAX_LEVEL - 1 || path->nodes[level + 1] == 0) 243be0e5c09SChris Mason return 1; 244be0e5c09SChris Mason slot = path->slots[level + 1]; 245be0e5c09SChris Mason if (slot == 0) 246be0e5c09SChris Mason return 1; 247be0e5c09SChris Mason 248be0e5c09SChris Mason left = read_block(path->nodes[level + 1]->blockptrs[slot - 1]); 249be0e5c09SChris Mason right = path->nodes[level]; 250be0e5c09SChris Mason left_nritems = left->header.nritems; 251be0e5c09SChris Mason right_nritems = right->header.nritems; 252be0e5c09SChris Mason push_items = NODEPTRS_PER_BLOCK - (left_nritems + 1); 253be0e5c09SChris Mason if (push_items <= 0) 254be0e5c09SChris Mason return 1; 255be0e5c09SChris Mason 256be0e5c09SChris Mason if (right_nritems < push_items) 257be0e5c09SChris Mason push_items = right_nritems; 258be0e5c09SChris Mason memcpy(left->keys + left_nritems, right->keys, 259be0e5c09SChris Mason push_items * sizeof(struct key)); 260be0e5c09SChris Mason memcpy(left->blockptrs + left_nritems, right->blockptrs, 261be0e5c09SChris Mason push_items * sizeof(u64)); 262be0e5c09SChris Mason memmove(right->keys, right->keys + push_items, 263be0e5c09SChris Mason (right_nritems - push_items) * sizeof(struct key)); 264be0e5c09SChris Mason memmove(right->blockptrs, right->blockptrs + push_items, 265be0e5c09SChris Mason (right_nritems - push_items) * sizeof(u64)); 266be0e5c09SChris Mason right->header.nritems -= push_items; 267be0e5c09SChris Mason left->header.nritems += push_items; 268be0e5c09SChris Mason 269be0e5c09SChris Mason /* adjust the pointers going up the tree */ 270be0e5c09SChris Mason fixup_low_keys(path, right->keys, level + 1); 271be0e5c09SChris Mason 272be0e5c09SChris Mason /* then fixup the leaf pointer in the path */ 273be0e5c09SChris Mason if (path->slots[level] < push_items) { 274be0e5c09SChris Mason path->slots[level] += left_nritems; 275be0e5c09SChris Mason path->nodes[level] = (struct node*)left; 276be0e5c09SChris Mason path->slots[level + 1] -= 1; 277be0e5c09SChris Mason } else { 278be0e5c09SChris Mason path->slots[level] -= push_items; 279be0e5c09SChris Mason } 280be0e5c09SChris Mason return 0; 281be0e5c09SChris Mason } 282be0e5c09SChris Mason 283be0e5c09SChris Mason int push_node_right(struct ctree_root *root, struct ctree_path *path, int level) 284be0e5c09SChris Mason { 285be0e5c09SChris Mason int slot; 286be0e5c09SChris Mason struct node *dst; 287be0e5c09SChris Mason struct node *src; 288be0e5c09SChris Mason int push_items = 0; 289be0e5c09SChris Mason int dst_nritems; 290be0e5c09SChris Mason int src_nritems; 291be0e5c09SChris Mason 292be0e5c09SChris Mason if (level == MAX_LEVEL - 1 || path->nodes[level + 1] == 0) 293be0e5c09SChris Mason return 1; 294be0e5c09SChris Mason slot = path->slots[level + 1]; 295be0e5c09SChris Mason if (slot == NODEPTRS_PER_BLOCK - 1) 296be0e5c09SChris Mason return 1; 297be0e5c09SChris Mason 298be0e5c09SChris Mason if (slot >= path->nodes[level + 1]->header.nritems -1) 299be0e5c09SChris Mason return 1; 300be0e5c09SChris Mason 301be0e5c09SChris Mason dst = read_block(path->nodes[level + 1]->blockptrs[slot + 1]); 302be0e5c09SChris Mason src = path->nodes[level]; 303be0e5c09SChris Mason dst_nritems = dst->header.nritems; 304be0e5c09SChris Mason src_nritems = src->header.nritems; 305be0e5c09SChris Mason push_items = NODEPTRS_PER_BLOCK - (dst_nritems + 1); 306be0e5c09SChris Mason if (push_items <= 0) 307be0e5c09SChris Mason return 1; 308be0e5c09SChris Mason 309be0e5c09SChris Mason if (src_nritems < push_items) 310be0e5c09SChris Mason push_items = src_nritems; 311be0e5c09SChris Mason memmove(dst->keys + push_items, dst->keys, 312be0e5c09SChris Mason dst_nritems * sizeof(struct key)); 313be0e5c09SChris Mason memcpy(dst->keys, src->keys + src_nritems - push_items, 314be0e5c09SChris Mason push_items * sizeof(struct key)); 315be0e5c09SChris Mason 316be0e5c09SChris Mason memmove(dst->blockptrs + push_items, dst->blockptrs, 317be0e5c09SChris Mason dst_nritems * sizeof(u64)); 318be0e5c09SChris Mason memcpy(dst->blockptrs, src->blockptrs + src_nritems - push_items, 319be0e5c09SChris Mason push_items * sizeof(u64)); 320be0e5c09SChris Mason 321be0e5c09SChris Mason src->header.nritems -= push_items; 322be0e5c09SChris Mason dst->header.nritems += push_items; 323be0e5c09SChris Mason 324be0e5c09SChris Mason /* adjust the pointers going up the tree */ 325be0e5c09SChris Mason memcpy(path->nodes[level + 1]->keys + path->slots[level + 1] + 1, 326be0e5c09SChris Mason dst->keys, sizeof(struct key)); 327be0e5c09SChris Mason /* then fixup the leaf pointer in the path */ 328be0e5c09SChris Mason if (path->slots[level] >= src->header.nritems) { 329be0e5c09SChris Mason path->slots[level] -= src->header.nritems; 330be0e5c09SChris Mason path->nodes[level] = (struct node*)dst; 331be0e5c09SChris Mason path->slots[level + 1] += 1; 332be0e5c09SChris Mason } 333be0e5c09SChris Mason return 0; 334be0e5c09SChris Mason } 335be0e5c09SChris Mason 336be0e5c09SChris Mason int insert_ptr(struct ctree_root *root, 337be0e5c09SChris Mason struct ctree_path *path, struct key *key, 338be0e5c09SChris Mason u64 blocknr, int level) 339be0e5c09SChris Mason { 340be0e5c09SChris Mason struct node *c = path->nodes[level]; 341be0e5c09SChris Mason struct node *b; 342be0e5c09SChris Mason struct node *bal[MAX_LEVEL]; 343be0e5c09SChris Mason int bal_level = level; 344be0e5c09SChris Mason int mid; 345be0e5c09SChris Mason int bal_start = -1; 346be0e5c09SChris Mason 347be0e5c09SChris Mason memset(bal, 0, ARRAY_SIZE(bal)); 348be0e5c09SChris Mason while(c && c->header.nritems == NODEPTRS_PER_BLOCK) { 349be0e5c09SChris Mason if (push_node_left(root, path, 350be0e5c09SChris Mason node_level(c->header.flags)) == 0) 351be0e5c09SChris Mason break; 352be0e5c09SChris Mason if (push_node_right(root, path, 353be0e5c09SChris Mason node_level(c->header.flags)) == 0) 354be0e5c09SChris Mason break; 355be0e5c09SChris Mason bal_start = bal_level; 356be0e5c09SChris Mason if (bal_level == MAX_LEVEL - 1) 357be0e5c09SChris Mason BUG(); 358be0e5c09SChris Mason b = malloc(sizeof(struct node)); 359be0e5c09SChris Mason b->header.flags = c->header.flags; 360be0e5c09SChris Mason mid = (c->header.nritems + 1) / 2; 361be0e5c09SChris Mason memcpy(b->keys, c->keys + mid, 362be0e5c09SChris Mason (c->header.nritems - mid) * sizeof(struct key)); 363be0e5c09SChris Mason memcpy(b->blockptrs, c->blockptrs + mid, 364be0e5c09SChris Mason (c->header.nritems - mid) * sizeof(u64)); 365be0e5c09SChris Mason b->header.nritems = c->header.nritems - mid; 366be0e5c09SChris Mason c->header.nritems = mid; 367be0e5c09SChris Mason bal[bal_level] = b; 368be0e5c09SChris Mason if (bal_level == MAX_LEVEL - 1) 369be0e5c09SChris Mason break; 370be0e5c09SChris Mason bal_level += 1; 371be0e5c09SChris Mason c = path->nodes[bal_level]; 372be0e5c09SChris Mason } 373be0e5c09SChris Mason while(bal_start > 0) { 374be0e5c09SChris Mason b = bal[bal_start]; 375be0e5c09SChris Mason c = path->nodes[bal_start]; 376be0e5c09SChris Mason __insert_ptr(root, path, b->keys, (u64)b, 377be0e5c09SChris Mason path->slots[bal_start + 1] + 1, bal_start + 1); 378be0e5c09SChris Mason if (path->slots[bal_start] >= c->header.nritems) { 379be0e5c09SChris Mason path->slots[bal_start] -= c->header.nritems; 380be0e5c09SChris Mason path->nodes[bal_start] = b; 381be0e5c09SChris Mason path->slots[bal_start + 1] += 1; 382be0e5c09SChris Mason } 383be0e5c09SChris Mason bal_start--; 384be0e5c09SChris Mason if (!bal[bal_start]) 385be0e5c09SChris Mason break; 386be0e5c09SChris Mason } 387be0e5c09SChris Mason return __insert_ptr(root, path, key, blocknr, path->slots[level] + 1, 388be0e5c09SChris Mason level); 389be0e5c09SChris Mason } 390be0e5c09SChris Mason 391be0e5c09SChris Mason int leaf_space_used(struct leaf *l, int start, int nr) 392be0e5c09SChris Mason { 393be0e5c09SChris Mason int data_len; 394be0e5c09SChris Mason int end = start + nr - 1; 395be0e5c09SChris Mason 396be0e5c09SChris Mason if (!nr) 397be0e5c09SChris Mason return 0; 398be0e5c09SChris Mason data_len = l->items[start].offset + l->items[start].size; 399be0e5c09SChris Mason data_len = data_len - l->items[end].offset; 400be0e5c09SChris Mason data_len += sizeof(struct item) * nr; 401be0e5c09SChris Mason return data_len; 402be0e5c09SChris Mason } 403be0e5c09SChris Mason 404be0e5c09SChris Mason int push_leaf_left(struct ctree_root *root, struct ctree_path *path, 405be0e5c09SChris Mason int data_size) 406be0e5c09SChris Mason { 407be0e5c09SChris Mason struct leaf *right = (struct leaf *)path->nodes[0]; 408be0e5c09SChris Mason struct leaf *left; 409be0e5c09SChris Mason int slot; 410be0e5c09SChris Mason int i; 411be0e5c09SChris Mason int free_space; 412be0e5c09SChris Mason int push_space = 0; 413be0e5c09SChris Mason int push_items = 0; 414be0e5c09SChris Mason struct item *item; 415be0e5c09SChris Mason int old_left_nritems; 416be0e5c09SChris Mason 417be0e5c09SChris Mason slot = path->slots[1]; 418be0e5c09SChris Mason if (slot == 0) { 419be0e5c09SChris Mason return 1; 420be0e5c09SChris Mason } 421be0e5c09SChris Mason if (!path->nodes[1]) { 422be0e5c09SChris Mason return 1; 423be0e5c09SChris Mason } 424be0e5c09SChris Mason left = read_block(path->nodes[1]->blockptrs[slot - 1]); 425be0e5c09SChris Mason free_space = leaf_free_space(left); 426be0e5c09SChris Mason if (free_space < data_size + sizeof(struct item)) { 427be0e5c09SChris Mason return 1; 428be0e5c09SChris Mason } 429be0e5c09SChris Mason for (i = 0; i < right->header.nritems; i++) { 430be0e5c09SChris Mason item = right->items + i; 431be0e5c09SChris Mason if (path->slots[0] == i) 432be0e5c09SChris Mason push_space += data_size + sizeof(*item); 433be0e5c09SChris Mason if (item->size + sizeof(*item) + push_space > free_space) 434be0e5c09SChris Mason break; 435be0e5c09SChris Mason push_items++; 436be0e5c09SChris Mason push_space += item->size + sizeof(*item); 437be0e5c09SChris Mason } 438be0e5c09SChris Mason if (push_items == 0) { 439be0e5c09SChris Mason return 1; 440be0e5c09SChris Mason } 441be0e5c09SChris Mason /* push data from right to left */ 442be0e5c09SChris Mason memcpy(left->items + left->header.nritems, 443be0e5c09SChris Mason right->items, push_items * sizeof(struct item)); 444be0e5c09SChris Mason push_space = LEAF_DATA_SIZE - right->items[push_items -1].offset; 445be0e5c09SChris Mason memcpy(left->data + leaf_data_end(left) - push_space, 446be0e5c09SChris Mason right->data + right->items[push_items - 1].offset, 447be0e5c09SChris Mason push_space); 448be0e5c09SChris Mason old_left_nritems = left->header.nritems; 449be0e5c09SChris Mason for(i = old_left_nritems; i < old_left_nritems + push_items; i++) { 450be0e5c09SChris Mason left->items[i].offset -= LEAF_DATA_SIZE - 451be0e5c09SChris Mason left->items[old_left_nritems -1].offset; 452be0e5c09SChris Mason } 453be0e5c09SChris Mason left->header.nritems += push_items; 454be0e5c09SChris Mason 455be0e5c09SChris Mason /* fixup right node */ 456be0e5c09SChris Mason push_space = right->items[push_items-1].offset - leaf_data_end(right); 457be0e5c09SChris Mason memmove(right->data + LEAF_DATA_SIZE - push_space, right->data + 458be0e5c09SChris Mason leaf_data_end(right), push_space); 459be0e5c09SChris Mason memmove(right->items, right->items + push_items, 460be0e5c09SChris Mason (right->header.nritems - push_items) * sizeof(struct item)); 461be0e5c09SChris Mason right->header.nritems -= push_items; 462be0e5c09SChris Mason push_space = LEAF_DATA_SIZE; 463be0e5c09SChris Mason for (i = 0; i < right->header.nritems; i++) { 464be0e5c09SChris Mason right->items[i].offset = push_space - right->items[i].size; 465be0e5c09SChris Mason push_space = right->items[i].offset; 466be0e5c09SChris Mason } 467be0e5c09SChris Mason fixup_low_keys(path, &right->items[0].key, 1); 468be0e5c09SChris Mason 469be0e5c09SChris Mason /* then fixup the leaf pointer in the path */ 470be0e5c09SChris Mason if (path->slots[0] < push_items) { 471be0e5c09SChris Mason path->slots[0] += old_left_nritems; 472be0e5c09SChris Mason path->nodes[0] = (struct node*)left; 473be0e5c09SChris Mason path->slots[1] -= 1; 474be0e5c09SChris Mason } else { 475be0e5c09SChris Mason path->slots[0] -= push_items; 476be0e5c09SChris Mason } 477be0e5c09SChris Mason return 0; 478be0e5c09SChris Mason } 479be0e5c09SChris Mason 480be0e5c09SChris Mason int split_leaf(struct ctree_root *root, struct ctree_path *path, int data_size) 481be0e5c09SChris Mason { 482be0e5c09SChris Mason struct leaf *l = (struct leaf *)path->nodes[0]; 483be0e5c09SChris Mason int nritems = l->header.nritems; 484be0e5c09SChris Mason int mid = (nritems + 1)/ 2; 485be0e5c09SChris Mason int slot = path->slots[0]; 486be0e5c09SChris Mason struct leaf *right; 487be0e5c09SChris Mason int space_needed = data_size + sizeof(struct item); 488be0e5c09SChris Mason int data_copy_size; 489be0e5c09SChris Mason int rt_data_off; 490be0e5c09SChris Mason int i; 491be0e5c09SChris Mason int ret; 492be0e5c09SChris Mason 493be0e5c09SChris Mason if (push_leaf_left(root, path, data_size) == 0) { 494be0e5c09SChris Mason return 0; 495be0e5c09SChris Mason } 496be0e5c09SChris Mason right = malloc(sizeof(struct leaf)); 497be0e5c09SChris Mason memset(right, 0, sizeof(*right)); 498be0e5c09SChris Mason if (mid <= slot) { 499be0e5c09SChris Mason if (leaf_space_used(l, mid, nritems - mid) + space_needed > 500be0e5c09SChris Mason LEAF_DATA_SIZE) 501be0e5c09SChris Mason BUG(); 502be0e5c09SChris Mason } else { 503be0e5c09SChris Mason if (leaf_space_used(l, 0, mid + 1) + space_needed > 504be0e5c09SChris Mason LEAF_DATA_SIZE) 505be0e5c09SChris Mason BUG(); 506be0e5c09SChris Mason } 507be0e5c09SChris Mason right->header.nritems = nritems - mid; 508be0e5c09SChris Mason data_copy_size = l->items[mid].offset + l->items[mid].size - 509be0e5c09SChris Mason leaf_data_end(l); 510be0e5c09SChris Mason memcpy(right->items, l->items + mid, 511be0e5c09SChris Mason (nritems - mid) * sizeof(struct item)); 512be0e5c09SChris Mason memcpy(right->data + LEAF_DATA_SIZE - data_copy_size, 513be0e5c09SChris Mason l->data + leaf_data_end(l), data_copy_size); 514be0e5c09SChris Mason rt_data_off = LEAF_DATA_SIZE - 515be0e5c09SChris Mason (l->items[mid].offset + l->items[mid].size); 516be0e5c09SChris Mason for (i = 0; i < right->header.nritems; i++) { 517be0e5c09SChris Mason right->items[i].offset += rt_data_off; 518be0e5c09SChris Mason } 519be0e5c09SChris Mason l->header.nritems = mid; 520be0e5c09SChris Mason ret = insert_ptr(root, path, &right->items[0].key, 521be0e5c09SChris Mason (u64)right, 1); 522be0e5c09SChris Mason if (mid <= slot) { 523be0e5c09SChris Mason path->nodes[0] = (struct node *)right; 524be0e5c09SChris Mason path->slots[0] -= mid; 525be0e5c09SChris Mason path->slots[1] += 1; 526be0e5c09SChris Mason } 527be0e5c09SChris Mason return ret; 528be0e5c09SChris Mason } 529be0e5c09SChris Mason 530be0e5c09SChris Mason int insert_item(struct ctree_root *root, struct key *key, 531be0e5c09SChris Mason void *data, int data_size) 532be0e5c09SChris Mason { 533be0e5c09SChris Mason int ret; 534be0e5c09SChris Mason int slot; 535be0e5c09SChris Mason struct leaf *leaf; 536be0e5c09SChris Mason unsigned int nritems; 537be0e5c09SChris Mason unsigned int data_end; 538be0e5c09SChris Mason struct ctree_path path; 539be0e5c09SChris Mason 540be0e5c09SChris Mason init_path(&path); 541be0e5c09SChris Mason ret = search_slot(root, key, &path); 542be0e5c09SChris Mason if (ret == 0) 543be0e5c09SChris Mason return -EEXIST; 544be0e5c09SChris Mason 545be0e5c09SChris Mason leaf = (struct leaf *)path.nodes[0]; 546be0e5c09SChris Mason if (leaf_free_space(leaf) < sizeof(struct item) + data_size) 547be0e5c09SChris Mason split_leaf(root, &path, data_size); 548be0e5c09SChris Mason leaf = (struct leaf *)path.nodes[0]; 549be0e5c09SChris Mason nritems = leaf->header.nritems; 550be0e5c09SChris Mason data_end = leaf_data_end(leaf); 551be0e5c09SChris Mason if (leaf_free_space(leaf) < sizeof(struct item) + data_size) 552be0e5c09SChris Mason BUG(); 553be0e5c09SChris Mason 554be0e5c09SChris Mason slot = path.slots[0]; 555be0e5c09SChris Mason if (slot == 0) 556be0e5c09SChris Mason fixup_low_keys(&path, key, 1); 557be0e5c09SChris Mason if (slot != nritems) { 558be0e5c09SChris Mason int i; 559be0e5c09SChris Mason unsigned int old_data = leaf->items[slot].offset + 560be0e5c09SChris Mason leaf->items[slot].size; 561be0e5c09SChris Mason 562be0e5c09SChris Mason /* 563be0e5c09SChris Mason * item0..itemN ... dataN.offset..dataN.size .. data0.size 564be0e5c09SChris Mason */ 565be0e5c09SChris Mason /* first correct the data pointers */ 566be0e5c09SChris Mason for (i = slot; i < nritems; i++) 567be0e5c09SChris Mason leaf->items[i].offset -= data_size; 568be0e5c09SChris Mason 569be0e5c09SChris Mason /* shift the items */ 570be0e5c09SChris Mason memmove(leaf->items + slot + 1, leaf->items + slot, 571be0e5c09SChris Mason (nritems - slot) * sizeof(struct item)); 572be0e5c09SChris Mason 573be0e5c09SChris Mason /* shift the data */ 574be0e5c09SChris Mason memmove(leaf->data + data_end - data_size, leaf->data + 575be0e5c09SChris Mason data_end, old_data - data_end); 576be0e5c09SChris Mason data_end = old_data; 577be0e5c09SChris Mason } 578be0e5c09SChris Mason memcpy(&leaf->items[slot].key, key, sizeof(struct key)); 579be0e5c09SChris Mason leaf->items[slot].offset = data_end - data_size; 580be0e5c09SChris Mason leaf->items[slot].size = data_size; 581be0e5c09SChris Mason memcpy(leaf->data + data_end - data_size, data, data_size); 582be0e5c09SChris Mason leaf->header.nritems += 1; 583be0e5c09SChris Mason if (leaf_free_space(leaf) < 0) 584be0e5c09SChris Mason BUG(); 585be0e5c09SChris Mason return 0; 586be0e5c09SChris Mason } 587be0e5c09SChris Mason 588be0e5c09SChris Mason int del_ptr(struct ctree_root *root, struct ctree_path *path, int level) 589be0e5c09SChris Mason { 590be0e5c09SChris Mason int slot; 591be0e5c09SChris Mason struct node *node; 592be0e5c09SChris Mason int nritems; 593be0e5c09SChris Mason 594be0e5c09SChris Mason while(1) { 595be0e5c09SChris Mason node = path->nodes[level]; 596be0e5c09SChris Mason if (!node) 597be0e5c09SChris Mason break; 598be0e5c09SChris Mason slot = path->slots[level]; 599be0e5c09SChris Mason nritems = node->header.nritems; 600be0e5c09SChris Mason 601be0e5c09SChris Mason if (slot != nritems -1) { 602be0e5c09SChris Mason memmove(node->keys + slot, node->keys + slot + 1, 603be0e5c09SChris Mason sizeof(struct key) * (nritems - slot - 1)); 604be0e5c09SChris Mason memmove(node->blockptrs + slot, 605be0e5c09SChris Mason node->blockptrs + slot + 1, 606be0e5c09SChris Mason sizeof(u64) * (nritems - slot - 1)); 607be0e5c09SChris Mason } 608be0e5c09SChris Mason node->header.nritems--; 609be0e5c09SChris Mason if (node->header.nritems != 0) { 610be0e5c09SChris Mason int tslot; 611be0e5c09SChris Mason if (slot == 0) 612be0e5c09SChris Mason fixup_low_keys(path, node->keys, level + 1); 613be0e5c09SChris Mason tslot = path->slots[level+1]; 614be0e5c09SChris Mason push_node_left(root, path, level); 615be0e5c09SChris Mason if (node->header.nritems) { 616be0e5c09SChris Mason push_node_right(root, path, level); 617be0e5c09SChris Mason } 618be0e5c09SChris Mason path->slots[level+1] = tslot; 619be0e5c09SChris Mason if (node->header.nritems) 620be0e5c09SChris Mason break; 621be0e5c09SChris Mason } 622be0e5c09SChris Mason if (node == root->node) { 623be0e5c09SChris Mason printf("root is now null!\n"); 624be0e5c09SChris Mason root->node = NULL; 625be0e5c09SChris Mason break; 626be0e5c09SChris Mason } 627be0e5c09SChris Mason level++; 628be0e5c09SChris Mason if (!path->nodes[level]) 629be0e5c09SChris Mason BUG(); 630be0e5c09SChris Mason free(node); 631be0e5c09SChris Mason } 632be0e5c09SChris Mason return 0; 633be0e5c09SChris Mason } 634be0e5c09SChris Mason 635be0e5c09SChris Mason int del_item(struct ctree_root *root, struct key *key) 636be0e5c09SChris Mason { 637be0e5c09SChris Mason int ret; 638be0e5c09SChris Mason int slot; 639be0e5c09SChris Mason struct leaf *leaf; 640be0e5c09SChris Mason struct ctree_path path; 641be0e5c09SChris Mason int doff; 642be0e5c09SChris Mason int dsize; 643be0e5c09SChris Mason 644be0e5c09SChris Mason init_path(&path); 645be0e5c09SChris Mason ret = search_slot(root, key, &path); 646be0e5c09SChris Mason if (ret != 0) 647be0e5c09SChris Mason return -1; 648be0e5c09SChris Mason 649be0e5c09SChris Mason leaf = (struct leaf *)path.nodes[0]; 650be0e5c09SChris Mason slot = path.slots[0]; 651be0e5c09SChris Mason doff = leaf->items[slot].offset; 652be0e5c09SChris Mason dsize = leaf->items[slot].size; 653be0e5c09SChris Mason 654be0e5c09SChris Mason if (slot != leaf->header.nritems - 1) { 655be0e5c09SChris Mason int i; 656be0e5c09SChris Mason int data_end = leaf_data_end(leaf); 657be0e5c09SChris Mason memmove(leaf->data + data_end + dsize, 658be0e5c09SChris Mason leaf->data + data_end, 659be0e5c09SChris Mason doff - data_end); 660be0e5c09SChris Mason for (i = slot + 1; i < leaf->header.nritems; i++) 661be0e5c09SChris Mason leaf->items[i].offset += dsize; 662be0e5c09SChris Mason memmove(leaf->items + slot, leaf->items + slot + 1, 663be0e5c09SChris Mason sizeof(struct item) * 664be0e5c09SChris Mason (leaf->header.nritems - slot - 1)); 665be0e5c09SChris Mason } 666be0e5c09SChris Mason leaf->header.nritems -= 1; 667be0e5c09SChris Mason if (leaf->header.nritems == 0) { 668be0e5c09SChris Mason free(leaf); 669be0e5c09SChris Mason del_ptr(root, &path, 1); 670be0e5c09SChris Mason } else { 671be0e5c09SChris Mason if (slot == 0) 672be0e5c09SChris Mason fixup_low_keys(&path, &leaf->items[0].key, 1); 673be0e5c09SChris Mason if (leaf_space_used(leaf, 0, leaf->header.nritems) < 674be0e5c09SChris Mason LEAF_DATA_SIZE / 4) { 675be0e5c09SChris Mason /* push_leaf_left fixes the path. 676be0e5c09SChris Mason * make sure the path still points to our leaf 677be0e5c09SChris Mason * for possible call to del_ptr below 678be0e5c09SChris Mason */ 679be0e5c09SChris Mason slot = path.slots[1]; 680be0e5c09SChris Mason push_leaf_left(root, &path, 1); 681be0e5c09SChris Mason path.slots[1] = slot; 682be0e5c09SChris Mason if (leaf->header.nritems == 0) { 683be0e5c09SChris Mason free(leaf); 684be0e5c09SChris Mason del_ptr(root, &path, 1); 685be0e5c09SChris Mason } 686be0e5c09SChris Mason } 687be0e5c09SChris Mason } 688be0e5c09SChris Mason return 0; 689be0e5c09SChris Mason } 690be0e5c09SChris Mason 691be0e5c09SChris Mason void print_leaf(struct leaf *l) 692be0e5c09SChris Mason { 693be0e5c09SChris Mason int i; 694be0e5c09SChris Mason int nr = l->header.nritems; 695be0e5c09SChris Mason struct item *item; 696be0e5c09SChris Mason printf("leaf %p total ptrs %d free space %d\n", l, nr, 697be0e5c09SChris Mason leaf_free_space(l)); 698be0e5c09SChris Mason fflush(stdout); 699be0e5c09SChris Mason for (i = 0 ; i < nr ; i++) { 700be0e5c09SChris Mason item = l->items + i; 701be0e5c09SChris Mason printf("\titem %d key (%lu %u %lu) itemoff %d itemsize %d\n", 702be0e5c09SChris Mason i, 703be0e5c09SChris Mason item->key.objectid, item->key.flags, item->key.offset, 704be0e5c09SChris Mason item->offset, item->size); 705be0e5c09SChris Mason fflush(stdout); 706be0e5c09SChris Mason printf("\t\titem data %.*s\n", item->size, l->data+item->offset); 707be0e5c09SChris Mason fflush(stdout); 708be0e5c09SChris Mason } 709be0e5c09SChris Mason } 710be0e5c09SChris Mason void print_tree(struct node *c) 711be0e5c09SChris Mason { 712be0e5c09SChris Mason int i; 713be0e5c09SChris Mason int nr; 714be0e5c09SChris Mason 715be0e5c09SChris Mason if (!c) 716be0e5c09SChris Mason return; 717be0e5c09SChris Mason nr = c->header.nritems; 718be0e5c09SChris Mason if (is_leaf(c->header.flags)) { 719be0e5c09SChris Mason print_leaf((struct leaf *)c); 720be0e5c09SChris Mason return; 721be0e5c09SChris Mason } 722be0e5c09SChris Mason printf("node %p level %d total ptrs %d free spc %lu\n", c, 723be0e5c09SChris Mason node_level(c->header.flags), c->header.nritems, 724be0e5c09SChris Mason NODEPTRS_PER_BLOCK - c->header.nritems); 725be0e5c09SChris Mason fflush(stdout); 726be0e5c09SChris Mason for (i = 0; i < nr; i++) { 727be0e5c09SChris Mason printf("\tkey %d (%lu %u %lu) block %lx\n", 728be0e5c09SChris Mason i, 729be0e5c09SChris Mason c->keys[i].objectid, c->keys[i].flags, c->keys[i].offset, 730be0e5c09SChris Mason c->blockptrs[i]); 731be0e5c09SChris Mason fflush(stdout); 732be0e5c09SChris Mason } 733be0e5c09SChris Mason for (i = 0; i < nr; i++) { 734be0e5c09SChris Mason struct node *next = read_block(c->blockptrs[i]); 735be0e5c09SChris Mason if (is_leaf(next->header.flags) && 736be0e5c09SChris Mason node_level(c->header.flags) != 1) 737be0e5c09SChris Mason BUG(); 738be0e5c09SChris Mason if (node_level(next->header.flags) != 739be0e5c09SChris Mason node_level(c->header.flags) - 1) 740be0e5c09SChris Mason BUG(); 741be0e5c09SChris Mason print_tree(next); 742be0e5c09SChris Mason } 743be0e5c09SChris Mason 744be0e5c09SChris Mason } 745be0e5c09SChris Mason 746be0e5c09SChris Mason /* for testing only */ 747be0e5c09SChris Mason int next_key(int i, int max_key) { 748be0e5c09SChris Mason return rand() % max_key; 749be0e5c09SChris Mason // return i; 750be0e5c09SChris Mason } 751be0e5c09SChris Mason 752be0e5c09SChris Mason int main() { 753be0e5c09SChris Mason struct leaf *first_node = malloc(sizeof(struct leaf)); 754be0e5c09SChris Mason struct ctree_root root; 755be0e5c09SChris Mason struct key ins; 756be0e5c09SChris Mason char *buf; 757be0e5c09SChris Mason int i; 758be0e5c09SChris Mason int num; 759be0e5c09SChris Mason int ret; 760be0e5c09SChris Mason int run_size = 10000000; 761be0e5c09SChris Mason int max_key = 100000000; 762be0e5c09SChris Mason int tree_size = 0; 763be0e5c09SChris Mason struct ctree_path path; 764be0e5c09SChris Mason 765be0e5c09SChris Mason 766be0e5c09SChris Mason srand(55); 767be0e5c09SChris Mason root.node = (struct node *)first_node; 768be0e5c09SChris Mason memset(first_node, 0, sizeof(*first_node)); 769be0e5c09SChris Mason for (i = 0; i < run_size; i++) { 770be0e5c09SChris Mason buf = malloc(64); 771be0e5c09SChris Mason num = next_key(i, max_key); 772be0e5c09SChris Mason // num = i; 773be0e5c09SChris Mason sprintf(buf, "string-%d", num); 774be0e5c09SChris Mason // printf("insert %d\n", num); 775be0e5c09SChris Mason ins.objectid = num; 776be0e5c09SChris Mason ins.offset = 0; 777be0e5c09SChris Mason ins.flags = 0; 778be0e5c09SChris Mason ret = insert_item(&root, &ins, buf, strlen(buf)); 779be0e5c09SChris Mason if (!ret) 780be0e5c09SChris Mason tree_size++; 781be0e5c09SChris Mason } 782be0e5c09SChris Mason srand(55); 783be0e5c09SChris Mason for (i = 0; i < run_size; i++) { 784be0e5c09SChris Mason num = next_key(i, max_key); 785be0e5c09SChris Mason ins.objectid = num; 786be0e5c09SChris Mason ins.offset = 0; 787be0e5c09SChris Mason ins.flags = 0; 788be0e5c09SChris Mason init_path(&path); 789be0e5c09SChris Mason ret = search_slot(&root, &ins, &path); 790be0e5c09SChris Mason if (ret) { 791be0e5c09SChris Mason print_tree(root.node); 792be0e5c09SChris Mason printf("unable to find %d\n", num); 793be0e5c09SChris Mason exit(1); 794be0e5c09SChris Mason } 795be0e5c09SChris Mason } 796be0e5c09SChris Mason printf("node %p level %d total ptrs %d free spc %lu\n", root.node, 797be0e5c09SChris Mason node_level(root.node->header.flags), root.node->header.nritems, 798be0e5c09SChris Mason NODEPTRS_PER_BLOCK - root.node->header.nritems); 799be0e5c09SChris Mason // print_tree(root.node); 800be0e5c09SChris Mason printf("all searches good\n"); 801be0e5c09SChris Mason i = 0; 802be0e5c09SChris Mason srand(55); 803be0e5c09SChris Mason for (i = 0; i < run_size; i++) { 804be0e5c09SChris Mason num = next_key(i, max_key); 805be0e5c09SChris Mason ins.objectid = num; 806be0e5c09SChris Mason del_item(&root, &ins); 807be0e5c09SChris Mason } 808be0e5c09SChris Mason print_tree(root.node); 809be0e5c09SChris Mason return 0; 810be0e5c09SChris Mason } 811