1 /* 2 * Copyright (C) 2011 Red Hat, Inc. 3 * 4 * This file is released under the GPL. 5 */ 6 7 #include "dm-btree-internal.h" 8 #include "dm-space-map.h" 9 #include "dm-transaction-manager.h" 10 11 #include <linux/export.h> 12 #include <linux/device-mapper.h> 13 14 #define DM_MSG_PREFIX "btree" 15 16 /*---------------------------------------------------------------- 17 * Array manipulation 18 *--------------------------------------------------------------*/ 19 static void memcpy_disk(void *dest, const void *src, size_t len) 20 __dm_written_to_disk(src) 21 { 22 memcpy(dest, src, len); 23 __dm_unbless_for_disk(src); 24 } 25 26 static void array_insert(void *base, size_t elt_size, unsigned nr_elts, 27 unsigned index, void *elt) 28 __dm_written_to_disk(elt) 29 { 30 if (index < nr_elts) 31 memmove(base + (elt_size * (index + 1)), 32 base + (elt_size * index), 33 (nr_elts - index) * elt_size); 34 35 memcpy_disk(base + (elt_size * index), elt, elt_size); 36 } 37 38 /*----------------------------------------------------------------*/ 39 40 /* makes the assumption that no two keys are the same. */ 41 static int bsearch(struct btree_node *n, uint64_t key, int want_hi) 42 { 43 int lo = -1, hi = le32_to_cpu(n->header.nr_entries); 44 45 while (hi - lo > 1) { 46 int mid = lo + ((hi - lo) / 2); 47 uint64_t mid_key = le64_to_cpu(n->keys[mid]); 48 49 if (mid_key == key) 50 return mid; 51 52 if (mid_key < key) 53 lo = mid; 54 else 55 hi = mid; 56 } 57 58 return want_hi ? hi : lo; 59 } 60 61 int lower_bound(struct btree_node *n, uint64_t key) 62 { 63 return bsearch(n, key, 0); 64 } 65 66 void inc_children(struct dm_transaction_manager *tm, struct btree_node *n, 67 struct dm_btree_value_type *vt) 68 { 69 unsigned i; 70 uint32_t nr_entries = le32_to_cpu(n->header.nr_entries); 71 72 if (le32_to_cpu(n->header.flags) & INTERNAL_NODE) 73 for (i = 0; i < nr_entries; i++) 74 dm_tm_inc(tm, value64(n, i)); 75 else if (vt->inc) 76 for (i = 0; i < nr_entries; i++) 77 vt->inc(vt->context, value_ptr(n, i)); 78 } 79 80 static int insert_at(size_t value_size, struct btree_node *node, unsigned index, 81 uint64_t key, void *value) 82 __dm_written_to_disk(value) 83 { 84 uint32_t nr_entries = le32_to_cpu(node->header.nr_entries); 85 __le64 key_le = cpu_to_le64(key); 86 87 if (index > nr_entries || 88 index >= le32_to_cpu(node->header.max_entries)) { 89 DMERR("too many entries in btree node for insert"); 90 __dm_unbless_for_disk(value); 91 return -ENOMEM; 92 } 93 94 __dm_bless_for_disk(&key_le); 95 96 array_insert(node->keys, sizeof(*node->keys), nr_entries, index, &key_le); 97 array_insert(value_base(node), value_size, nr_entries, index, value); 98 node->header.nr_entries = cpu_to_le32(nr_entries + 1); 99 100 return 0; 101 } 102 103 /*----------------------------------------------------------------*/ 104 105 /* 106 * We want 3n entries (for some n). This works more nicely for repeated 107 * insert remove loops than (2n + 1). 108 */ 109 static uint32_t calc_max_entries(size_t value_size, size_t block_size) 110 { 111 uint32_t total, n; 112 size_t elt_size = sizeof(uint64_t) + value_size; /* key + value */ 113 114 block_size -= sizeof(struct node_header); 115 total = block_size / elt_size; 116 n = total / 3; /* rounds down */ 117 118 return 3 * n; 119 } 120 121 int dm_btree_empty(struct dm_btree_info *info, dm_block_t *root) 122 { 123 int r; 124 struct dm_block *b; 125 struct btree_node *n; 126 size_t block_size; 127 uint32_t max_entries; 128 129 r = new_block(info, &b); 130 if (r < 0) 131 return r; 132 133 block_size = dm_bm_block_size(dm_tm_get_bm(info->tm)); 134 max_entries = calc_max_entries(info->value_type.size, block_size); 135 136 n = dm_block_data(b); 137 memset(n, 0, block_size); 138 n->header.flags = cpu_to_le32(LEAF_NODE); 139 n->header.nr_entries = cpu_to_le32(0); 140 n->header.max_entries = cpu_to_le32(max_entries); 141 n->header.value_size = cpu_to_le32(info->value_type.size); 142 143 *root = dm_block_location(b); 144 return unlock_block(info, b); 145 } 146 EXPORT_SYMBOL_GPL(dm_btree_empty); 147 148 /*----------------------------------------------------------------*/ 149 150 /* 151 * Deletion uses a recursive algorithm, since we have limited stack space 152 * we explicitly manage our own stack on the heap. 153 */ 154 #define MAX_SPINE_DEPTH 64 155 struct frame { 156 struct dm_block *b; 157 struct btree_node *n; 158 unsigned level; 159 unsigned nr_children; 160 unsigned current_child; 161 }; 162 163 struct del_stack { 164 struct dm_btree_info *info; 165 struct dm_transaction_manager *tm; 166 int top; 167 struct frame spine[MAX_SPINE_DEPTH]; 168 }; 169 170 static int top_frame(struct del_stack *s, struct frame **f) 171 { 172 if (s->top < 0) { 173 DMERR("btree deletion stack empty"); 174 return -EINVAL; 175 } 176 177 *f = s->spine + s->top; 178 179 return 0; 180 } 181 182 static int unprocessed_frames(struct del_stack *s) 183 { 184 return s->top >= 0; 185 } 186 187 static void prefetch_children(struct del_stack *s, struct frame *f) 188 { 189 unsigned i; 190 struct dm_block_manager *bm = dm_tm_get_bm(s->tm); 191 192 for (i = 0; i < f->nr_children; i++) 193 dm_bm_prefetch(bm, value64(f->n, i)); 194 } 195 196 static bool is_internal_level(struct dm_btree_info *info, struct frame *f) 197 { 198 return f->level < (info->levels - 1); 199 } 200 201 static int push_frame(struct del_stack *s, dm_block_t b, unsigned level) 202 { 203 int r; 204 uint32_t ref_count; 205 206 if (s->top >= MAX_SPINE_DEPTH - 1) { 207 DMERR("btree deletion stack out of memory"); 208 return -ENOMEM; 209 } 210 211 r = dm_tm_ref(s->tm, b, &ref_count); 212 if (r) 213 return r; 214 215 if (ref_count > 1) 216 /* 217 * This is a shared node, so we can just decrement it's 218 * reference counter and leave the children. 219 */ 220 dm_tm_dec(s->tm, b); 221 222 else { 223 uint32_t flags; 224 struct frame *f = s->spine + ++s->top; 225 226 r = dm_tm_read_lock(s->tm, b, &btree_node_validator, &f->b); 227 if (r) { 228 s->top--; 229 return r; 230 } 231 232 f->n = dm_block_data(f->b); 233 f->level = level; 234 f->nr_children = le32_to_cpu(f->n->header.nr_entries); 235 f->current_child = 0; 236 237 flags = le32_to_cpu(f->n->header.flags); 238 if (flags & INTERNAL_NODE || is_internal_level(s->info, f)) 239 prefetch_children(s, f); 240 } 241 242 return 0; 243 } 244 245 static void pop_frame(struct del_stack *s) 246 { 247 struct frame *f = s->spine + s->top--; 248 249 dm_tm_dec(s->tm, dm_block_location(f->b)); 250 dm_tm_unlock(s->tm, f->b); 251 } 252 253 int dm_btree_del(struct dm_btree_info *info, dm_block_t root) 254 { 255 int r; 256 struct del_stack *s; 257 258 s = kmalloc(sizeof(*s), GFP_KERNEL); 259 if (!s) 260 return -ENOMEM; 261 s->info = info; 262 s->tm = info->tm; 263 s->top = -1; 264 265 r = push_frame(s, root, 0); 266 if (r) 267 goto out; 268 269 while (unprocessed_frames(s)) { 270 uint32_t flags; 271 struct frame *f; 272 dm_block_t b; 273 274 r = top_frame(s, &f); 275 if (r) 276 goto out; 277 278 if (f->current_child >= f->nr_children) { 279 pop_frame(s); 280 continue; 281 } 282 283 flags = le32_to_cpu(f->n->header.flags); 284 if (flags & INTERNAL_NODE) { 285 b = value64(f->n, f->current_child); 286 f->current_child++; 287 r = push_frame(s, b, f->level); 288 if (r) 289 goto out; 290 291 } else if (is_internal_level(info, f)) { 292 b = value64(f->n, f->current_child); 293 f->current_child++; 294 r = push_frame(s, b, f->level + 1); 295 if (r) 296 goto out; 297 298 } else { 299 if (info->value_type.dec) { 300 unsigned i; 301 302 for (i = 0; i < f->nr_children; i++) 303 info->value_type.dec(info->value_type.context, 304 value_ptr(f->n, i)); 305 } 306 pop_frame(s); 307 } 308 } 309 310 out: 311 kfree(s); 312 return r; 313 } 314 EXPORT_SYMBOL_GPL(dm_btree_del); 315 316 /*----------------------------------------------------------------*/ 317 318 static int btree_lookup_raw(struct ro_spine *s, dm_block_t block, uint64_t key, 319 int (*search_fn)(struct btree_node *, uint64_t), 320 uint64_t *result_key, void *v, size_t value_size) 321 { 322 int i, r; 323 uint32_t flags, nr_entries; 324 325 do { 326 r = ro_step(s, block); 327 if (r < 0) 328 return r; 329 330 i = search_fn(ro_node(s), key); 331 332 flags = le32_to_cpu(ro_node(s)->header.flags); 333 nr_entries = le32_to_cpu(ro_node(s)->header.nr_entries); 334 if (i < 0 || i >= nr_entries) 335 return -ENODATA; 336 337 if (flags & INTERNAL_NODE) 338 block = value64(ro_node(s), i); 339 340 } while (!(flags & LEAF_NODE)); 341 342 *result_key = le64_to_cpu(ro_node(s)->keys[i]); 343 memcpy(v, value_ptr(ro_node(s), i), value_size); 344 345 return 0; 346 } 347 348 int dm_btree_lookup(struct dm_btree_info *info, dm_block_t root, 349 uint64_t *keys, void *value_le) 350 { 351 unsigned level, last_level = info->levels - 1; 352 int r = -ENODATA; 353 uint64_t rkey; 354 __le64 internal_value_le; 355 struct ro_spine spine; 356 357 init_ro_spine(&spine, info); 358 for (level = 0; level < info->levels; level++) { 359 size_t size; 360 void *value_p; 361 362 if (level == last_level) { 363 value_p = value_le; 364 size = info->value_type.size; 365 366 } else { 367 value_p = &internal_value_le; 368 size = sizeof(uint64_t); 369 } 370 371 r = btree_lookup_raw(&spine, root, keys[level], 372 lower_bound, &rkey, 373 value_p, size); 374 375 if (!r) { 376 if (rkey != keys[level]) { 377 exit_ro_spine(&spine); 378 return -ENODATA; 379 } 380 } else { 381 exit_ro_spine(&spine); 382 return r; 383 } 384 385 root = le64_to_cpu(internal_value_le); 386 } 387 exit_ro_spine(&spine); 388 389 return r; 390 } 391 EXPORT_SYMBOL_GPL(dm_btree_lookup); 392 393 /* 394 * Splits a node by creating a sibling node and shifting half the nodes 395 * contents across. Assumes there is a parent node, and it has room for 396 * another child. 397 * 398 * Before: 399 * +--------+ 400 * | Parent | 401 * +--------+ 402 * | 403 * v 404 * +----------+ 405 * | A ++++++ | 406 * +----------+ 407 * 408 * 409 * After: 410 * +--------+ 411 * | Parent | 412 * +--------+ 413 * | | 414 * v +------+ 415 * +---------+ | 416 * | A* +++ | v 417 * +---------+ +-------+ 418 * | B +++ | 419 * +-------+ 420 * 421 * Where A* is a shadow of A. 422 */ 423 static int btree_split_sibling(struct shadow_spine *s, dm_block_t root, 424 unsigned parent_index, uint64_t key) 425 { 426 int r; 427 size_t size; 428 unsigned nr_left, nr_right; 429 struct dm_block *left, *right, *parent; 430 struct btree_node *ln, *rn, *pn; 431 __le64 location; 432 433 left = shadow_current(s); 434 435 r = new_block(s->info, &right); 436 if (r < 0) 437 return r; 438 439 ln = dm_block_data(left); 440 rn = dm_block_data(right); 441 442 nr_left = le32_to_cpu(ln->header.nr_entries) / 2; 443 nr_right = le32_to_cpu(ln->header.nr_entries) - nr_left; 444 445 ln->header.nr_entries = cpu_to_le32(nr_left); 446 447 rn->header.flags = ln->header.flags; 448 rn->header.nr_entries = cpu_to_le32(nr_right); 449 rn->header.max_entries = ln->header.max_entries; 450 rn->header.value_size = ln->header.value_size; 451 memcpy(rn->keys, ln->keys + nr_left, nr_right * sizeof(rn->keys[0])); 452 453 size = le32_to_cpu(ln->header.flags) & INTERNAL_NODE ? 454 sizeof(uint64_t) : s->info->value_type.size; 455 memcpy(value_ptr(rn, 0), value_ptr(ln, nr_left), 456 size * nr_right); 457 458 /* 459 * Patch up the parent 460 */ 461 parent = shadow_parent(s); 462 463 pn = dm_block_data(parent); 464 location = cpu_to_le64(dm_block_location(left)); 465 __dm_bless_for_disk(&location); 466 memcpy_disk(value_ptr(pn, parent_index), 467 &location, sizeof(__le64)); 468 469 location = cpu_to_le64(dm_block_location(right)); 470 __dm_bless_for_disk(&location); 471 472 r = insert_at(sizeof(__le64), pn, parent_index + 1, 473 le64_to_cpu(rn->keys[0]), &location); 474 if (r) 475 return r; 476 477 if (key < le64_to_cpu(rn->keys[0])) { 478 unlock_block(s->info, right); 479 s->nodes[1] = left; 480 } else { 481 unlock_block(s->info, left); 482 s->nodes[1] = right; 483 } 484 485 return 0; 486 } 487 488 /* 489 * Splits a node by creating two new children beneath the given node. 490 * 491 * Before: 492 * +----------+ 493 * | A ++++++ | 494 * +----------+ 495 * 496 * 497 * After: 498 * +------------+ 499 * | A (shadow) | 500 * +------------+ 501 * | | 502 * +------+ +----+ 503 * | | 504 * v v 505 * +-------+ +-------+ 506 * | B +++ | | C +++ | 507 * +-------+ +-------+ 508 */ 509 static int btree_split_beneath(struct shadow_spine *s, uint64_t key) 510 { 511 int r; 512 size_t size; 513 unsigned nr_left, nr_right; 514 struct dm_block *left, *right, *new_parent; 515 struct btree_node *pn, *ln, *rn; 516 __le64 val; 517 518 new_parent = shadow_current(s); 519 520 r = new_block(s->info, &left); 521 if (r < 0) 522 return r; 523 524 r = new_block(s->info, &right); 525 if (r < 0) { 526 /* FIXME: put left */ 527 return r; 528 } 529 530 pn = dm_block_data(new_parent); 531 ln = dm_block_data(left); 532 rn = dm_block_data(right); 533 534 nr_left = le32_to_cpu(pn->header.nr_entries) / 2; 535 nr_right = le32_to_cpu(pn->header.nr_entries) - nr_left; 536 537 ln->header.flags = pn->header.flags; 538 ln->header.nr_entries = cpu_to_le32(nr_left); 539 ln->header.max_entries = pn->header.max_entries; 540 ln->header.value_size = pn->header.value_size; 541 542 rn->header.flags = pn->header.flags; 543 rn->header.nr_entries = cpu_to_le32(nr_right); 544 rn->header.max_entries = pn->header.max_entries; 545 rn->header.value_size = pn->header.value_size; 546 547 memcpy(ln->keys, pn->keys, nr_left * sizeof(pn->keys[0])); 548 memcpy(rn->keys, pn->keys + nr_left, nr_right * sizeof(pn->keys[0])); 549 550 size = le32_to_cpu(pn->header.flags) & INTERNAL_NODE ? 551 sizeof(__le64) : s->info->value_type.size; 552 memcpy(value_ptr(ln, 0), value_ptr(pn, 0), nr_left * size); 553 memcpy(value_ptr(rn, 0), value_ptr(pn, nr_left), 554 nr_right * size); 555 556 /* new_parent should just point to l and r now */ 557 pn->header.flags = cpu_to_le32(INTERNAL_NODE); 558 pn->header.nr_entries = cpu_to_le32(2); 559 pn->header.max_entries = cpu_to_le32( 560 calc_max_entries(sizeof(__le64), 561 dm_bm_block_size( 562 dm_tm_get_bm(s->info->tm)))); 563 pn->header.value_size = cpu_to_le32(sizeof(__le64)); 564 565 val = cpu_to_le64(dm_block_location(left)); 566 __dm_bless_for_disk(&val); 567 pn->keys[0] = ln->keys[0]; 568 memcpy_disk(value_ptr(pn, 0), &val, sizeof(__le64)); 569 570 val = cpu_to_le64(dm_block_location(right)); 571 __dm_bless_for_disk(&val); 572 pn->keys[1] = rn->keys[0]; 573 memcpy_disk(value_ptr(pn, 1), &val, sizeof(__le64)); 574 575 /* 576 * rejig the spine. This is ugly, since it knows too 577 * much about the spine 578 */ 579 if (s->nodes[0] != new_parent) { 580 unlock_block(s->info, s->nodes[0]); 581 s->nodes[0] = new_parent; 582 } 583 if (key < le64_to_cpu(rn->keys[0])) { 584 unlock_block(s->info, right); 585 s->nodes[1] = left; 586 } else { 587 unlock_block(s->info, left); 588 s->nodes[1] = right; 589 } 590 s->count = 2; 591 592 return 0; 593 } 594 595 static int btree_insert_raw(struct shadow_spine *s, dm_block_t root, 596 struct dm_btree_value_type *vt, 597 uint64_t key, unsigned *index) 598 { 599 int r, i = *index, top = 1; 600 struct btree_node *node; 601 602 for (;;) { 603 r = shadow_step(s, root, vt); 604 if (r < 0) 605 return r; 606 607 node = dm_block_data(shadow_current(s)); 608 609 /* 610 * We have to patch up the parent node, ugly, but I don't 611 * see a way to do this automatically as part of the spine 612 * op. 613 */ 614 if (shadow_has_parent(s) && i >= 0) { /* FIXME: second clause unness. */ 615 __le64 location = cpu_to_le64(dm_block_location(shadow_current(s))); 616 617 __dm_bless_for_disk(&location); 618 memcpy_disk(value_ptr(dm_block_data(shadow_parent(s)), i), 619 &location, sizeof(__le64)); 620 } 621 622 node = dm_block_data(shadow_current(s)); 623 624 if (node->header.nr_entries == node->header.max_entries) { 625 if (top) 626 r = btree_split_beneath(s, key); 627 else 628 r = btree_split_sibling(s, root, i, key); 629 630 if (r < 0) 631 return r; 632 } 633 634 node = dm_block_data(shadow_current(s)); 635 636 i = lower_bound(node, key); 637 638 if (le32_to_cpu(node->header.flags) & LEAF_NODE) 639 break; 640 641 if (i < 0) { 642 /* change the bounds on the lowest key */ 643 node->keys[0] = cpu_to_le64(key); 644 i = 0; 645 } 646 647 root = value64(node, i); 648 top = 0; 649 } 650 651 if (i < 0 || le64_to_cpu(node->keys[i]) != key) 652 i++; 653 654 *index = i; 655 return 0; 656 } 657 658 static int insert(struct dm_btree_info *info, dm_block_t root, 659 uint64_t *keys, void *value, dm_block_t *new_root, 660 int *inserted) 661 __dm_written_to_disk(value) 662 { 663 int r, need_insert; 664 unsigned level, index = -1, last_level = info->levels - 1; 665 dm_block_t block = root; 666 struct shadow_spine spine; 667 struct btree_node *n; 668 struct dm_btree_value_type le64_type; 669 670 le64_type.context = NULL; 671 le64_type.size = sizeof(__le64); 672 le64_type.inc = NULL; 673 le64_type.dec = NULL; 674 le64_type.equal = NULL; 675 676 init_shadow_spine(&spine, info); 677 678 for (level = 0; level < (info->levels - 1); level++) { 679 r = btree_insert_raw(&spine, block, &le64_type, keys[level], &index); 680 if (r < 0) 681 goto bad; 682 683 n = dm_block_data(shadow_current(&spine)); 684 need_insert = ((index >= le32_to_cpu(n->header.nr_entries)) || 685 (le64_to_cpu(n->keys[index]) != keys[level])); 686 687 if (need_insert) { 688 dm_block_t new_tree; 689 __le64 new_le; 690 691 r = dm_btree_empty(info, &new_tree); 692 if (r < 0) 693 goto bad; 694 695 new_le = cpu_to_le64(new_tree); 696 __dm_bless_for_disk(&new_le); 697 698 r = insert_at(sizeof(uint64_t), n, index, 699 keys[level], &new_le); 700 if (r) 701 goto bad; 702 } 703 704 if (level < last_level) 705 block = value64(n, index); 706 } 707 708 r = btree_insert_raw(&spine, block, &info->value_type, 709 keys[level], &index); 710 if (r < 0) 711 goto bad; 712 713 n = dm_block_data(shadow_current(&spine)); 714 need_insert = ((index >= le32_to_cpu(n->header.nr_entries)) || 715 (le64_to_cpu(n->keys[index]) != keys[level])); 716 717 if (need_insert) { 718 if (inserted) 719 *inserted = 1; 720 721 r = insert_at(info->value_type.size, n, index, 722 keys[level], value); 723 if (r) 724 goto bad_unblessed; 725 } else { 726 if (inserted) 727 *inserted = 0; 728 729 if (info->value_type.dec && 730 (!info->value_type.equal || 731 !info->value_type.equal( 732 info->value_type.context, 733 value_ptr(n, index), 734 value))) { 735 info->value_type.dec(info->value_type.context, 736 value_ptr(n, index)); 737 } 738 memcpy_disk(value_ptr(n, index), 739 value, info->value_type.size); 740 } 741 742 *new_root = shadow_root(&spine); 743 exit_shadow_spine(&spine); 744 745 return 0; 746 747 bad: 748 __dm_unbless_for_disk(value); 749 bad_unblessed: 750 exit_shadow_spine(&spine); 751 return r; 752 } 753 754 int dm_btree_insert(struct dm_btree_info *info, dm_block_t root, 755 uint64_t *keys, void *value, dm_block_t *new_root) 756 __dm_written_to_disk(value) 757 { 758 return insert(info, root, keys, value, new_root, NULL); 759 } 760 EXPORT_SYMBOL_GPL(dm_btree_insert); 761 762 int dm_btree_insert_notify(struct dm_btree_info *info, dm_block_t root, 763 uint64_t *keys, void *value, dm_block_t *new_root, 764 int *inserted) 765 __dm_written_to_disk(value) 766 { 767 return insert(info, root, keys, value, new_root, inserted); 768 } 769 EXPORT_SYMBOL_GPL(dm_btree_insert_notify); 770 771 /*----------------------------------------------------------------*/ 772 773 static int find_highest_key(struct ro_spine *s, dm_block_t block, 774 uint64_t *result_key, dm_block_t *next_block) 775 { 776 int i, r; 777 uint32_t flags; 778 779 do { 780 r = ro_step(s, block); 781 if (r < 0) 782 return r; 783 784 flags = le32_to_cpu(ro_node(s)->header.flags); 785 i = le32_to_cpu(ro_node(s)->header.nr_entries); 786 if (!i) 787 return -ENODATA; 788 else 789 i--; 790 791 *result_key = le64_to_cpu(ro_node(s)->keys[i]); 792 if (next_block || flags & INTERNAL_NODE) 793 block = value64(ro_node(s), i); 794 795 } while (flags & INTERNAL_NODE); 796 797 if (next_block) 798 *next_block = block; 799 return 0; 800 } 801 802 int dm_btree_find_highest_key(struct dm_btree_info *info, dm_block_t root, 803 uint64_t *result_keys) 804 { 805 int r = 0, count = 0, level; 806 struct ro_spine spine; 807 808 init_ro_spine(&spine, info); 809 for (level = 0; level < info->levels; level++) { 810 r = find_highest_key(&spine, root, result_keys + level, 811 level == info->levels - 1 ? NULL : &root); 812 if (r == -ENODATA) { 813 r = 0; 814 break; 815 816 } else if (r) 817 break; 818 819 count++; 820 } 821 exit_ro_spine(&spine); 822 823 return r ? r : count; 824 } 825 EXPORT_SYMBOL_GPL(dm_btree_find_highest_key); 826 827 /* 828 * FIXME: We shouldn't use a recursive algorithm when we have limited stack 829 * space. Also this only works for single level trees. 830 */ 831 static int walk_node(struct ro_spine *s, dm_block_t block, 832 int (*fn)(void *context, uint64_t *keys, void *leaf), 833 void *context) 834 { 835 int r; 836 unsigned i, nr; 837 struct btree_node *n; 838 uint64_t keys; 839 840 r = ro_step(s, block); 841 n = ro_node(s); 842 843 nr = le32_to_cpu(n->header.nr_entries); 844 for (i = 0; i < nr; i++) { 845 if (le32_to_cpu(n->header.flags) & INTERNAL_NODE) { 846 r = walk_node(s, value64(n, i), fn, context); 847 if (r) 848 goto out; 849 } else { 850 keys = le64_to_cpu(*key_ptr(n, i)); 851 r = fn(context, &keys, value_ptr(n, i)); 852 if (r) 853 goto out; 854 } 855 } 856 857 out: 858 ro_pop(s); 859 return r; 860 } 861 862 int dm_btree_walk(struct dm_btree_info *info, dm_block_t root, 863 int (*fn)(void *context, uint64_t *keys, void *leaf), 864 void *context) 865 { 866 int r; 867 struct ro_spine spine; 868 869 BUG_ON(info->levels > 1); 870 871 init_ro_spine(&spine, info); 872 r = walk_node(&spine, root, fn, context); 873 exit_ro_spine(&spine); 874 875 return r; 876 } 877 EXPORT_SYMBOL_GPL(dm_btree_walk); 878