/*
 * Copyright (C) 2011 Red Hat, Inc.
 *
 * This file is released under the GPL.
 */

#include "dm-btree-internal.h"
#include "dm-space-map.h"
#include "dm-transaction-manager.h"

#include <linux/export.h>
#include <linux/device-mapper.h>

#define DM_MSG_PREFIX "btree"

/*----------------------------------------------------------------
 * Array manipulation
 *--------------------------------------------------------------*/
static void memcpy_disk(void *dest, const void *src, size_t len)
	__dm_written_to_disk(src)
{
	memcpy(dest, src, len);
	__dm_unbless_for_disk(src);
}

static void array_insert(void *base, size_t elt_size, unsigned nr_elts,
			 unsigned index, void *elt)
	__dm_written_to_disk(elt)
{
	if (index < nr_elts)
		memmove(base + (elt_size * (index + 1)),
			base + (elt_size * index),
			(nr_elts - index) * elt_size);

	memcpy_disk(base + (elt_size * index), elt, elt_size);
}

/*----------------------------------------------------------------*/

/* Makes the assumption that no two keys are the same. */
static int bsearch(struct btree_node *n, uint64_t key, int want_hi)
{
	int lo = -1, hi = le32_to_cpu(n->header.nr_entries);

	while (hi - lo > 1) {
		int mid = lo + ((hi - lo) / 2);
		uint64_t mid_key = le64_to_cpu(n->keys[mid]);

		if (mid_key == key)
			return mid;

		if (mid_key < key)
			lo = mid;
		else
			hi = mid;
	}

	return want_hi ? hi : lo;
}

int lower_bound(struct btree_node *n, uint64_t key)
{
	return bsearch(n, key, 0);
}

void inc_children(struct dm_transaction_manager *tm, struct btree_node *n,
		  struct dm_btree_value_type *vt)
{
	unsigned i;
	uint32_t nr_entries = le32_to_cpu(n->header.nr_entries);

	if (le32_to_cpu(n->header.flags) & INTERNAL_NODE)
		for (i = 0; i < nr_entries; i++)
			dm_tm_inc(tm, value64(n, i));
	else if (vt->inc)
		for (i = 0; i < nr_entries; i++)
			vt->inc(vt->context, value_ptr(n, i));
}

static int insert_at(size_t value_size, struct btree_node *node, unsigned index,
		     uint64_t key, void *value)
	__dm_written_to_disk(value)
{
	uint32_t nr_entries = le32_to_cpu(node->header.nr_entries);
	__le64 key_le = cpu_to_le64(key);

	if (index > nr_entries ||
	    index >= le32_to_cpu(node->header.max_entries)) {
		DMERR("too many entries in btree node for insert");
		__dm_unbless_for_disk(value);
		return -ENOMEM;
	}

	__dm_bless_for_disk(&key_le);

	array_insert(node->keys, sizeof(*node->keys), nr_entries, index, &key_le);
	array_insert(value_base(node), value_size, nr_entries, index, value);
	node->header.nr_entries = cpu_to_le32(nr_entries + 1);

	return 0;
}

/*----------------------------------------------------------------*/

/*
 * We want 3n entries (for some n). This works more nicely for repeated
 * insert/remove loops than (2n + 1).
 */
static uint32_t calc_max_entries(size_t value_size, size_t block_size)
{
	uint32_t total, n;
	size_t elt_size = sizeof(uint64_t) + value_size; /* key + value */

	block_size -= sizeof(struct node_header);
	total = block_size / elt_size;
	n = total / 3;		/* rounds down */

	return 3 * n;
}
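
/*
 * Worked example (illustrative only, assuming a 4096-byte metadata block
 * and the 32-byte node_header from dm-btree-internal.h): for 8-byte
 * values, elt_size = 8 + 8 = 16, so (4096 - 32) / 16 = 254 key/value
 * slots fit in a node; rounding down to a multiple of 3 gives
 * max_entries = 252.
 */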

int dm_btree_empty(struct dm_btree_info *info, dm_block_t *root)
{
	int r;
	struct dm_block *b;
	struct btree_node *n;
	size_t block_size;
	uint32_t max_entries;

	r = new_block(info, &b);
	if (r < 0)
		return r;

	block_size = dm_bm_block_size(dm_tm_get_bm(info->tm));
	max_entries = calc_max_entries(info->value_type.size, block_size);

	n = dm_block_data(b);
	memset(n, 0, block_size);
	n->header.flags = cpu_to_le32(LEAF_NODE);
	n->header.nr_entries = cpu_to_le32(0);
	n->header.max_entries = cpu_to_le32(max_entries);
	n->header.value_size = cpu_to_le32(info->value_type.size);

	*root = dm_block_location(b);
	return unlock_block(info, b);
}
EXPORT_SYMBOL_GPL(dm_btree_empty);

/*----------------------------------------------------------------*/
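
/*
 * Usage sketch (not part of this file): a caller typically fills in a
 * dm_btree_info and creates an empty tree inside an open transaction.
 * The transaction manager 'tm' below is assumed to exist already.
 *
 *	struct dm_btree_info info;
 *	dm_block_t root;
 *	int r;
 *
 *	memset(&info, 0, sizeof(info));
 *	info.tm = tm;
 *	info.levels = 1;			// one 64-bit key per lookup
 *	info.value_type.size = sizeof(__le64);	// values stored little-endian
 *	// inc/dec/equal left NULL: plain values, no reference counting
 *
 *	r = dm_btree_empty(&info, &root);	// 'root' must then be committed
 */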

/*
 * Deletion is naturally expressed recursively, but since we have limited
 * stack space we explicitly manage our own stack on the heap.
 */
#define MAX_SPINE_DEPTH 64
struct frame {
	struct dm_block *b;
	struct btree_node *n;
	unsigned level;
	unsigned nr_children;
	unsigned current_child;
};

struct del_stack {
	struct dm_transaction_manager *tm;
	int top;
	struct frame spine[MAX_SPINE_DEPTH];
};

static int top_frame(struct del_stack *s, struct frame **f)
{
	if (s->top < 0) {
		DMERR("btree deletion stack empty");
		return -EINVAL;
	}

	*f = s->spine + s->top;

	return 0;
}

static int unprocessed_frames(struct del_stack *s)
{
	return s->top >= 0;
}

static int push_frame(struct del_stack *s, dm_block_t b, unsigned level)
{
	int r;
	uint32_t ref_count;

	if (s->top >= MAX_SPINE_DEPTH - 1) {
		DMERR("btree deletion stack out of memory");
		return -ENOMEM;
	}

	r = dm_tm_ref(s->tm, b, &ref_count);
	if (r)
		return r;

	if (ref_count > 1)
		/*
		 * This is a shared node, so we can just decrement its
		 * reference counter and leave the children.
		 */
		dm_tm_dec(s->tm, b);

	else {
		struct frame *f = s->spine + ++s->top;

		r = dm_tm_read_lock(s->tm, b, &btree_node_validator, &f->b);
		if (r) {
			s->top--;
			return r;
		}

		f->n = dm_block_data(f->b);
		f->level = level;
		f->nr_children = le32_to_cpu(f->n->header.nr_entries);
		f->current_child = 0;
	}

	return 0;
}

static void pop_frame(struct del_stack *s)
{
	struct frame *f = s->spine + s->top--;

	dm_tm_dec(s->tm, dm_block_location(f->b));
	dm_tm_unlock(s->tm, f->b);
}

static bool is_internal_level(struct dm_btree_info *info, struct frame *f)
{
	return f->level < (info->levels - 1);
}

int dm_btree_del(struct dm_btree_info *info, dm_block_t root)
{
	int r;
	struct del_stack *s;

	s = kmalloc(sizeof(*s), GFP_KERNEL);
	if (!s)
		return -ENOMEM;
	s->tm = info->tm;
	s->top = -1;

	r = push_frame(s, root, 0);
	if (r)
		goto out;

	while (unprocessed_frames(s)) {
		uint32_t flags;
		struct frame *f;
		dm_block_t b;

		r = top_frame(s, &f);
		if (r)
			goto out;

		if (f->current_child >= f->nr_children) {
			pop_frame(s);
			continue;
		}

		flags = le32_to_cpu(f->n->header.flags);
		if (flags & INTERNAL_NODE) {
			b = value64(f->n, f->current_child);
			f->current_child++;
			r = push_frame(s, b, f->level);
			if (r)
				goto out;

		} else if (is_internal_level(info, f)) {
			b = value64(f->n, f->current_child);
			f->current_child++;
			r = push_frame(s, b, f->level + 1);
			if (r)
				goto out;

		} else {
			if (info->value_type.dec) {
				unsigned i;

				for (i = 0; i < f->nr_children; i++)
					info->value_type.dec(info->value_type.context,
							     value_ptr(f->n, i));
			}
			f->current_child = f->nr_children;
		}
	}

out:
	kfree(s);
	return r;
}
EXPORT_SYMBOL_GPL(dm_btree_del);

/*----------------------------------------------------------------*/
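
/*
 * Illustration (not part of the original code): lower_bound() above
 * returns the index of the largest key that is <= the requested key, or
 * -1 if every key in the node is larger.  For a node holding keys
 * { 10, 20, 30 }:
 *
 *	lower_bound(n, 5)  == -1
 *	lower_bound(n, 10) ==  0
 *	lower_bound(n, 25) ==  1
 *	lower_bound(n, 35) ==  2
 *
 * btree_lookup_raw() below relies on this to pick the child to descend
 * into at each internal node.
 */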

static int btree_lookup_raw(struct ro_spine *s, dm_block_t block, uint64_t key,
			    int (*search_fn)(struct btree_node *, uint64_t),
			    uint64_t *result_key, void *v, size_t value_size)
{
	int i, r;
	uint32_t flags, nr_entries;

	do {
		r = ro_step(s, block);
		if (r < 0)
			return r;

		i = search_fn(ro_node(s), key);

		flags = le32_to_cpu(ro_node(s)->header.flags);
		nr_entries = le32_to_cpu(ro_node(s)->header.nr_entries);
		if (i < 0 || i >= nr_entries)
			return -ENODATA;

		if (flags & INTERNAL_NODE)
			block = value64(ro_node(s), i);

	} while (!(flags & LEAF_NODE));

	*result_key = le64_to_cpu(ro_node(s)->keys[i]);
	memcpy(v, value_ptr(ro_node(s), i), value_size);

	return 0;
}

int dm_btree_lookup(struct dm_btree_info *info, dm_block_t root,
		    uint64_t *keys, void *value_le)
{
	unsigned level, last_level = info->levels - 1;
	int r = -ENODATA;
	uint64_t rkey;
	__le64 internal_value_le;
	struct ro_spine spine;

	init_ro_spine(&spine, info);
	for (level = 0; level < info->levels; level++) {
		size_t size;
		void *value_p;

		if (level == last_level) {
			value_p = value_le;
			size = info->value_type.size;

		} else {
			value_p = &internal_value_le;
			size = sizeof(uint64_t);
		}

		r = btree_lookup_raw(&spine, root, keys[level],
				     lower_bound, &rkey,
				     value_p, size);

		if (!r) {
			if (rkey != keys[level]) {
				exit_ro_spine(&spine);
				return -ENODATA;
			}
		} else {
			exit_ro_spine(&spine);
			return r;
		}

		root = le64_to_cpu(internal_value_le);
	}
	exit_ro_spine(&spine);

	return r;
}
EXPORT_SYMBOL_GPL(dm_btree_lookup);
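
/*
 * Usage sketch (not part of this file): with info.levels == 2 the keys
 * array supplies one key per level.  dm-thin, for example, keys the first
 * level by device id and the second by virtual block; the names below are
 * purely illustrative.
 *
 *	__le64 value_le;
 *	uint64_t keys[2] = { dev_id, virt_block };
 *
 *	r = dm_btree_lookup(&info, root, keys, &value_le);
 *	if (r == -ENODATA)
 *		// no mapping for this key
 *	else if (!r)
 *		// mapping found, decode value_le
 */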
474 * 475 * Before: 476 * +----------+ 477 * | A ++++++ | 478 * +----------+ 479 * 480 * 481 * After: 482 * +------------+ 483 * | A (shadow) | 484 * +------------+ 485 * | | 486 * +------+ +----+ 487 * | | 488 * v v 489 * +-------+ +-------+ 490 * | B +++ | | C +++ | 491 * +-------+ +-------+ 492 */ 493 static int btree_split_beneath(struct shadow_spine *s, uint64_t key) 494 { 495 int r; 496 size_t size; 497 unsigned nr_left, nr_right; 498 struct dm_block *left, *right, *new_parent; 499 struct btree_node *pn, *ln, *rn; 500 __le64 val; 501 502 new_parent = shadow_current(s); 503 504 r = new_block(s->info, &left); 505 if (r < 0) 506 return r; 507 508 r = new_block(s->info, &right); 509 if (r < 0) { 510 /* FIXME: put left */ 511 return r; 512 } 513 514 pn = dm_block_data(new_parent); 515 ln = dm_block_data(left); 516 rn = dm_block_data(right); 517 518 nr_left = le32_to_cpu(pn->header.nr_entries) / 2; 519 nr_right = le32_to_cpu(pn->header.nr_entries) - nr_left; 520 521 ln->header.flags = pn->header.flags; 522 ln->header.nr_entries = cpu_to_le32(nr_left); 523 ln->header.max_entries = pn->header.max_entries; 524 ln->header.value_size = pn->header.value_size; 525 526 rn->header.flags = pn->header.flags; 527 rn->header.nr_entries = cpu_to_le32(nr_right); 528 rn->header.max_entries = pn->header.max_entries; 529 rn->header.value_size = pn->header.value_size; 530 531 memcpy(ln->keys, pn->keys, nr_left * sizeof(pn->keys[0])); 532 memcpy(rn->keys, pn->keys + nr_left, nr_right * sizeof(pn->keys[0])); 533 534 size = le32_to_cpu(pn->header.flags) & INTERNAL_NODE ? 535 sizeof(__le64) : s->info->value_type.size; 536 memcpy(value_ptr(ln, 0), value_ptr(pn, 0), nr_left * size); 537 memcpy(value_ptr(rn, 0), value_ptr(pn, nr_left), 538 nr_right * size); 539 540 /* new_parent should just point to l and r now */ 541 pn->header.flags = cpu_to_le32(INTERNAL_NODE); 542 pn->header.nr_entries = cpu_to_le32(2); 543 pn->header.max_entries = cpu_to_le32( 544 calc_max_entries(sizeof(__le64), 545 dm_bm_block_size( 546 dm_tm_get_bm(s->info->tm)))); 547 pn->header.value_size = cpu_to_le32(sizeof(__le64)); 548 549 val = cpu_to_le64(dm_block_location(left)); 550 __dm_bless_for_disk(&val); 551 pn->keys[0] = ln->keys[0]; 552 memcpy_disk(value_ptr(pn, 0), &val, sizeof(__le64)); 553 554 val = cpu_to_le64(dm_block_location(right)); 555 __dm_bless_for_disk(&val); 556 pn->keys[1] = rn->keys[0]; 557 memcpy_disk(value_ptr(pn, 1), &val, sizeof(__le64)); 558 559 /* 560 * rejig the spine. This is ugly, since it knows too 561 * much about the spine 562 */ 563 if (s->nodes[0] != new_parent) { 564 unlock_block(s->info, s->nodes[0]); 565 s->nodes[0] = new_parent; 566 } 567 if (key < le64_to_cpu(rn->keys[0])) { 568 unlock_block(s->info, right); 569 s->nodes[1] = left; 570 } else { 571 unlock_block(s->info, left); 572 s->nodes[1] = right; 573 } 574 s->count = 2; 575 576 return 0; 577 } 578 579 static int btree_insert_raw(struct shadow_spine *s, dm_block_t root, 580 struct dm_btree_value_type *vt, 581 uint64_t key, unsigned *index) 582 { 583 int r, i = *index, top = 1; 584 struct btree_node *node; 585 586 for (;;) { 587 r = shadow_step(s, root, vt); 588 if (r < 0) 589 return r; 590 591 node = dm_block_data(shadow_current(s)); 592 593 /* 594 * We have to patch up the parent node, ugly, but I don't 595 * see a way to do this automatically as part of the spine 596 * op. 597 */ 598 if (shadow_has_parent(s) && i >= 0) { /* FIXME: second clause unness. 

/*
 * Splits a node by creating two new children beneath the given node.
 *
 * Before:
 *	+----------+
 *	| A ++++++ |
 *	+----------+
 *
 *
 * After:
 *	+------------+
 *	| A (shadow) |
 *	+------------+
 *	    |    |
 *   +------+    +----+
 *   |                |
 *   v                v
 * +-------+      +-------+
 * | B +++ |      | C +++ |
 * +-------+      +-------+
 */
static int btree_split_beneath(struct shadow_spine *s, uint64_t key)
{
	int r;
	size_t size;
	unsigned nr_left, nr_right;
	struct dm_block *left, *right, *new_parent;
	struct btree_node *pn, *ln, *rn;
	__le64 val;

	new_parent = shadow_current(s);

	r = new_block(s->info, &left);
	if (r < 0)
		return r;

	r = new_block(s->info, &right);
	if (r < 0) {
		/* FIXME: put left */
		return r;
	}

	pn = dm_block_data(new_parent);
	ln = dm_block_data(left);
	rn = dm_block_data(right);

	nr_left = le32_to_cpu(pn->header.nr_entries) / 2;
	nr_right = le32_to_cpu(pn->header.nr_entries) - nr_left;

	ln->header.flags = pn->header.flags;
	ln->header.nr_entries = cpu_to_le32(nr_left);
	ln->header.max_entries = pn->header.max_entries;
	ln->header.value_size = pn->header.value_size;

	rn->header.flags = pn->header.flags;
	rn->header.nr_entries = cpu_to_le32(nr_right);
	rn->header.max_entries = pn->header.max_entries;
	rn->header.value_size = pn->header.value_size;

	memcpy(ln->keys, pn->keys, nr_left * sizeof(pn->keys[0]));
	memcpy(rn->keys, pn->keys + nr_left, nr_right * sizeof(pn->keys[0]));

	size = le32_to_cpu(pn->header.flags) & INTERNAL_NODE ?
		sizeof(__le64) : s->info->value_type.size;
	memcpy(value_ptr(ln, 0), value_ptr(pn, 0), nr_left * size);
	memcpy(value_ptr(rn, 0), value_ptr(pn, nr_left),
	       nr_right * size);

	/* new_parent should just point to l and r now */
	pn->header.flags = cpu_to_le32(INTERNAL_NODE);
	pn->header.nr_entries = cpu_to_le32(2);
	pn->header.max_entries = cpu_to_le32(
		calc_max_entries(sizeof(__le64),
				 dm_bm_block_size(
					 dm_tm_get_bm(s->info->tm))));
	pn->header.value_size = cpu_to_le32(sizeof(__le64));

	val = cpu_to_le64(dm_block_location(left));
	__dm_bless_for_disk(&val);
	pn->keys[0] = ln->keys[0];
	memcpy_disk(value_ptr(pn, 0), &val, sizeof(__le64));

	val = cpu_to_le64(dm_block_location(right));
	__dm_bless_for_disk(&val);
	pn->keys[1] = rn->keys[0];
	memcpy_disk(value_ptr(pn, 1), &val, sizeof(__le64));

	/*
	 * Rejig the spine.  This is ugly, since it knows too
	 * much about the spine.
	 */
	if (s->nodes[0] != new_parent) {
		unlock_block(s->info, s->nodes[0]);
		s->nodes[0] = new_parent;
	}
	if (key < le64_to_cpu(rn->keys[0])) {
		unlock_block(s->info, right);
		s->nodes[1] = left;
	} else {
		unlock_block(s->info, left);
		s->nodes[1] = right;
	}
	s->count = 2;

	return 0;
}

static int btree_insert_raw(struct shadow_spine *s, dm_block_t root,
			    struct dm_btree_value_type *vt,
			    uint64_t key, unsigned *index)
{
	int r, i = *index, top = 1;
	struct btree_node *node;

	for (;;) {
		r = shadow_step(s, root, vt);
		if (r < 0)
			return r;

		node = dm_block_data(shadow_current(s));

		/*
		 * We have to patch up the parent node, ugly, but I don't
		 * see a way to do this automatically as part of the spine
		 * op.
		 */
		if (shadow_has_parent(s) && i >= 0) { /* FIXME: second clause unnecessary */
			__le64 location = cpu_to_le64(dm_block_location(shadow_current(s)));

			__dm_bless_for_disk(&location);
			memcpy_disk(value_ptr(dm_block_data(shadow_parent(s)), i),
				    &location, sizeof(__le64));
		}

		node = dm_block_data(shadow_current(s));

		if (node->header.nr_entries == node->header.max_entries) {
			if (top)
				r = btree_split_beneath(s, key);
			else
				r = btree_split_sibling(s, root, i, key);

			if (r < 0)
				return r;
		}

		node = dm_block_data(shadow_current(s));

		i = lower_bound(node, key);

		if (le32_to_cpu(node->header.flags) & LEAF_NODE)
			break;

		if (i < 0) {
			/* change the bounds on the lowest key */
			node->keys[0] = cpu_to_le64(key);
			i = 0;
		}

		root = value64(node, i);
		top = 0;
	}

	if (i < 0 || le64_to_cpu(node->keys[i]) != key)
		i++;

	*index = i;
	return 0;
}

static int insert(struct dm_btree_info *info, dm_block_t root,
		  uint64_t *keys, void *value, dm_block_t *new_root,
		  int *inserted)
	__dm_written_to_disk(value)
{
	int r, need_insert;
	unsigned level, index = -1, last_level = info->levels - 1;
	dm_block_t block = root;
	struct shadow_spine spine;
	struct btree_node *n;
	struct dm_btree_value_type le64_type;

	le64_type.context = NULL;
	le64_type.size = sizeof(__le64);
	le64_type.inc = NULL;
	le64_type.dec = NULL;
	le64_type.equal = NULL;

	init_shadow_spine(&spine, info);

	for (level = 0; level < (info->levels - 1); level++) {
		r = btree_insert_raw(&spine, block, &le64_type, keys[level], &index);
		if (r < 0)
			goto bad;

		n = dm_block_data(shadow_current(&spine));
		need_insert = ((index >= le32_to_cpu(n->header.nr_entries)) ||
			       (le64_to_cpu(n->keys[index]) != keys[level]));

		if (need_insert) {
			dm_block_t new_tree;
			__le64 new_le;

			r = dm_btree_empty(info, &new_tree);
			if (r < 0)
				goto bad;

			new_le = cpu_to_le64(new_tree);
			__dm_bless_for_disk(&new_le);

			r = insert_at(sizeof(uint64_t), n, index,
				      keys[level], &new_le);
			if (r)
				goto bad;
		}

		if (level < last_level)
			block = value64(n, index);
	}

	r = btree_insert_raw(&spine, block, &info->value_type,
			     keys[level], &index);
	if (r < 0)
		goto bad;

	n = dm_block_data(shadow_current(&spine));
	need_insert = ((index >= le32_to_cpu(n->header.nr_entries)) ||
		       (le64_to_cpu(n->keys[index]) != keys[level]));

	if (need_insert) {
		if (inserted)
			*inserted = 1;

		r = insert_at(info->value_type.size, n, index,
			      keys[level], value);
		if (r)
			goto bad_unblessed;
	} else {
		if (inserted)
			*inserted = 0;

		if (info->value_type.dec &&
		    (!info->value_type.equal ||
		     !info->value_type.equal(
			     info->value_type.context,
			     value_ptr(n, index),
			     value))) {
			info->value_type.dec(info->value_type.context,
					     value_ptr(n, index));
		}
		memcpy_disk(value_ptr(n, index),
			    value, info->value_type.size);
	}

	*new_root = shadow_root(&spine);
	exit_shadow_spine(&spine);

	return 0;

bad:
	__dm_unbless_for_disk(value);
bad_unblessed:
	exit_shadow_spine(&spine);
	return r;
}

int dm_btree_insert(struct dm_btree_info *info, dm_block_t root,
		    uint64_t *keys, void *value, dm_block_t *new_root)
	__dm_written_to_disk(value)
{
	return insert(info, root, keys, value, new_root, NULL);
}
EXPORT_SYMBOL_GPL(dm_btree_insert);
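
/*
 * Usage sketch (not part of this file): the value handed to
 * dm_btree_insert() must already be in its on-disk (little-endian) form
 * and must be blessed first; the tree is shadowed, so the caller has to
 * adopt the returned root.  Names below are purely illustrative.
 *
 *	uint64_t keys[1] = { virt_block };
 *	__le64 value_le = cpu_to_le64(data_block);
 *	dm_block_t new_root;
 *
 *	__dm_bless_for_disk(&value_le);
 *	r = dm_btree_insert(&info, root, keys, &value_le, &new_root);
 *	if (!r)
 *		root = new_root;	// use the new root from now on
 */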

int dm_btree_insert_notify(struct dm_btree_info *info, dm_block_t root,
			   uint64_t *keys, void *value, dm_block_t *new_root,
			   int *inserted)
	__dm_written_to_disk(value)
{
	return insert(info, root, keys, value, new_root, inserted);
}
EXPORT_SYMBOL_GPL(dm_btree_insert_notify);

/*----------------------------------------------------------------*/

static int find_highest_key(struct ro_spine *s, dm_block_t block,
			    uint64_t *result_key, dm_block_t *next_block)
{
	int i, r;
	uint32_t flags;

	do {
		r = ro_step(s, block);
		if (r < 0)
			return r;

		flags = le32_to_cpu(ro_node(s)->header.flags);
		i = le32_to_cpu(ro_node(s)->header.nr_entries);
		if (!i)
			return -ENODATA;
		else
			i--;

		*result_key = le64_to_cpu(ro_node(s)->keys[i]);
		if (next_block || flags & INTERNAL_NODE)
			block = value64(ro_node(s), i);

	} while (flags & INTERNAL_NODE);

	if (next_block)
		*next_block = block;
	return 0;
}

int dm_btree_find_highest_key(struct dm_btree_info *info, dm_block_t root,
			      uint64_t *result_keys)
{
	int r = 0, count = 0, level;
	struct ro_spine spine;

	init_ro_spine(&spine, info);
	for (level = 0; level < info->levels; level++) {
		r = find_highest_key(&spine, root, result_keys + level,
				     level == info->levels - 1 ? NULL : &root);
		if (r == -ENODATA) {
			r = 0;
			break;

		} else if (r)
			break;

		count++;
	}
	exit_ro_spine(&spine);

	return r ? r : count;
}
EXPORT_SYMBOL_GPL(dm_btree_find_highest_key);

/*
 * FIXME: We shouldn't use a recursive algorithm when we have limited stack
 * space.  Also this only works for single-level trees.
 */
static int walk_node(struct ro_spine *s, dm_block_t block,
		     int (*fn)(void *context, uint64_t *keys, void *leaf),
		     void *context)
{
	int r;
	unsigned i, nr;
	struct btree_node *n;
	uint64_t keys;

	r = ro_step(s, block);
	if (r)
		/* nothing was pushed onto the spine, so don't pop it */
		return r;

	n = ro_node(s);

	nr = le32_to_cpu(n->header.nr_entries);
	for (i = 0; i < nr; i++) {
		if (le32_to_cpu(n->header.flags) & INTERNAL_NODE) {
			r = walk_node(s, value64(n, i), fn, context);
			if (r)
				goto out;
		} else {
			keys = le64_to_cpu(*key_ptr(n, i));
			r = fn(context, &keys, value_ptr(n, i));
			if (r)
				goto out;
		}
	}

out:
	ro_pop(s);
	return r;
}

int dm_btree_walk(struct dm_btree_info *info, dm_block_t root,
		  int (*fn)(void *context, uint64_t *keys, void *leaf),
		  void *context)
{
	int r;
	struct ro_spine spine;

	BUG_ON(info->levels > 1);

	init_ro_spine(&spine, info);
	r = walk_node(&spine, root, fn, context);
	exit_ro_spine(&spine);

	return r;
}
EXPORT_SYMBOL_GPL(dm_btree_walk);
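
/*
 * Usage sketch (not part of this file): dm_btree_walk() visits every leaf
 * of a single-level tree and hands each key/value pair to the callback.
 * The counting callback below is purely illustrative.
 *
 *	static int count_entry(void *context, uint64_t *keys, void *leaf)
 *	{
 *		unsigned *nr = context;
 *
 *		(*nr)++;		// keys[0] and leaf hold the entry
 *		return 0;		// returning non-zero aborts the walk
 *	}
 *
 *	unsigned nr = 0;
 *	r = dm_btree_walk(&info, root, count_entry, &nr);
 */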