1 /* 2 * btree.c - NILFS B-tree. 3 * 4 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. 5 * 6 * This program is free software; you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License as published by 8 * the Free Software Foundation; either version 2 of the License, or 9 * (at your option) any later version. 10 * 11 * This program is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 * GNU General Public License for more details. 15 * 16 * Written by Koji Sato. 17 */ 18 19 #include <linux/slab.h> 20 #include <linux/string.h> 21 #include <linux/errno.h> 22 #include <linux/pagevec.h> 23 #include "nilfs.h" 24 #include "page.h" 25 #include "btnode.h" 26 #include "btree.h" 27 #include "alloc.h" 28 #include "dat.h" 29 30 static void __nilfs_btree_init(struct nilfs_bmap *bmap); 31 32 static struct nilfs_btree_path *nilfs_btree_alloc_path(void) 33 { 34 struct nilfs_btree_path *path; 35 int level = NILFS_BTREE_LEVEL_DATA; 36 37 path = kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS); 38 if (path == NULL) 39 goto out; 40 41 for (; level < NILFS_BTREE_LEVEL_MAX; level++) { 42 path[level].bp_bh = NULL; 43 path[level].bp_sib_bh = NULL; 44 path[level].bp_index = 0; 45 path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR; 46 path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR; 47 path[level].bp_op = NULL; 48 } 49 50 out: 51 return path; 52 } 53 54 static void nilfs_btree_free_path(struct nilfs_btree_path *path) 55 { 56 int level = NILFS_BTREE_LEVEL_DATA; 57 58 for (; level < NILFS_BTREE_LEVEL_MAX; level++) 59 brelse(path[level].bp_bh); 60 61 kmem_cache_free(nilfs_btree_path_cache, path); 62 } 63 64 /* 65 * B-tree node operations 66 */ 67 static int nilfs_btree_get_new_block(const struct nilfs_bmap *btree, 68 __u64 ptr, struct buffer_head **bhp) 69 { 70 struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache; 71 struct buffer_head *bh; 72 73 bh = nilfs_btnode_create_block(btnc, ptr); 74 if (!bh) 75 return -ENOMEM; 76 77 set_buffer_nilfs_volatile(bh); 78 *bhp = bh; 79 return 0; 80 } 81 82 static int nilfs_btree_node_get_flags(const struct nilfs_btree_node *node) 83 { 84 return node->bn_flags; 85 } 86 87 static void 88 nilfs_btree_node_set_flags(struct nilfs_btree_node *node, int flags) 89 { 90 node->bn_flags = flags; 91 } 92 93 static int nilfs_btree_node_root(const struct nilfs_btree_node *node) 94 { 95 return nilfs_btree_node_get_flags(node) & NILFS_BTREE_NODE_ROOT; 96 } 97 98 static int nilfs_btree_node_get_level(const struct nilfs_btree_node *node) 99 { 100 return node->bn_level; 101 } 102 103 static void 104 nilfs_btree_node_set_level(struct nilfs_btree_node *node, int level) 105 { 106 node->bn_level = level; 107 } 108 109 static int nilfs_btree_node_get_nchildren(const struct nilfs_btree_node *node) 110 { 111 return le16_to_cpu(node->bn_nchildren); 112 } 113 114 static void 115 nilfs_btree_node_set_nchildren(struct nilfs_btree_node *node, int nchildren) 116 { 117 node->bn_nchildren = cpu_to_le16(nchildren); 118 } 119 120 static int nilfs_btree_node_size(const struct nilfs_bmap *btree) 121 { 122 return 1 << btree->b_inode->i_blkbits; 123 } 124 125 static int nilfs_btree_nchildren_per_block(const struct nilfs_bmap *btree) 126 { 127 return btree->b_nchildren_per_block; 128 } 129 130 static __le64 * 131 nilfs_btree_node_dkeys(const struct nilfs_btree_node *node) 132 { 133 return (__le64 *)((char *)(node + 1) + 134 (nilfs_btree_node_root(node) ? 135 0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE)); 136 } 137 138 static __le64 * 139 nilfs_btree_node_dptrs(const struct nilfs_btree_node *node, int ncmax) 140 { 141 return (__le64 *)(nilfs_btree_node_dkeys(node) + ncmax); 142 } 143 144 static __u64 145 nilfs_btree_node_get_key(const struct nilfs_btree_node *node, int index) 146 { 147 return le64_to_cpu(*(nilfs_btree_node_dkeys(node) + index)); 148 } 149 150 static void 151 nilfs_btree_node_set_key(struct nilfs_btree_node *node, int index, __u64 key) 152 { 153 *(nilfs_btree_node_dkeys(node) + index) = cpu_to_le64(key); 154 } 155 156 static __u64 157 nilfs_btree_node_get_ptr(const struct nilfs_btree_node *node, int index, 158 int ncmax) 159 { 160 return le64_to_cpu(*(nilfs_btree_node_dptrs(node, ncmax) + index)); 161 } 162 163 static void 164 nilfs_btree_node_set_ptr(struct nilfs_btree_node *node, int index, __u64 ptr, 165 int ncmax) 166 { 167 *(nilfs_btree_node_dptrs(node, ncmax) + index) = cpu_to_le64(ptr); 168 } 169 170 static void nilfs_btree_node_init(struct nilfs_btree_node *node, int flags, 171 int level, int nchildren, int ncmax, 172 const __u64 *keys, const __u64 *ptrs) 173 { 174 __le64 *dkeys; 175 __le64 *dptrs; 176 int i; 177 178 nilfs_btree_node_set_flags(node, flags); 179 nilfs_btree_node_set_level(node, level); 180 nilfs_btree_node_set_nchildren(node, nchildren); 181 182 dkeys = nilfs_btree_node_dkeys(node); 183 dptrs = nilfs_btree_node_dptrs(node, ncmax); 184 for (i = 0; i < nchildren; i++) { 185 dkeys[i] = cpu_to_le64(keys[i]); 186 dptrs[i] = cpu_to_le64(ptrs[i]); 187 } 188 } 189 190 /* Assume the buffer heads corresponding to left and right are locked. */ 191 static void nilfs_btree_node_move_left(struct nilfs_btree_node *left, 192 struct nilfs_btree_node *right, 193 int n, int lncmax, int rncmax) 194 { 195 __le64 *ldkeys, *rdkeys; 196 __le64 *ldptrs, *rdptrs; 197 int lnchildren, rnchildren; 198 199 ldkeys = nilfs_btree_node_dkeys(left); 200 ldptrs = nilfs_btree_node_dptrs(left, lncmax); 201 lnchildren = nilfs_btree_node_get_nchildren(left); 202 203 rdkeys = nilfs_btree_node_dkeys(right); 204 rdptrs = nilfs_btree_node_dptrs(right, rncmax); 205 rnchildren = nilfs_btree_node_get_nchildren(right); 206 207 memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys)); 208 memcpy(ldptrs + lnchildren, rdptrs, n * sizeof(*rdptrs)); 209 memmove(rdkeys, rdkeys + n, (rnchildren - n) * sizeof(*rdkeys)); 210 memmove(rdptrs, rdptrs + n, (rnchildren - n) * sizeof(*rdptrs)); 211 212 lnchildren += n; 213 rnchildren -= n; 214 nilfs_btree_node_set_nchildren(left, lnchildren); 215 nilfs_btree_node_set_nchildren(right, rnchildren); 216 } 217 218 /* Assume that the buffer heads corresponding to left and right are locked. */ 219 static void nilfs_btree_node_move_right(struct nilfs_btree_node *left, 220 struct nilfs_btree_node *right, 221 int n, int lncmax, int rncmax) 222 { 223 __le64 *ldkeys, *rdkeys; 224 __le64 *ldptrs, *rdptrs; 225 int lnchildren, rnchildren; 226 227 ldkeys = nilfs_btree_node_dkeys(left); 228 ldptrs = nilfs_btree_node_dptrs(left, lncmax); 229 lnchildren = nilfs_btree_node_get_nchildren(left); 230 231 rdkeys = nilfs_btree_node_dkeys(right); 232 rdptrs = nilfs_btree_node_dptrs(right, rncmax); 233 rnchildren = nilfs_btree_node_get_nchildren(right); 234 235 memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys)); 236 memmove(rdptrs + n, rdptrs, rnchildren * sizeof(*rdptrs)); 237 memcpy(rdkeys, ldkeys + lnchildren - n, n * sizeof(*rdkeys)); 238 memcpy(rdptrs, ldptrs + lnchildren - n, n * sizeof(*rdptrs)); 239 240 lnchildren -= n; 241 rnchildren += n; 242 nilfs_btree_node_set_nchildren(left, lnchildren); 243 nilfs_btree_node_set_nchildren(right, rnchildren); 244 } 245 246 /* Assume that the buffer head corresponding to node is locked. */ 247 static void nilfs_btree_node_insert(struct nilfs_btree_node *node, int index, 248 __u64 key, __u64 ptr, int ncmax) 249 { 250 __le64 *dkeys; 251 __le64 *dptrs; 252 int nchildren; 253 254 dkeys = nilfs_btree_node_dkeys(node); 255 dptrs = nilfs_btree_node_dptrs(node, ncmax); 256 nchildren = nilfs_btree_node_get_nchildren(node); 257 if (index < nchildren) { 258 memmove(dkeys + index + 1, dkeys + index, 259 (nchildren - index) * sizeof(*dkeys)); 260 memmove(dptrs + index + 1, dptrs + index, 261 (nchildren - index) * sizeof(*dptrs)); 262 } 263 dkeys[index] = cpu_to_le64(key); 264 dptrs[index] = cpu_to_le64(ptr); 265 nchildren++; 266 nilfs_btree_node_set_nchildren(node, nchildren); 267 } 268 269 /* Assume that the buffer head corresponding to node is locked. */ 270 static void nilfs_btree_node_delete(struct nilfs_btree_node *node, int index, 271 __u64 *keyp, __u64 *ptrp, int ncmax) 272 { 273 __u64 key; 274 __u64 ptr; 275 __le64 *dkeys; 276 __le64 *dptrs; 277 int nchildren; 278 279 dkeys = nilfs_btree_node_dkeys(node); 280 dptrs = nilfs_btree_node_dptrs(node, ncmax); 281 key = le64_to_cpu(dkeys[index]); 282 ptr = le64_to_cpu(dptrs[index]); 283 nchildren = nilfs_btree_node_get_nchildren(node); 284 if (keyp != NULL) 285 *keyp = key; 286 if (ptrp != NULL) 287 *ptrp = ptr; 288 289 if (index < nchildren - 1) { 290 memmove(dkeys + index, dkeys + index + 1, 291 (nchildren - index - 1) * sizeof(*dkeys)); 292 memmove(dptrs + index, dptrs + index + 1, 293 (nchildren - index - 1) * sizeof(*dptrs)); 294 } 295 nchildren--; 296 nilfs_btree_node_set_nchildren(node, nchildren); 297 } 298 299 static int nilfs_btree_node_lookup(const struct nilfs_btree_node *node, 300 __u64 key, int *indexp) 301 { 302 __u64 nkey; 303 int index, low, high, s; 304 305 /* binary search */ 306 low = 0; 307 high = nilfs_btree_node_get_nchildren(node) - 1; 308 index = 0; 309 s = 0; 310 while (low <= high) { 311 index = (low + high) / 2; 312 nkey = nilfs_btree_node_get_key(node, index); 313 if (nkey == key) { 314 s = 0; 315 goto out; 316 } else if (nkey < key) { 317 low = index + 1; 318 s = -1; 319 } else { 320 high = index - 1; 321 s = 1; 322 } 323 } 324 325 /* adjust index */ 326 if (nilfs_btree_node_get_level(node) > NILFS_BTREE_LEVEL_NODE_MIN) { 327 if (s > 0 && index > 0) 328 index--; 329 } else if (s < 0) 330 index++; 331 332 out: 333 *indexp = index; 334 335 return s == 0; 336 } 337 338 /** 339 * nilfs_btree_node_broken - verify consistency of btree node 340 * @node: btree node block to be examined 341 * @size: node size (in bytes) 342 * @blocknr: block number 343 * 344 * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned. 345 */ 346 static int nilfs_btree_node_broken(const struct nilfs_btree_node *node, 347 size_t size, sector_t blocknr) 348 { 349 int level, flags, nchildren; 350 int ret = 0; 351 352 level = nilfs_btree_node_get_level(node); 353 flags = nilfs_btree_node_get_flags(node); 354 nchildren = nilfs_btree_node_get_nchildren(node); 355 356 if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN || 357 level >= NILFS_BTREE_LEVEL_MAX || 358 (flags & NILFS_BTREE_NODE_ROOT) || 359 nchildren < 0 || 360 nchildren > NILFS_BTREE_NODE_NCHILDREN_MAX(size))) { 361 printk(KERN_CRIT "NILFS: bad btree node (blocknr=%llu): " 362 "level = %d, flags = 0x%x, nchildren = %d\n", 363 (unsigned long long)blocknr, level, flags, nchildren); 364 ret = 1; 365 } 366 return ret; 367 } 368 369 /** 370 * nilfs_btree_root_broken - verify consistency of btree root node 371 * @node: btree root node to be examined 372 * @ino: inode number 373 * 374 * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned. 375 */ 376 static int nilfs_btree_root_broken(const struct nilfs_btree_node *node, 377 unsigned long ino) 378 { 379 int level, flags, nchildren; 380 int ret = 0; 381 382 level = nilfs_btree_node_get_level(node); 383 flags = nilfs_btree_node_get_flags(node); 384 nchildren = nilfs_btree_node_get_nchildren(node); 385 386 if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN || 387 level >= NILFS_BTREE_LEVEL_MAX || 388 nchildren < 0 || 389 nchildren > NILFS_BTREE_ROOT_NCHILDREN_MAX)) { 390 pr_crit("NILFS: bad btree root (inode number=%lu): level = %d, flags = 0x%x, nchildren = %d\n", 391 ino, level, flags, nchildren); 392 ret = 1; 393 } 394 return ret; 395 } 396 397 int nilfs_btree_broken_node_block(struct buffer_head *bh) 398 { 399 int ret; 400 401 if (buffer_nilfs_checked(bh)) 402 return 0; 403 404 ret = nilfs_btree_node_broken((struct nilfs_btree_node *)bh->b_data, 405 bh->b_size, bh->b_blocknr); 406 if (likely(!ret)) 407 set_buffer_nilfs_checked(bh); 408 return ret; 409 } 410 411 static struct nilfs_btree_node * 412 nilfs_btree_get_root(const struct nilfs_bmap *btree) 413 { 414 return (struct nilfs_btree_node *)btree->b_u.u_data; 415 } 416 417 static struct nilfs_btree_node * 418 nilfs_btree_get_nonroot_node(const struct nilfs_btree_path *path, int level) 419 { 420 return (struct nilfs_btree_node *)path[level].bp_bh->b_data; 421 } 422 423 static struct nilfs_btree_node * 424 nilfs_btree_get_sib_node(const struct nilfs_btree_path *path, int level) 425 { 426 return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data; 427 } 428 429 static int nilfs_btree_height(const struct nilfs_bmap *btree) 430 { 431 return nilfs_btree_node_get_level(nilfs_btree_get_root(btree)) + 1; 432 } 433 434 static struct nilfs_btree_node * 435 nilfs_btree_get_node(const struct nilfs_bmap *btree, 436 const struct nilfs_btree_path *path, 437 int level, int *ncmaxp) 438 { 439 struct nilfs_btree_node *node; 440 441 if (level == nilfs_btree_height(btree) - 1) { 442 node = nilfs_btree_get_root(btree); 443 *ncmaxp = NILFS_BTREE_ROOT_NCHILDREN_MAX; 444 } else { 445 node = nilfs_btree_get_nonroot_node(path, level); 446 *ncmaxp = nilfs_btree_nchildren_per_block(btree); 447 } 448 return node; 449 } 450 451 static int 452 nilfs_btree_bad_node(struct nilfs_btree_node *node, int level) 453 { 454 if (unlikely(nilfs_btree_node_get_level(node) != level)) { 455 dump_stack(); 456 printk(KERN_CRIT "NILFS: btree level mismatch: %d != %d\n", 457 nilfs_btree_node_get_level(node), level); 458 return 1; 459 } 460 return 0; 461 } 462 463 struct nilfs_btree_readahead_info { 464 struct nilfs_btree_node *node; /* parent node */ 465 int max_ra_blocks; /* max nof blocks to read ahead */ 466 int index; /* current index on the parent node */ 467 int ncmax; /* nof children in the parent node */ 468 }; 469 470 static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr, 471 struct buffer_head **bhp, 472 const struct nilfs_btree_readahead_info *ra) 473 { 474 struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache; 475 struct buffer_head *bh, *ra_bh; 476 sector_t submit_ptr = 0; 477 int ret; 478 479 ret = nilfs_btnode_submit_block(btnc, ptr, 0, READ, &bh, &submit_ptr); 480 if (ret) { 481 if (ret != -EEXIST) 482 return ret; 483 goto out_check; 484 } 485 486 if (ra) { 487 int i, n; 488 __u64 ptr2; 489 490 /* read ahead sibling nodes */ 491 for (n = ra->max_ra_blocks, i = ra->index + 1; 492 n > 0 && i < ra->ncmax; n--, i++) { 493 ptr2 = nilfs_btree_node_get_ptr(ra->node, i, ra->ncmax); 494 495 ret = nilfs_btnode_submit_block(btnc, ptr2, 0, READA, 496 &ra_bh, &submit_ptr); 497 if (likely(!ret || ret == -EEXIST)) 498 brelse(ra_bh); 499 else if (ret != -EBUSY) 500 break; 501 if (!buffer_locked(bh)) 502 goto out_no_wait; 503 } 504 } 505 506 wait_on_buffer(bh); 507 508 out_no_wait: 509 if (!buffer_uptodate(bh)) { 510 brelse(bh); 511 return -EIO; 512 } 513 514 out_check: 515 if (nilfs_btree_broken_node_block(bh)) { 516 clear_buffer_uptodate(bh); 517 brelse(bh); 518 return -EINVAL; 519 } 520 521 *bhp = bh; 522 return 0; 523 } 524 525 static int nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr, 526 struct buffer_head **bhp) 527 { 528 return __nilfs_btree_get_block(btree, ptr, bhp, NULL); 529 } 530 531 static int nilfs_btree_do_lookup(const struct nilfs_bmap *btree, 532 struct nilfs_btree_path *path, 533 __u64 key, __u64 *ptrp, int minlevel, 534 int readahead) 535 { 536 struct nilfs_btree_node *node; 537 struct nilfs_btree_readahead_info p, *ra; 538 __u64 ptr; 539 int level, index, found, ncmax, ret; 540 541 node = nilfs_btree_get_root(btree); 542 level = nilfs_btree_node_get_level(node); 543 if (level < minlevel || nilfs_btree_node_get_nchildren(node) <= 0) 544 return -ENOENT; 545 546 found = nilfs_btree_node_lookup(node, key, &index); 547 ptr = nilfs_btree_node_get_ptr(node, index, 548 NILFS_BTREE_ROOT_NCHILDREN_MAX); 549 path[level].bp_bh = NULL; 550 path[level].bp_index = index; 551 552 ncmax = nilfs_btree_nchildren_per_block(btree); 553 554 while (--level >= minlevel) { 555 ra = NULL; 556 if (level == NILFS_BTREE_LEVEL_NODE_MIN && readahead) { 557 p.node = nilfs_btree_get_node(btree, path, level + 1, 558 &p.ncmax); 559 p.index = index; 560 p.max_ra_blocks = 7; 561 ra = &p; 562 } 563 ret = __nilfs_btree_get_block(btree, ptr, &path[level].bp_bh, 564 ra); 565 if (ret < 0) 566 return ret; 567 568 node = nilfs_btree_get_nonroot_node(path, level); 569 if (nilfs_btree_bad_node(node, level)) 570 return -EINVAL; 571 if (!found) 572 found = nilfs_btree_node_lookup(node, key, &index); 573 else 574 index = 0; 575 if (index < ncmax) { 576 ptr = nilfs_btree_node_get_ptr(node, index, ncmax); 577 } else { 578 WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN); 579 /* insert */ 580 ptr = NILFS_BMAP_INVALID_PTR; 581 } 582 path[level].bp_index = index; 583 } 584 if (!found) 585 return -ENOENT; 586 587 if (ptrp != NULL) 588 *ptrp = ptr; 589 590 return 0; 591 } 592 593 static int nilfs_btree_do_lookup_last(const struct nilfs_bmap *btree, 594 struct nilfs_btree_path *path, 595 __u64 *keyp, __u64 *ptrp) 596 { 597 struct nilfs_btree_node *node; 598 __u64 ptr; 599 int index, level, ncmax, ret; 600 601 node = nilfs_btree_get_root(btree); 602 index = nilfs_btree_node_get_nchildren(node) - 1; 603 if (index < 0) 604 return -ENOENT; 605 level = nilfs_btree_node_get_level(node); 606 ptr = nilfs_btree_node_get_ptr(node, index, 607 NILFS_BTREE_ROOT_NCHILDREN_MAX); 608 path[level].bp_bh = NULL; 609 path[level].bp_index = index; 610 ncmax = nilfs_btree_nchildren_per_block(btree); 611 612 for (level--; level > 0; level--) { 613 ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh); 614 if (ret < 0) 615 return ret; 616 node = nilfs_btree_get_nonroot_node(path, level); 617 if (nilfs_btree_bad_node(node, level)) 618 return -EINVAL; 619 index = nilfs_btree_node_get_nchildren(node) - 1; 620 ptr = nilfs_btree_node_get_ptr(node, index, ncmax); 621 path[level].bp_index = index; 622 } 623 624 if (keyp != NULL) 625 *keyp = nilfs_btree_node_get_key(node, index); 626 if (ptrp != NULL) 627 *ptrp = ptr; 628 629 return 0; 630 } 631 632 /** 633 * nilfs_btree_get_next_key - get next valid key from btree path array 634 * @btree: bmap struct of btree 635 * @path: array of nilfs_btree_path struct 636 * @minlevel: start level 637 * @nextkey: place to store the next valid key 638 * 639 * Return Value: If a next key was found, 0 is returned. Otherwise, 640 * -ENOENT is returned. 641 */ 642 static int nilfs_btree_get_next_key(const struct nilfs_bmap *btree, 643 const struct nilfs_btree_path *path, 644 int minlevel, __u64 *nextkey) 645 { 646 struct nilfs_btree_node *node; 647 int maxlevel = nilfs_btree_height(btree) - 1; 648 int index, next_adj, level; 649 650 /* Next index is already set to bp_index for leaf nodes. */ 651 next_adj = 0; 652 for (level = minlevel; level <= maxlevel; level++) { 653 if (level == maxlevel) 654 node = nilfs_btree_get_root(btree); 655 else 656 node = nilfs_btree_get_nonroot_node(path, level); 657 658 index = path[level].bp_index + next_adj; 659 if (index < nilfs_btree_node_get_nchildren(node)) { 660 /* Next key is in this node */ 661 *nextkey = nilfs_btree_node_get_key(node, index); 662 return 0; 663 } 664 /* For non-leaf nodes, next index is stored at bp_index + 1. */ 665 next_adj = 1; 666 } 667 return -ENOENT; 668 } 669 670 static int nilfs_btree_lookup(const struct nilfs_bmap *btree, 671 __u64 key, int level, __u64 *ptrp) 672 { 673 struct nilfs_btree_path *path; 674 int ret; 675 676 path = nilfs_btree_alloc_path(); 677 if (path == NULL) 678 return -ENOMEM; 679 680 ret = nilfs_btree_do_lookup(btree, path, key, ptrp, level, 0); 681 682 nilfs_btree_free_path(path); 683 684 return ret; 685 } 686 687 static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree, 688 __u64 key, __u64 *ptrp, 689 unsigned int maxblocks) 690 { 691 struct nilfs_btree_path *path; 692 struct nilfs_btree_node *node; 693 struct inode *dat = NULL; 694 __u64 ptr, ptr2; 695 sector_t blocknr; 696 int level = NILFS_BTREE_LEVEL_NODE_MIN; 697 int ret, cnt, index, maxlevel, ncmax; 698 struct nilfs_btree_readahead_info p; 699 700 path = nilfs_btree_alloc_path(); 701 if (path == NULL) 702 return -ENOMEM; 703 704 ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level, 1); 705 if (ret < 0) 706 goto out; 707 708 if (NILFS_BMAP_USE_VBN(btree)) { 709 dat = nilfs_bmap_get_dat(btree); 710 ret = nilfs_dat_translate(dat, ptr, &blocknr); 711 if (ret < 0) 712 goto out; 713 ptr = blocknr; 714 } 715 cnt = 1; 716 if (cnt == maxblocks) 717 goto end; 718 719 maxlevel = nilfs_btree_height(btree) - 1; 720 node = nilfs_btree_get_node(btree, path, level, &ncmax); 721 index = path[level].bp_index + 1; 722 for (;;) { 723 while (index < nilfs_btree_node_get_nchildren(node)) { 724 if (nilfs_btree_node_get_key(node, index) != 725 key + cnt) 726 goto end; 727 ptr2 = nilfs_btree_node_get_ptr(node, index, ncmax); 728 if (dat) { 729 ret = nilfs_dat_translate(dat, ptr2, &blocknr); 730 if (ret < 0) 731 goto out; 732 ptr2 = blocknr; 733 } 734 if (ptr2 != ptr + cnt || ++cnt == maxblocks) 735 goto end; 736 index++; 737 continue; 738 } 739 if (level == maxlevel) 740 break; 741 742 /* look-up right sibling node */ 743 p.node = nilfs_btree_get_node(btree, path, level + 1, &p.ncmax); 744 p.index = path[level + 1].bp_index + 1; 745 p.max_ra_blocks = 7; 746 if (p.index >= nilfs_btree_node_get_nchildren(p.node) || 747 nilfs_btree_node_get_key(p.node, p.index) != key + cnt) 748 break; 749 ptr2 = nilfs_btree_node_get_ptr(p.node, p.index, p.ncmax); 750 path[level + 1].bp_index = p.index; 751 752 brelse(path[level].bp_bh); 753 path[level].bp_bh = NULL; 754 755 ret = __nilfs_btree_get_block(btree, ptr2, &path[level].bp_bh, 756 &p); 757 if (ret < 0) 758 goto out; 759 node = nilfs_btree_get_nonroot_node(path, level); 760 ncmax = nilfs_btree_nchildren_per_block(btree); 761 index = 0; 762 path[level].bp_index = index; 763 } 764 end: 765 *ptrp = ptr; 766 ret = cnt; 767 out: 768 nilfs_btree_free_path(path); 769 return ret; 770 } 771 772 static void nilfs_btree_promote_key(struct nilfs_bmap *btree, 773 struct nilfs_btree_path *path, 774 int level, __u64 key) 775 { 776 if (level < nilfs_btree_height(btree) - 1) { 777 do { 778 nilfs_btree_node_set_key( 779 nilfs_btree_get_nonroot_node(path, level), 780 path[level].bp_index, key); 781 if (!buffer_dirty(path[level].bp_bh)) 782 mark_buffer_dirty(path[level].bp_bh); 783 } while ((path[level].bp_index == 0) && 784 (++level < nilfs_btree_height(btree) - 1)); 785 } 786 787 /* root */ 788 if (level == nilfs_btree_height(btree) - 1) { 789 nilfs_btree_node_set_key(nilfs_btree_get_root(btree), 790 path[level].bp_index, key); 791 } 792 } 793 794 static void nilfs_btree_do_insert(struct nilfs_bmap *btree, 795 struct nilfs_btree_path *path, 796 int level, __u64 *keyp, __u64 *ptrp) 797 { 798 struct nilfs_btree_node *node; 799 int ncblk; 800 801 if (level < nilfs_btree_height(btree) - 1) { 802 node = nilfs_btree_get_nonroot_node(path, level); 803 ncblk = nilfs_btree_nchildren_per_block(btree); 804 nilfs_btree_node_insert(node, path[level].bp_index, 805 *keyp, *ptrp, ncblk); 806 if (!buffer_dirty(path[level].bp_bh)) 807 mark_buffer_dirty(path[level].bp_bh); 808 809 if (path[level].bp_index == 0) 810 nilfs_btree_promote_key(btree, path, level + 1, 811 nilfs_btree_node_get_key(node, 812 0)); 813 } else { 814 node = nilfs_btree_get_root(btree); 815 nilfs_btree_node_insert(node, path[level].bp_index, 816 *keyp, *ptrp, 817 NILFS_BTREE_ROOT_NCHILDREN_MAX); 818 } 819 } 820 821 static void nilfs_btree_carry_left(struct nilfs_bmap *btree, 822 struct nilfs_btree_path *path, 823 int level, __u64 *keyp, __u64 *ptrp) 824 { 825 struct nilfs_btree_node *node, *left; 826 int nchildren, lnchildren, n, move, ncblk; 827 828 node = nilfs_btree_get_nonroot_node(path, level); 829 left = nilfs_btree_get_sib_node(path, level); 830 nchildren = nilfs_btree_node_get_nchildren(node); 831 lnchildren = nilfs_btree_node_get_nchildren(left); 832 ncblk = nilfs_btree_nchildren_per_block(btree); 833 move = 0; 834 835 n = (nchildren + lnchildren + 1) / 2 - lnchildren; 836 if (n > path[level].bp_index) { 837 /* move insert point */ 838 n--; 839 move = 1; 840 } 841 842 nilfs_btree_node_move_left(left, node, n, ncblk, ncblk); 843 844 if (!buffer_dirty(path[level].bp_bh)) 845 mark_buffer_dirty(path[level].bp_bh); 846 if (!buffer_dirty(path[level].bp_sib_bh)) 847 mark_buffer_dirty(path[level].bp_sib_bh); 848 849 nilfs_btree_promote_key(btree, path, level + 1, 850 nilfs_btree_node_get_key(node, 0)); 851 852 if (move) { 853 brelse(path[level].bp_bh); 854 path[level].bp_bh = path[level].bp_sib_bh; 855 path[level].bp_sib_bh = NULL; 856 path[level].bp_index += lnchildren; 857 path[level + 1].bp_index--; 858 } else { 859 brelse(path[level].bp_sib_bh); 860 path[level].bp_sib_bh = NULL; 861 path[level].bp_index -= n; 862 } 863 864 nilfs_btree_do_insert(btree, path, level, keyp, ptrp); 865 } 866 867 static void nilfs_btree_carry_right(struct nilfs_bmap *btree, 868 struct nilfs_btree_path *path, 869 int level, __u64 *keyp, __u64 *ptrp) 870 { 871 struct nilfs_btree_node *node, *right; 872 int nchildren, rnchildren, n, move, ncblk; 873 874 node = nilfs_btree_get_nonroot_node(path, level); 875 right = nilfs_btree_get_sib_node(path, level); 876 nchildren = nilfs_btree_node_get_nchildren(node); 877 rnchildren = nilfs_btree_node_get_nchildren(right); 878 ncblk = nilfs_btree_nchildren_per_block(btree); 879 move = 0; 880 881 n = (nchildren + rnchildren + 1) / 2 - rnchildren; 882 if (n > nchildren - path[level].bp_index) { 883 /* move insert point */ 884 n--; 885 move = 1; 886 } 887 888 nilfs_btree_node_move_right(node, right, n, ncblk, ncblk); 889 890 if (!buffer_dirty(path[level].bp_bh)) 891 mark_buffer_dirty(path[level].bp_bh); 892 if (!buffer_dirty(path[level].bp_sib_bh)) 893 mark_buffer_dirty(path[level].bp_sib_bh); 894 895 path[level + 1].bp_index++; 896 nilfs_btree_promote_key(btree, path, level + 1, 897 nilfs_btree_node_get_key(right, 0)); 898 path[level + 1].bp_index--; 899 900 if (move) { 901 brelse(path[level].bp_bh); 902 path[level].bp_bh = path[level].bp_sib_bh; 903 path[level].bp_sib_bh = NULL; 904 path[level].bp_index -= nilfs_btree_node_get_nchildren(node); 905 path[level + 1].bp_index++; 906 } else { 907 brelse(path[level].bp_sib_bh); 908 path[level].bp_sib_bh = NULL; 909 } 910 911 nilfs_btree_do_insert(btree, path, level, keyp, ptrp); 912 } 913 914 static void nilfs_btree_split(struct nilfs_bmap *btree, 915 struct nilfs_btree_path *path, 916 int level, __u64 *keyp, __u64 *ptrp) 917 { 918 struct nilfs_btree_node *node, *right; 919 int nchildren, n, move, ncblk; 920 921 node = nilfs_btree_get_nonroot_node(path, level); 922 right = nilfs_btree_get_sib_node(path, level); 923 nchildren = nilfs_btree_node_get_nchildren(node); 924 ncblk = nilfs_btree_nchildren_per_block(btree); 925 move = 0; 926 927 n = (nchildren + 1) / 2; 928 if (n > nchildren - path[level].bp_index) { 929 n--; 930 move = 1; 931 } 932 933 nilfs_btree_node_move_right(node, right, n, ncblk, ncblk); 934 935 if (!buffer_dirty(path[level].bp_bh)) 936 mark_buffer_dirty(path[level].bp_bh); 937 if (!buffer_dirty(path[level].bp_sib_bh)) 938 mark_buffer_dirty(path[level].bp_sib_bh); 939 940 if (move) { 941 path[level].bp_index -= nilfs_btree_node_get_nchildren(node); 942 nilfs_btree_node_insert(right, path[level].bp_index, 943 *keyp, *ptrp, ncblk); 944 945 *keyp = nilfs_btree_node_get_key(right, 0); 946 *ptrp = path[level].bp_newreq.bpr_ptr; 947 948 brelse(path[level].bp_bh); 949 path[level].bp_bh = path[level].bp_sib_bh; 950 path[level].bp_sib_bh = NULL; 951 } else { 952 nilfs_btree_do_insert(btree, path, level, keyp, ptrp); 953 954 *keyp = nilfs_btree_node_get_key(right, 0); 955 *ptrp = path[level].bp_newreq.bpr_ptr; 956 957 brelse(path[level].bp_sib_bh); 958 path[level].bp_sib_bh = NULL; 959 } 960 961 path[level + 1].bp_index++; 962 } 963 964 static void nilfs_btree_grow(struct nilfs_bmap *btree, 965 struct nilfs_btree_path *path, 966 int level, __u64 *keyp, __u64 *ptrp) 967 { 968 struct nilfs_btree_node *root, *child; 969 int n, ncblk; 970 971 root = nilfs_btree_get_root(btree); 972 child = nilfs_btree_get_sib_node(path, level); 973 ncblk = nilfs_btree_nchildren_per_block(btree); 974 975 n = nilfs_btree_node_get_nchildren(root); 976 977 nilfs_btree_node_move_right(root, child, n, 978 NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk); 979 nilfs_btree_node_set_level(root, level + 1); 980 981 if (!buffer_dirty(path[level].bp_sib_bh)) 982 mark_buffer_dirty(path[level].bp_sib_bh); 983 984 path[level].bp_bh = path[level].bp_sib_bh; 985 path[level].bp_sib_bh = NULL; 986 987 nilfs_btree_do_insert(btree, path, level, keyp, ptrp); 988 989 *keyp = nilfs_btree_node_get_key(child, 0); 990 *ptrp = path[level].bp_newreq.bpr_ptr; 991 } 992 993 static __u64 nilfs_btree_find_near(const struct nilfs_bmap *btree, 994 const struct nilfs_btree_path *path) 995 { 996 struct nilfs_btree_node *node; 997 int level, ncmax; 998 999 if (path == NULL) 1000 return NILFS_BMAP_INVALID_PTR; 1001 1002 /* left sibling */ 1003 level = NILFS_BTREE_LEVEL_NODE_MIN; 1004 if (path[level].bp_index > 0) { 1005 node = nilfs_btree_get_node(btree, path, level, &ncmax); 1006 return nilfs_btree_node_get_ptr(node, 1007 path[level].bp_index - 1, 1008 ncmax); 1009 } 1010 1011 /* parent */ 1012 level = NILFS_BTREE_LEVEL_NODE_MIN + 1; 1013 if (level <= nilfs_btree_height(btree) - 1) { 1014 node = nilfs_btree_get_node(btree, path, level, &ncmax); 1015 return nilfs_btree_node_get_ptr(node, path[level].bp_index, 1016 ncmax); 1017 } 1018 1019 return NILFS_BMAP_INVALID_PTR; 1020 } 1021 1022 static __u64 nilfs_btree_find_target_v(const struct nilfs_bmap *btree, 1023 const struct nilfs_btree_path *path, 1024 __u64 key) 1025 { 1026 __u64 ptr; 1027 1028 ptr = nilfs_bmap_find_target_seq(btree, key); 1029 if (ptr != NILFS_BMAP_INVALID_PTR) 1030 /* sequential access */ 1031 return ptr; 1032 1033 ptr = nilfs_btree_find_near(btree, path); 1034 if (ptr != NILFS_BMAP_INVALID_PTR) 1035 /* near */ 1036 return ptr; 1037 1038 /* block group */ 1039 return nilfs_bmap_find_target_in_group(btree); 1040 } 1041 1042 static int nilfs_btree_prepare_insert(struct nilfs_bmap *btree, 1043 struct nilfs_btree_path *path, 1044 int *levelp, __u64 key, __u64 ptr, 1045 struct nilfs_bmap_stats *stats) 1046 { 1047 struct buffer_head *bh; 1048 struct nilfs_btree_node *node, *parent, *sib; 1049 __u64 sibptr; 1050 int pindex, level, ncmax, ncblk, ret; 1051 struct inode *dat = NULL; 1052 1053 stats->bs_nblocks = 0; 1054 level = NILFS_BTREE_LEVEL_DATA; 1055 1056 /* allocate a new ptr for data block */ 1057 if (NILFS_BMAP_USE_VBN(btree)) { 1058 path[level].bp_newreq.bpr_ptr = 1059 nilfs_btree_find_target_v(btree, path, key); 1060 dat = nilfs_bmap_get_dat(btree); 1061 } 1062 1063 ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat); 1064 if (ret < 0) 1065 goto err_out_data; 1066 1067 ncblk = nilfs_btree_nchildren_per_block(btree); 1068 1069 for (level = NILFS_BTREE_LEVEL_NODE_MIN; 1070 level < nilfs_btree_height(btree) - 1; 1071 level++) { 1072 node = nilfs_btree_get_nonroot_node(path, level); 1073 if (nilfs_btree_node_get_nchildren(node) < ncblk) { 1074 path[level].bp_op = nilfs_btree_do_insert; 1075 stats->bs_nblocks++; 1076 goto out; 1077 } 1078 1079 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); 1080 pindex = path[level + 1].bp_index; 1081 1082 /* left sibling */ 1083 if (pindex > 0) { 1084 sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1, 1085 ncmax); 1086 ret = nilfs_btree_get_block(btree, sibptr, &bh); 1087 if (ret < 0) 1088 goto err_out_child_node; 1089 sib = (struct nilfs_btree_node *)bh->b_data; 1090 if (nilfs_btree_node_get_nchildren(sib) < ncblk) { 1091 path[level].bp_sib_bh = bh; 1092 path[level].bp_op = nilfs_btree_carry_left; 1093 stats->bs_nblocks++; 1094 goto out; 1095 } else { 1096 brelse(bh); 1097 } 1098 } 1099 1100 /* right sibling */ 1101 if (pindex < nilfs_btree_node_get_nchildren(parent) - 1) { 1102 sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1, 1103 ncmax); 1104 ret = nilfs_btree_get_block(btree, sibptr, &bh); 1105 if (ret < 0) 1106 goto err_out_child_node; 1107 sib = (struct nilfs_btree_node *)bh->b_data; 1108 if (nilfs_btree_node_get_nchildren(sib) < ncblk) { 1109 path[level].bp_sib_bh = bh; 1110 path[level].bp_op = nilfs_btree_carry_right; 1111 stats->bs_nblocks++; 1112 goto out; 1113 } else { 1114 brelse(bh); 1115 } 1116 } 1117 1118 /* split */ 1119 path[level].bp_newreq.bpr_ptr = 1120 path[level - 1].bp_newreq.bpr_ptr + 1; 1121 ret = nilfs_bmap_prepare_alloc_ptr(btree, 1122 &path[level].bp_newreq, dat); 1123 if (ret < 0) 1124 goto err_out_child_node; 1125 ret = nilfs_btree_get_new_block(btree, 1126 path[level].bp_newreq.bpr_ptr, 1127 &bh); 1128 if (ret < 0) 1129 goto err_out_curr_node; 1130 1131 stats->bs_nblocks++; 1132 1133 sib = (struct nilfs_btree_node *)bh->b_data; 1134 nilfs_btree_node_init(sib, 0, level, 0, ncblk, NULL, NULL); 1135 path[level].bp_sib_bh = bh; 1136 path[level].bp_op = nilfs_btree_split; 1137 } 1138 1139 /* root */ 1140 node = nilfs_btree_get_root(btree); 1141 if (nilfs_btree_node_get_nchildren(node) < 1142 NILFS_BTREE_ROOT_NCHILDREN_MAX) { 1143 path[level].bp_op = nilfs_btree_do_insert; 1144 stats->bs_nblocks++; 1145 goto out; 1146 } 1147 1148 /* grow */ 1149 path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1; 1150 ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat); 1151 if (ret < 0) 1152 goto err_out_child_node; 1153 ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr, 1154 &bh); 1155 if (ret < 0) 1156 goto err_out_curr_node; 1157 1158 nilfs_btree_node_init((struct nilfs_btree_node *)bh->b_data, 1159 0, level, 0, ncblk, NULL, NULL); 1160 path[level].bp_sib_bh = bh; 1161 path[level].bp_op = nilfs_btree_grow; 1162 1163 level++; 1164 path[level].bp_op = nilfs_btree_do_insert; 1165 1166 /* a newly-created node block and a data block are added */ 1167 stats->bs_nblocks += 2; 1168 1169 /* success */ 1170 out: 1171 *levelp = level; 1172 return ret; 1173 1174 /* error */ 1175 err_out_curr_node: 1176 nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat); 1177 err_out_child_node: 1178 for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) { 1179 nilfs_btnode_delete(path[level].bp_sib_bh); 1180 nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat); 1181 1182 } 1183 1184 nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat); 1185 err_out_data: 1186 *levelp = level; 1187 stats->bs_nblocks = 0; 1188 return ret; 1189 } 1190 1191 static void nilfs_btree_commit_insert(struct nilfs_bmap *btree, 1192 struct nilfs_btree_path *path, 1193 int maxlevel, __u64 key, __u64 ptr) 1194 { 1195 struct inode *dat = NULL; 1196 int level; 1197 1198 set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr)); 1199 ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr; 1200 if (NILFS_BMAP_USE_VBN(btree)) { 1201 nilfs_bmap_set_target_v(btree, key, ptr); 1202 dat = nilfs_bmap_get_dat(btree); 1203 } 1204 1205 for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) { 1206 nilfs_bmap_commit_alloc_ptr(btree, 1207 &path[level - 1].bp_newreq, dat); 1208 path[level].bp_op(btree, path, level, &key, &ptr); 1209 } 1210 1211 if (!nilfs_bmap_dirty(btree)) 1212 nilfs_bmap_set_dirty(btree); 1213 } 1214 1215 static int nilfs_btree_insert(struct nilfs_bmap *btree, __u64 key, __u64 ptr) 1216 { 1217 struct nilfs_btree_path *path; 1218 struct nilfs_bmap_stats stats; 1219 int level, ret; 1220 1221 path = nilfs_btree_alloc_path(); 1222 if (path == NULL) 1223 return -ENOMEM; 1224 1225 ret = nilfs_btree_do_lookup(btree, path, key, NULL, 1226 NILFS_BTREE_LEVEL_NODE_MIN, 0); 1227 if (ret != -ENOENT) { 1228 if (ret == 0) 1229 ret = -EEXIST; 1230 goto out; 1231 } 1232 1233 ret = nilfs_btree_prepare_insert(btree, path, &level, key, ptr, &stats); 1234 if (ret < 0) 1235 goto out; 1236 nilfs_btree_commit_insert(btree, path, level, key, ptr); 1237 nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks); 1238 1239 out: 1240 nilfs_btree_free_path(path); 1241 return ret; 1242 } 1243 1244 static void nilfs_btree_do_delete(struct nilfs_bmap *btree, 1245 struct nilfs_btree_path *path, 1246 int level, __u64 *keyp, __u64 *ptrp) 1247 { 1248 struct nilfs_btree_node *node; 1249 int ncblk; 1250 1251 if (level < nilfs_btree_height(btree) - 1) { 1252 node = nilfs_btree_get_nonroot_node(path, level); 1253 ncblk = nilfs_btree_nchildren_per_block(btree); 1254 nilfs_btree_node_delete(node, path[level].bp_index, 1255 keyp, ptrp, ncblk); 1256 if (!buffer_dirty(path[level].bp_bh)) 1257 mark_buffer_dirty(path[level].bp_bh); 1258 if (path[level].bp_index == 0) 1259 nilfs_btree_promote_key(btree, path, level + 1, 1260 nilfs_btree_node_get_key(node, 0)); 1261 } else { 1262 node = nilfs_btree_get_root(btree); 1263 nilfs_btree_node_delete(node, path[level].bp_index, 1264 keyp, ptrp, 1265 NILFS_BTREE_ROOT_NCHILDREN_MAX); 1266 } 1267 } 1268 1269 static void nilfs_btree_borrow_left(struct nilfs_bmap *btree, 1270 struct nilfs_btree_path *path, 1271 int level, __u64 *keyp, __u64 *ptrp) 1272 { 1273 struct nilfs_btree_node *node, *left; 1274 int nchildren, lnchildren, n, ncblk; 1275 1276 nilfs_btree_do_delete(btree, path, level, keyp, ptrp); 1277 1278 node = nilfs_btree_get_nonroot_node(path, level); 1279 left = nilfs_btree_get_sib_node(path, level); 1280 nchildren = nilfs_btree_node_get_nchildren(node); 1281 lnchildren = nilfs_btree_node_get_nchildren(left); 1282 ncblk = nilfs_btree_nchildren_per_block(btree); 1283 1284 n = (nchildren + lnchildren) / 2 - nchildren; 1285 1286 nilfs_btree_node_move_right(left, node, n, ncblk, ncblk); 1287 1288 if (!buffer_dirty(path[level].bp_bh)) 1289 mark_buffer_dirty(path[level].bp_bh); 1290 if (!buffer_dirty(path[level].bp_sib_bh)) 1291 mark_buffer_dirty(path[level].bp_sib_bh); 1292 1293 nilfs_btree_promote_key(btree, path, level + 1, 1294 nilfs_btree_node_get_key(node, 0)); 1295 1296 brelse(path[level].bp_sib_bh); 1297 path[level].bp_sib_bh = NULL; 1298 path[level].bp_index += n; 1299 } 1300 1301 static void nilfs_btree_borrow_right(struct nilfs_bmap *btree, 1302 struct nilfs_btree_path *path, 1303 int level, __u64 *keyp, __u64 *ptrp) 1304 { 1305 struct nilfs_btree_node *node, *right; 1306 int nchildren, rnchildren, n, ncblk; 1307 1308 nilfs_btree_do_delete(btree, path, level, keyp, ptrp); 1309 1310 node = nilfs_btree_get_nonroot_node(path, level); 1311 right = nilfs_btree_get_sib_node(path, level); 1312 nchildren = nilfs_btree_node_get_nchildren(node); 1313 rnchildren = nilfs_btree_node_get_nchildren(right); 1314 ncblk = nilfs_btree_nchildren_per_block(btree); 1315 1316 n = (nchildren + rnchildren) / 2 - nchildren; 1317 1318 nilfs_btree_node_move_left(node, right, n, ncblk, ncblk); 1319 1320 if (!buffer_dirty(path[level].bp_bh)) 1321 mark_buffer_dirty(path[level].bp_bh); 1322 if (!buffer_dirty(path[level].bp_sib_bh)) 1323 mark_buffer_dirty(path[level].bp_sib_bh); 1324 1325 path[level + 1].bp_index++; 1326 nilfs_btree_promote_key(btree, path, level + 1, 1327 nilfs_btree_node_get_key(right, 0)); 1328 path[level + 1].bp_index--; 1329 1330 brelse(path[level].bp_sib_bh); 1331 path[level].bp_sib_bh = NULL; 1332 } 1333 1334 static void nilfs_btree_concat_left(struct nilfs_bmap *btree, 1335 struct nilfs_btree_path *path, 1336 int level, __u64 *keyp, __u64 *ptrp) 1337 { 1338 struct nilfs_btree_node *node, *left; 1339 int n, ncblk; 1340 1341 nilfs_btree_do_delete(btree, path, level, keyp, ptrp); 1342 1343 node = nilfs_btree_get_nonroot_node(path, level); 1344 left = nilfs_btree_get_sib_node(path, level); 1345 ncblk = nilfs_btree_nchildren_per_block(btree); 1346 1347 n = nilfs_btree_node_get_nchildren(node); 1348 1349 nilfs_btree_node_move_left(left, node, n, ncblk, ncblk); 1350 1351 if (!buffer_dirty(path[level].bp_sib_bh)) 1352 mark_buffer_dirty(path[level].bp_sib_bh); 1353 1354 nilfs_btnode_delete(path[level].bp_bh); 1355 path[level].bp_bh = path[level].bp_sib_bh; 1356 path[level].bp_sib_bh = NULL; 1357 path[level].bp_index += nilfs_btree_node_get_nchildren(left); 1358 } 1359 1360 static void nilfs_btree_concat_right(struct nilfs_bmap *btree, 1361 struct nilfs_btree_path *path, 1362 int level, __u64 *keyp, __u64 *ptrp) 1363 { 1364 struct nilfs_btree_node *node, *right; 1365 int n, ncblk; 1366 1367 nilfs_btree_do_delete(btree, path, level, keyp, ptrp); 1368 1369 node = nilfs_btree_get_nonroot_node(path, level); 1370 right = nilfs_btree_get_sib_node(path, level); 1371 ncblk = nilfs_btree_nchildren_per_block(btree); 1372 1373 n = nilfs_btree_node_get_nchildren(right); 1374 1375 nilfs_btree_node_move_left(node, right, n, ncblk, ncblk); 1376 1377 if (!buffer_dirty(path[level].bp_bh)) 1378 mark_buffer_dirty(path[level].bp_bh); 1379 1380 nilfs_btnode_delete(path[level].bp_sib_bh); 1381 path[level].bp_sib_bh = NULL; 1382 path[level + 1].bp_index++; 1383 } 1384 1385 static void nilfs_btree_shrink(struct nilfs_bmap *btree, 1386 struct nilfs_btree_path *path, 1387 int level, __u64 *keyp, __u64 *ptrp) 1388 { 1389 struct nilfs_btree_node *root, *child; 1390 int n, ncblk; 1391 1392 nilfs_btree_do_delete(btree, path, level, keyp, ptrp); 1393 1394 root = nilfs_btree_get_root(btree); 1395 child = nilfs_btree_get_nonroot_node(path, level); 1396 ncblk = nilfs_btree_nchildren_per_block(btree); 1397 1398 nilfs_btree_node_delete(root, 0, NULL, NULL, 1399 NILFS_BTREE_ROOT_NCHILDREN_MAX); 1400 nilfs_btree_node_set_level(root, level); 1401 n = nilfs_btree_node_get_nchildren(child); 1402 nilfs_btree_node_move_left(root, child, n, 1403 NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk); 1404 1405 nilfs_btnode_delete(path[level].bp_bh); 1406 path[level].bp_bh = NULL; 1407 } 1408 1409 static void nilfs_btree_nop(struct nilfs_bmap *btree, 1410 struct nilfs_btree_path *path, 1411 int level, __u64 *keyp, __u64 *ptrp) 1412 { 1413 } 1414 1415 static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree, 1416 struct nilfs_btree_path *path, 1417 int *levelp, 1418 struct nilfs_bmap_stats *stats, 1419 struct inode *dat) 1420 { 1421 struct buffer_head *bh; 1422 struct nilfs_btree_node *node, *parent, *sib; 1423 __u64 sibptr; 1424 int pindex, dindex, level, ncmin, ncmax, ncblk, ret; 1425 1426 ret = 0; 1427 stats->bs_nblocks = 0; 1428 ncmin = NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree)); 1429 ncblk = nilfs_btree_nchildren_per_block(btree); 1430 1431 for (level = NILFS_BTREE_LEVEL_NODE_MIN, dindex = path[level].bp_index; 1432 level < nilfs_btree_height(btree) - 1; 1433 level++) { 1434 node = nilfs_btree_get_nonroot_node(path, level); 1435 path[level].bp_oldreq.bpr_ptr = 1436 nilfs_btree_node_get_ptr(node, dindex, ncblk); 1437 ret = nilfs_bmap_prepare_end_ptr(btree, 1438 &path[level].bp_oldreq, dat); 1439 if (ret < 0) 1440 goto err_out_child_node; 1441 1442 if (nilfs_btree_node_get_nchildren(node) > ncmin) { 1443 path[level].bp_op = nilfs_btree_do_delete; 1444 stats->bs_nblocks++; 1445 goto out; 1446 } 1447 1448 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); 1449 pindex = path[level + 1].bp_index; 1450 dindex = pindex; 1451 1452 if (pindex > 0) { 1453 /* left sibling */ 1454 sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1, 1455 ncmax); 1456 ret = nilfs_btree_get_block(btree, sibptr, &bh); 1457 if (ret < 0) 1458 goto err_out_curr_node; 1459 sib = (struct nilfs_btree_node *)bh->b_data; 1460 if (nilfs_btree_node_get_nchildren(sib) > ncmin) { 1461 path[level].bp_sib_bh = bh; 1462 path[level].bp_op = nilfs_btree_borrow_left; 1463 stats->bs_nblocks++; 1464 goto out; 1465 } else { 1466 path[level].bp_sib_bh = bh; 1467 path[level].bp_op = nilfs_btree_concat_left; 1468 stats->bs_nblocks++; 1469 /* continue; */ 1470 } 1471 } else if (pindex < 1472 nilfs_btree_node_get_nchildren(parent) - 1) { 1473 /* right sibling */ 1474 sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1, 1475 ncmax); 1476 ret = nilfs_btree_get_block(btree, sibptr, &bh); 1477 if (ret < 0) 1478 goto err_out_curr_node; 1479 sib = (struct nilfs_btree_node *)bh->b_data; 1480 if (nilfs_btree_node_get_nchildren(sib) > ncmin) { 1481 path[level].bp_sib_bh = bh; 1482 path[level].bp_op = nilfs_btree_borrow_right; 1483 stats->bs_nblocks++; 1484 goto out; 1485 } else { 1486 path[level].bp_sib_bh = bh; 1487 path[level].bp_op = nilfs_btree_concat_right; 1488 stats->bs_nblocks++; 1489 /* 1490 * When merging right sibling node 1491 * into the current node, pointer to 1492 * the right sibling node must be 1493 * terminated instead. The adjustment 1494 * below is required for that. 1495 */ 1496 dindex = pindex + 1; 1497 /* continue; */ 1498 } 1499 } else { 1500 /* no siblings */ 1501 /* the only child of the root node */ 1502 WARN_ON(level != nilfs_btree_height(btree) - 2); 1503 if (nilfs_btree_node_get_nchildren(node) - 1 <= 1504 NILFS_BTREE_ROOT_NCHILDREN_MAX) { 1505 path[level].bp_op = nilfs_btree_shrink; 1506 stats->bs_nblocks += 2; 1507 level++; 1508 path[level].bp_op = nilfs_btree_nop; 1509 goto shrink_root_child; 1510 } else { 1511 path[level].bp_op = nilfs_btree_do_delete; 1512 stats->bs_nblocks++; 1513 goto out; 1514 } 1515 } 1516 } 1517 1518 /* child of the root node is deleted */ 1519 path[level].bp_op = nilfs_btree_do_delete; 1520 stats->bs_nblocks++; 1521 1522 shrink_root_child: 1523 node = nilfs_btree_get_root(btree); 1524 path[level].bp_oldreq.bpr_ptr = 1525 nilfs_btree_node_get_ptr(node, dindex, 1526 NILFS_BTREE_ROOT_NCHILDREN_MAX); 1527 1528 ret = nilfs_bmap_prepare_end_ptr(btree, &path[level].bp_oldreq, dat); 1529 if (ret < 0) 1530 goto err_out_child_node; 1531 1532 /* success */ 1533 out: 1534 *levelp = level; 1535 return ret; 1536 1537 /* error */ 1538 err_out_curr_node: 1539 nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat); 1540 err_out_child_node: 1541 for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) { 1542 brelse(path[level].bp_sib_bh); 1543 nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat); 1544 } 1545 *levelp = level; 1546 stats->bs_nblocks = 0; 1547 return ret; 1548 } 1549 1550 static void nilfs_btree_commit_delete(struct nilfs_bmap *btree, 1551 struct nilfs_btree_path *path, 1552 int maxlevel, struct inode *dat) 1553 { 1554 int level; 1555 1556 for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) { 1557 nilfs_bmap_commit_end_ptr(btree, &path[level].bp_oldreq, dat); 1558 path[level].bp_op(btree, path, level, NULL, NULL); 1559 } 1560 1561 if (!nilfs_bmap_dirty(btree)) 1562 nilfs_bmap_set_dirty(btree); 1563 } 1564 1565 static int nilfs_btree_delete(struct nilfs_bmap *btree, __u64 key) 1566 1567 { 1568 struct nilfs_btree_path *path; 1569 struct nilfs_bmap_stats stats; 1570 struct inode *dat; 1571 int level, ret; 1572 1573 path = nilfs_btree_alloc_path(); 1574 if (path == NULL) 1575 return -ENOMEM; 1576 1577 ret = nilfs_btree_do_lookup(btree, path, key, NULL, 1578 NILFS_BTREE_LEVEL_NODE_MIN, 0); 1579 if (ret < 0) 1580 goto out; 1581 1582 1583 dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL; 1584 1585 ret = nilfs_btree_prepare_delete(btree, path, &level, &stats, dat); 1586 if (ret < 0) 1587 goto out; 1588 nilfs_btree_commit_delete(btree, path, level, dat); 1589 nilfs_inode_sub_blocks(btree->b_inode, stats.bs_nblocks); 1590 1591 out: 1592 nilfs_btree_free_path(path); 1593 return ret; 1594 } 1595 1596 static int nilfs_btree_seek_key(const struct nilfs_bmap *btree, __u64 start, 1597 __u64 *keyp) 1598 { 1599 struct nilfs_btree_path *path; 1600 const int minlevel = NILFS_BTREE_LEVEL_NODE_MIN; 1601 int ret; 1602 1603 path = nilfs_btree_alloc_path(); 1604 if (!path) 1605 return -ENOMEM; 1606 1607 ret = nilfs_btree_do_lookup(btree, path, start, NULL, minlevel, 0); 1608 if (!ret) 1609 *keyp = start; 1610 else if (ret == -ENOENT) 1611 ret = nilfs_btree_get_next_key(btree, path, minlevel, keyp); 1612 1613 nilfs_btree_free_path(path); 1614 return ret; 1615 } 1616 1617 static int nilfs_btree_last_key(const struct nilfs_bmap *btree, __u64 *keyp) 1618 { 1619 struct nilfs_btree_path *path; 1620 int ret; 1621 1622 path = nilfs_btree_alloc_path(); 1623 if (path == NULL) 1624 return -ENOMEM; 1625 1626 ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL); 1627 1628 nilfs_btree_free_path(path); 1629 1630 return ret; 1631 } 1632 1633 static int nilfs_btree_check_delete(struct nilfs_bmap *btree, __u64 key) 1634 { 1635 struct buffer_head *bh; 1636 struct nilfs_btree_node *root, *node; 1637 __u64 maxkey, nextmaxkey; 1638 __u64 ptr; 1639 int nchildren, ret; 1640 1641 root = nilfs_btree_get_root(btree); 1642 switch (nilfs_btree_height(btree)) { 1643 case 2: 1644 bh = NULL; 1645 node = root; 1646 break; 1647 case 3: 1648 nchildren = nilfs_btree_node_get_nchildren(root); 1649 if (nchildren > 1) 1650 return 0; 1651 ptr = nilfs_btree_node_get_ptr(root, nchildren - 1, 1652 NILFS_BTREE_ROOT_NCHILDREN_MAX); 1653 ret = nilfs_btree_get_block(btree, ptr, &bh); 1654 if (ret < 0) 1655 return ret; 1656 node = (struct nilfs_btree_node *)bh->b_data; 1657 break; 1658 default: 1659 return 0; 1660 } 1661 1662 nchildren = nilfs_btree_node_get_nchildren(node); 1663 maxkey = nilfs_btree_node_get_key(node, nchildren - 1); 1664 nextmaxkey = (nchildren > 1) ? 1665 nilfs_btree_node_get_key(node, nchildren - 2) : 0; 1666 if (bh != NULL) 1667 brelse(bh); 1668 1669 return (maxkey == key) && (nextmaxkey < NILFS_BMAP_LARGE_LOW); 1670 } 1671 1672 static int nilfs_btree_gather_data(struct nilfs_bmap *btree, 1673 __u64 *keys, __u64 *ptrs, int nitems) 1674 { 1675 struct buffer_head *bh; 1676 struct nilfs_btree_node *node, *root; 1677 __le64 *dkeys; 1678 __le64 *dptrs; 1679 __u64 ptr; 1680 int nchildren, ncmax, i, ret; 1681 1682 root = nilfs_btree_get_root(btree); 1683 switch (nilfs_btree_height(btree)) { 1684 case 2: 1685 bh = NULL; 1686 node = root; 1687 ncmax = NILFS_BTREE_ROOT_NCHILDREN_MAX; 1688 break; 1689 case 3: 1690 nchildren = nilfs_btree_node_get_nchildren(root); 1691 WARN_ON(nchildren > 1); 1692 ptr = nilfs_btree_node_get_ptr(root, nchildren - 1, 1693 NILFS_BTREE_ROOT_NCHILDREN_MAX); 1694 ret = nilfs_btree_get_block(btree, ptr, &bh); 1695 if (ret < 0) 1696 return ret; 1697 node = (struct nilfs_btree_node *)bh->b_data; 1698 ncmax = nilfs_btree_nchildren_per_block(btree); 1699 break; 1700 default: 1701 node = NULL; 1702 return -EINVAL; 1703 } 1704 1705 nchildren = nilfs_btree_node_get_nchildren(node); 1706 if (nchildren < nitems) 1707 nitems = nchildren; 1708 dkeys = nilfs_btree_node_dkeys(node); 1709 dptrs = nilfs_btree_node_dptrs(node, ncmax); 1710 for (i = 0; i < nitems; i++) { 1711 keys[i] = le64_to_cpu(dkeys[i]); 1712 ptrs[i] = le64_to_cpu(dptrs[i]); 1713 } 1714 1715 if (bh != NULL) 1716 brelse(bh); 1717 1718 return nitems; 1719 } 1720 1721 static int 1722 nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *btree, __u64 key, 1723 union nilfs_bmap_ptr_req *dreq, 1724 union nilfs_bmap_ptr_req *nreq, 1725 struct buffer_head **bhp, 1726 struct nilfs_bmap_stats *stats) 1727 { 1728 struct buffer_head *bh; 1729 struct inode *dat = NULL; 1730 int ret; 1731 1732 stats->bs_nblocks = 0; 1733 1734 /* for data */ 1735 /* cannot find near ptr */ 1736 if (NILFS_BMAP_USE_VBN(btree)) { 1737 dreq->bpr_ptr = nilfs_btree_find_target_v(btree, NULL, key); 1738 dat = nilfs_bmap_get_dat(btree); 1739 } 1740 1741 ret = nilfs_bmap_prepare_alloc_ptr(btree, dreq, dat); 1742 if (ret < 0) 1743 return ret; 1744 1745 *bhp = NULL; 1746 stats->bs_nblocks++; 1747 if (nreq != NULL) { 1748 nreq->bpr_ptr = dreq->bpr_ptr + 1; 1749 ret = nilfs_bmap_prepare_alloc_ptr(btree, nreq, dat); 1750 if (ret < 0) 1751 goto err_out_dreq; 1752 1753 ret = nilfs_btree_get_new_block(btree, nreq->bpr_ptr, &bh); 1754 if (ret < 0) 1755 goto err_out_nreq; 1756 1757 *bhp = bh; 1758 stats->bs_nblocks++; 1759 } 1760 1761 /* success */ 1762 return 0; 1763 1764 /* error */ 1765 err_out_nreq: 1766 nilfs_bmap_abort_alloc_ptr(btree, nreq, dat); 1767 err_out_dreq: 1768 nilfs_bmap_abort_alloc_ptr(btree, dreq, dat); 1769 stats->bs_nblocks = 0; 1770 return ret; 1771 1772 } 1773 1774 static void 1775 nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *btree, 1776 __u64 key, __u64 ptr, 1777 const __u64 *keys, const __u64 *ptrs, 1778 int n, 1779 union nilfs_bmap_ptr_req *dreq, 1780 union nilfs_bmap_ptr_req *nreq, 1781 struct buffer_head *bh) 1782 { 1783 struct nilfs_btree_node *node; 1784 struct inode *dat; 1785 __u64 tmpptr; 1786 int ncblk; 1787 1788 /* free resources */ 1789 if (btree->b_ops->bop_clear != NULL) 1790 btree->b_ops->bop_clear(btree); 1791 1792 /* ptr must be a pointer to a buffer head. */ 1793 set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr)); 1794 1795 /* convert and insert */ 1796 dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL; 1797 __nilfs_btree_init(btree); 1798 if (nreq != NULL) { 1799 nilfs_bmap_commit_alloc_ptr(btree, dreq, dat); 1800 nilfs_bmap_commit_alloc_ptr(btree, nreq, dat); 1801 1802 /* create child node at level 1 */ 1803 node = (struct nilfs_btree_node *)bh->b_data; 1804 ncblk = nilfs_btree_nchildren_per_block(btree); 1805 nilfs_btree_node_init(node, 0, 1, n, ncblk, keys, ptrs); 1806 nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr, ncblk); 1807 if (!buffer_dirty(bh)) 1808 mark_buffer_dirty(bh); 1809 if (!nilfs_bmap_dirty(btree)) 1810 nilfs_bmap_set_dirty(btree); 1811 1812 brelse(bh); 1813 1814 /* create root node at level 2 */ 1815 node = nilfs_btree_get_root(btree); 1816 tmpptr = nreq->bpr_ptr; 1817 nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 2, 1, 1818 NILFS_BTREE_ROOT_NCHILDREN_MAX, 1819 &keys[0], &tmpptr); 1820 } else { 1821 nilfs_bmap_commit_alloc_ptr(btree, dreq, dat); 1822 1823 /* create root node at level 1 */ 1824 node = nilfs_btree_get_root(btree); 1825 nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 1, n, 1826 NILFS_BTREE_ROOT_NCHILDREN_MAX, 1827 keys, ptrs); 1828 nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr, 1829 NILFS_BTREE_ROOT_NCHILDREN_MAX); 1830 if (!nilfs_bmap_dirty(btree)) 1831 nilfs_bmap_set_dirty(btree); 1832 } 1833 1834 if (NILFS_BMAP_USE_VBN(btree)) 1835 nilfs_bmap_set_target_v(btree, key, dreq->bpr_ptr); 1836 } 1837 1838 /** 1839 * nilfs_btree_convert_and_insert - 1840 * @bmap: 1841 * @key: 1842 * @ptr: 1843 * @keys: 1844 * @ptrs: 1845 * @n: 1846 */ 1847 int nilfs_btree_convert_and_insert(struct nilfs_bmap *btree, 1848 __u64 key, __u64 ptr, 1849 const __u64 *keys, const __u64 *ptrs, int n) 1850 { 1851 struct buffer_head *bh = NULL; 1852 union nilfs_bmap_ptr_req dreq, nreq, *di, *ni; 1853 struct nilfs_bmap_stats stats; 1854 int ret; 1855 1856 if (n + 1 <= NILFS_BTREE_ROOT_NCHILDREN_MAX) { 1857 di = &dreq; 1858 ni = NULL; 1859 } else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX( 1860 1 << btree->b_inode->i_blkbits)) { 1861 di = &dreq; 1862 ni = &nreq; 1863 } else { 1864 di = NULL; 1865 ni = NULL; 1866 BUG(); 1867 } 1868 1869 ret = nilfs_btree_prepare_convert_and_insert(btree, key, di, ni, &bh, 1870 &stats); 1871 if (ret < 0) 1872 return ret; 1873 nilfs_btree_commit_convert_and_insert(btree, key, ptr, keys, ptrs, n, 1874 di, ni, bh); 1875 nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks); 1876 return 0; 1877 } 1878 1879 static int nilfs_btree_propagate_p(struct nilfs_bmap *btree, 1880 struct nilfs_btree_path *path, 1881 int level, 1882 struct buffer_head *bh) 1883 { 1884 while ((++level < nilfs_btree_height(btree) - 1) && 1885 !buffer_dirty(path[level].bp_bh)) 1886 mark_buffer_dirty(path[level].bp_bh); 1887 1888 return 0; 1889 } 1890 1891 static int nilfs_btree_prepare_update_v(struct nilfs_bmap *btree, 1892 struct nilfs_btree_path *path, 1893 int level, struct inode *dat) 1894 { 1895 struct nilfs_btree_node *parent; 1896 int ncmax, ret; 1897 1898 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); 1899 path[level].bp_oldreq.bpr_ptr = 1900 nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index, 1901 ncmax); 1902 path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1; 1903 ret = nilfs_dat_prepare_update(dat, &path[level].bp_oldreq.bpr_req, 1904 &path[level].bp_newreq.bpr_req); 1905 if (ret < 0) 1906 return ret; 1907 1908 if (buffer_nilfs_node(path[level].bp_bh)) { 1909 path[level].bp_ctxt.oldkey = path[level].bp_oldreq.bpr_ptr; 1910 path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr; 1911 path[level].bp_ctxt.bh = path[level].bp_bh; 1912 ret = nilfs_btnode_prepare_change_key( 1913 &NILFS_BMAP_I(btree)->i_btnode_cache, 1914 &path[level].bp_ctxt); 1915 if (ret < 0) { 1916 nilfs_dat_abort_update(dat, 1917 &path[level].bp_oldreq.bpr_req, 1918 &path[level].bp_newreq.bpr_req); 1919 return ret; 1920 } 1921 } 1922 1923 return 0; 1924 } 1925 1926 static void nilfs_btree_commit_update_v(struct nilfs_bmap *btree, 1927 struct nilfs_btree_path *path, 1928 int level, struct inode *dat) 1929 { 1930 struct nilfs_btree_node *parent; 1931 int ncmax; 1932 1933 nilfs_dat_commit_update(dat, &path[level].bp_oldreq.bpr_req, 1934 &path[level].bp_newreq.bpr_req, 1935 btree->b_ptr_type == NILFS_BMAP_PTR_VS); 1936 1937 if (buffer_nilfs_node(path[level].bp_bh)) { 1938 nilfs_btnode_commit_change_key( 1939 &NILFS_BMAP_I(btree)->i_btnode_cache, 1940 &path[level].bp_ctxt); 1941 path[level].bp_bh = path[level].bp_ctxt.bh; 1942 } 1943 set_buffer_nilfs_volatile(path[level].bp_bh); 1944 1945 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); 1946 nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index, 1947 path[level].bp_newreq.bpr_ptr, ncmax); 1948 } 1949 1950 static void nilfs_btree_abort_update_v(struct nilfs_bmap *btree, 1951 struct nilfs_btree_path *path, 1952 int level, struct inode *dat) 1953 { 1954 nilfs_dat_abort_update(dat, &path[level].bp_oldreq.bpr_req, 1955 &path[level].bp_newreq.bpr_req); 1956 if (buffer_nilfs_node(path[level].bp_bh)) 1957 nilfs_btnode_abort_change_key( 1958 &NILFS_BMAP_I(btree)->i_btnode_cache, 1959 &path[level].bp_ctxt); 1960 } 1961 1962 static int nilfs_btree_prepare_propagate_v(struct nilfs_bmap *btree, 1963 struct nilfs_btree_path *path, 1964 int minlevel, int *maxlevelp, 1965 struct inode *dat) 1966 { 1967 int level, ret; 1968 1969 level = minlevel; 1970 if (!buffer_nilfs_volatile(path[level].bp_bh)) { 1971 ret = nilfs_btree_prepare_update_v(btree, path, level, dat); 1972 if (ret < 0) 1973 return ret; 1974 } 1975 while ((++level < nilfs_btree_height(btree) - 1) && 1976 !buffer_dirty(path[level].bp_bh)) { 1977 1978 WARN_ON(buffer_nilfs_volatile(path[level].bp_bh)); 1979 ret = nilfs_btree_prepare_update_v(btree, path, level, dat); 1980 if (ret < 0) 1981 goto out; 1982 } 1983 1984 /* success */ 1985 *maxlevelp = level - 1; 1986 return 0; 1987 1988 /* error */ 1989 out: 1990 while (--level > minlevel) 1991 nilfs_btree_abort_update_v(btree, path, level, dat); 1992 if (!buffer_nilfs_volatile(path[level].bp_bh)) 1993 nilfs_btree_abort_update_v(btree, path, level, dat); 1994 return ret; 1995 } 1996 1997 static void nilfs_btree_commit_propagate_v(struct nilfs_bmap *btree, 1998 struct nilfs_btree_path *path, 1999 int minlevel, int maxlevel, 2000 struct buffer_head *bh, 2001 struct inode *dat) 2002 { 2003 int level; 2004 2005 if (!buffer_nilfs_volatile(path[minlevel].bp_bh)) 2006 nilfs_btree_commit_update_v(btree, path, minlevel, dat); 2007 2008 for (level = minlevel + 1; level <= maxlevel; level++) 2009 nilfs_btree_commit_update_v(btree, path, level, dat); 2010 } 2011 2012 static int nilfs_btree_propagate_v(struct nilfs_bmap *btree, 2013 struct nilfs_btree_path *path, 2014 int level, struct buffer_head *bh) 2015 { 2016 int maxlevel = 0, ret; 2017 struct nilfs_btree_node *parent; 2018 struct inode *dat = nilfs_bmap_get_dat(btree); 2019 __u64 ptr; 2020 int ncmax; 2021 2022 get_bh(bh); 2023 path[level].bp_bh = bh; 2024 ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel, 2025 dat); 2026 if (ret < 0) 2027 goto out; 2028 2029 if (buffer_nilfs_volatile(path[level].bp_bh)) { 2030 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); 2031 ptr = nilfs_btree_node_get_ptr(parent, 2032 path[level + 1].bp_index, 2033 ncmax); 2034 ret = nilfs_dat_mark_dirty(dat, ptr); 2035 if (ret < 0) 2036 goto out; 2037 } 2038 2039 nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh, dat); 2040 2041 out: 2042 brelse(path[level].bp_bh); 2043 path[level].bp_bh = NULL; 2044 return ret; 2045 } 2046 2047 static int nilfs_btree_propagate(struct nilfs_bmap *btree, 2048 struct buffer_head *bh) 2049 { 2050 struct nilfs_btree_path *path; 2051 struct nilfs_btree_node *node; 2052 __u64 key; 2053 int level, ret; 2054 2055 WARN_ON(!buffer_dirty(bh)); 2056 2057 path = nilfs_btree_alloc_path(); 2058 if (path == NULL) 2059 return -ENOMEM; 2060 2061 if (buffer_nilfs_node(bh)) { 2062 node = (struct nilfs_btree_node *)bh->b_data; 2063 key = nilfs_btree_node_get_key(node, 0); 2064 level = nilfs_btree_node_get_level(node); 2065 } else { 2066 key = nilfs_bmap_data_get_key(btree, bh); 2067 level = NILFS_BTREE_LEVEL_DATA; 2068 } 2069 2070 ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0); 2071 if (ret < 0) { 2072 if (unlikely(ret == -ENOENT)) 2073 printk(KERN_CRIT "%s: key = %llu, level == %d\n", 2074 __func__, (unsigned long long)key, level); 2075 goto out; 2076 } 2077 2078 ret = NILFS_BMAP_USE_VBN(btree) ? 2079 nilfs_btree_propagate_v(btree, path, level, bh) : 2080 nilfs_btree_propagate_p(btree, path, level, bh); 2081 2082 out: 2083 nilfs_btree_free_path(path); 2084 2085 return ret; 2086 } 2087 2088 static int nilfs_btree_propagate_gc(struct nilfs_bmap *btree, 2089 struct buffer_head *bh) 2090 { 2091 return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(btree), bh->b_blocknr); 2092 } 2093 2094 static void nilfs_btree_add_dirty_buffer(struct nilfs_bmap *btree, 2095 struct list_head *lists, 2096 struct buffer_head *bh) 2097 { 2098 struct list_head *head; 2099 struct buffer_head *cbh; 2100 struct nilfs_btree_node *node, *cnode; 2101 __u64 key, ckey; 2102 int level; 2103 2104 get_bh(bh); 2105 node = (struct nilfs_btree_node *)bh->b_data; 2106 key = nilfs_btree_node_get_key(node, 0); 2107 level = nilfs_btree_node_get_level(node); 2108 if (level < NILFS_BTREE_LEVEL_NODE_MIN || 2109 level >= NILFS_BTREE_LEVEL_MAX) { 2110 dump_stack(); 2111 printk(KERN_WARNING 2112 "%s: invalid btree level: %d (key=%llu, ino=%lu, " 2113 "blocknr=%llu)\n", 2114 __func__, level, (unsigned long long)key, 2115 NILFS_BMAP_I(btree)->vfs_inode.i_ino, 2116 (unsigned long long)bh->b_blocknr); 2117 return; 2118 } 2119 2120 list_for_each(head, &lists[level]) { 2121 cbh = list_entry(head, struct buffer_head, b_assoc_buffers); 2122 cnode = (struct nilfs_btree_node *)cbh->b_data; 2123 ckey = nilfs_btree_node_get_key(cnode, 0); 2124 if (key < ckey) 2125 break; 2126 } 2127 list_add_tail(&bh->b_assoc_buffers, head); 2128 } 2129 2130 static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *btree, 2131 struct list_head *listp) 2132 { 2133 struct address_space *btcache = &NILFS_BMAP_I(btree)->i_btnode_cache; 2134 struct list_head lists[NILFS_BTREE_LEVEL_MAX]; 2135 struct pagevec pvec; 2136 struct buffer_head *bh, *head; 2137 pgoff_t index = 0; 2138 int level, i; 2139 2140 for (level = NILFS_BTREE_LEVEL_NODE_MIN; 2141 level < NILFS_BTREE_LEVEL_MAX; 2142 level++) 2143 INIT_LIST_HEAD(&lists[level]); 2144 2145 pagevec_init(&pvec, 0); 2146 2147 while (pagevec_lookup_tag(&pvec, btcache, &index, PAGECACHE_TAG_DIRTY, 2148 PAGEVEC_SIZE)) { 2149 for (i = 0; i < pagevec_count(&pvec); i++) { 2150 bh = head = page_buffers(pvec.pages[i]); 2151 do { 2152 if (buffer_dirty(bh)) 2153 nilfs_btree_add_dirty_buffer(btree, 2154 lists, bh); 2155 } while ((bh = bh->b_this_page) != head); 2156 } 2157 pagevec_release(&pvec); 2158 cond_resched(); 2159 } 2160 2161 for (level = NILFS_BTREE_LEVEL_NODE_MIN; 2162 level < NILFS_BTREE_LEVEL_MAX; 2163 level++) 2164 list_splice_tail(&lists[level], listp); 2165 } 2166 2167 static int nilfs_btree_assign_p(struct nilfs_bmap *btree, 2168 struct nilfs_btree_path *path, 2169 int level, 2170 struct buffer_head **bh, 2171 sector_t blocknr, 2172 union nilfs_binfo *binfo) 2173 { 2174 struct nilfs_btree_node *parent; 2175 __u64 key; 2176 __u64 ptr; 2177 int ncmax, ret; 2178 2179 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); 2180 ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index, 2181 ncmax); 2182 if (buffer_nilfs_node(*bh)) { 2183 path[level].bp_ctxt.oldkey = ptr; 2184 path[level].bp_ctxt.newkey = blocknr; 2185 path[level].bp_ctxt.bh = *bh; 2186 ret = nilfs_btnode_prepare_change_key( 2187 &NILFS_BMAP_I(btree)->i_btnode_cache, 2188 &path[level].bp_ctxt); 2189 if (ret < 0) 2190 return ret; 2191 nilfs_btnode_commit_change_key( 2192 &NILFS_BMAP_I(btree)->i_btnode_cache, 2193 &path[level].bp_ctxt); 2194 *bh = path[level].bp_ctxt.bh; 2195 } 2196 2197 nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index, blocknr, 2198 ncmax); 2199 2200 key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index); 2201 /* on-disk format */ 2202 binfo->bi_dat.bi_blkoff = cpu_to_le64(key); 2203 binfo->bi_dat.bi_level = level; 2204 2205 return 0; 2206 } 2207 2208 static int nilfs_btree_assign_v(struct nilfs_bmap *btree, 2209 struct nilfs_btree_path *path, 2210 int level, 2211 struct buffer_head **bh, 2212 sector_t blocknr, 2213 union nilfs_binfo *binfo) 2214 { 2215 struct nilfs_btree_node *parent; 2216 struct inode *dat = nilfs_bmap_get_dat(btree); 2217 __u64 key; 2218 __u64 ptr; 2219 union nilfs_bmap_ptr_req req; 2220 int ncmax, ret; 2221 2222 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); 2223 ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index, 2224 ncmax); 2225 req.bpr_ptr = ptr; 2226 ret = nilfs_dat_prepare_start(dat, &req.bpr_req); 2227 if (ret < 0) 2228 return ret; 2229 nilfs_dat_commit_start(dat, &req.bpr_req, blocknr); 2230 2231 key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index); 2232 /* on-disk format */ 2233 binfo->bi_v.bi_vblocknr = cpu_to_le64(ptr); 2234 binfo->bi_v.bi_blkoff = cpu_to_le64(key); 2235 2236 return 0; 2237 } 2238 2239 static int nilfs_btree_assign(struct nilfs_bmap *btree, 2240 struct buffer_head **bh, 2241 sector_t blocknr, 2242 union nilfs_binfo *binfo) 2243 { 2244 struct nilfs_btree_path *path; 2245 struct nilfs_btree_node *node; 2246 __u64 key; 2247 int level, ret; 2248 2249 path = nilfs_btree_alloc_path(); 2250 if (path == NULL) 2251 return -ENOMEM; 2252 2253 if (buffer_nilfs_node(*bh)) { 2254 node = (struct nilfs_btree_node *)(*bh)->b_data; 2255 key = nilfs_btree_node_get_key(node, 0); 2256 level = nilfs_btree_node_get_level(node); 2257 } else { 2258 key = nilfs_bmap_data_get_key(btree, *bh); 2259 level = NILFS_BTREE_LEVEL_DATA; 2260 } 2261 2262 ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0); 2263 if (ret < 0) { 2264 WARN_ON(ret == -ENOENT); 2265 goto out; 2266 } 2267 2268 ret = NILFS_BMAP_USE_VBN(btree) ? 2269 nilfs_btree_assign_v(btree, path, level, bh, blocknr, binfo) : 2270 nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo); 2271 2272 out: 2273 nilfs_btree_free_path(path); 2274 2275 return ret; 2276 } 2277 2278 static int nilfs_btree_assign_gc(struct nilfs_bmap *btree, 2279 struct buffer_head **bh, 2280 sector_t blocknr, 2281 union nilfs_binfo *binfo) 2282 { 2283 struct nilfs_btree_node *node; 2284 __u64 key; 2285 int ret; 2286 2287 ret = nilfs_dat_move(nilfs_bmap_get_dat(btree), (*bh)->b_blocknr, 2288 blocknr); 2289 if (ret < 0) 2290 return ret; 2291 2292 if (buffer_nilfs_node(*bh)) { 2293 node = (struct nilfs_btree_node *)(*bh)->b_data; 2294 key = nilfs_btree_node_get_key(node, 0); 2295 } else 2296 key = nilfs_bmap_data_get_key(btree, *bh); 2297 2298 /* on-disk format */ 2299 binfo->bi_v.bi_vblocknr = cpu_to_le64((*bh)->b_blocknr); 2300 binfo->bi_v.bi_blkoff = cpu_to_le64(key); 2301 2302 return 0; 2303 } 2304 2305 static int nilfs_btree_mark(struct nilfs_bmap *btree, __u64 key, int level) 2306 { 2307 struct buffer_head *bh; 2308 struct nilfs_btree_path *path; 2309 __u64 ptr; 2310 int ret; 2311 2312 path = nilfs_btree_alloc_path(); 2313 if (path == NULL) 2314 return -ENOMEM; 2315 2316 ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1, 0); 2317 if (ret < 0) { 2318 WARN_ON(ret == -ENOENT); 2319 goto out; 2320 } 2321 ret = nilfs_btree_get_block(btree, ptr, &bh); 2322 if (ret < 0) { 2323 WARN_ON(ret == -ENOENT); 2324 goto out; 2325 } 2326 2327 if (!buffer_dirty(bh)) 2328 mark_buffer_dirty(bh); 2329 brelse(bh); 2330 if (!nilfs_bmap_dirty(btree)) 2331 nilfs_bmap_set_dirty(btree); 2332 2333 out: 2334 nilfs_btree_free_path(path); 2335 return ret; 2336 } 2337 2338 static const struct nilfs_bmap_operations nilfs_btree_ops = { 2339 .bop_lookup = nilfs_btree_lookup, 2340 .bop_lookup_contig = nilfs_btree_lookup_contig, 2341 .bop_insert = nilfs_btree_insert, 2342 .bop_delete = nilfs_btree_delete, 2343 .bop_clear = NULL, 2344 2345 .bop_propagate = nilfs_btree_propagate, 2346 2347 .bop_lookup_dirty_buffers = nilfs_btree_lookup_dirty_buffers, 2348 2349 .bop_assign = nilfs_btree_assign, 2350 .bop_mark = nilfs_btree_mark, 2351 2352 .bop_seek_key = nilfs_btree_seek_key, 2353 .bop_last_key = nilfs_btree_last_key, 2354 2355 .bop_check_insert = NULL, 2356 .bop_check_delete = nilfs_btree_check_delete, 2357 .bop_gather_data = nilfs_btree_gather_data, 2358 }; 2359 2360 static const struct nilfs_bmap_operations nilfs_btree_ops_gc = { 2361 .bop_lookup = NULL, 2362 .bop_lookup_contig = NULL, 2363 .bop_insert = NULL, 2364 .bop_delete = NULL, 2365 .bop_clear = NULL, 2366 2367 .bop_propagate = nilfs_btree_propagate_gc, 2368 2369 .bop_lookup_dirty_buffers = nilfs_btree_lookup_dirty_buffers, 2370 2371 .bop_assign = nilfs_btree_assign_gc, 2372 .bop_mark = NULL, 2373 2374 .bop_seek_key = NULL, 2375 .bop_last_key = NULL, 2376 2377 .bop_check_insert = NULL, 2378 .bop_check_delete = NULL, 2379 .bop_gather_data = NULL, 2380 }; 2381 2382 static void __nilfs_btree_init(struct nilfs_bmap *bmap) 2383 { 2384 bmap->b_ops = &nilfs_btree_ops; 2385 bmap->b_nchildren_per_block = 2386 NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap)); 2387 } 2388 2389 int nilfs_btree_init(struct nilfs_bmap *bmap) 2390 { 2391 int ret = 0; 2392 2393 __nilfs_btree_init(bmap); 2394 2395 if (nilfs_btree_root_broken(nilfs_btree_get_root(bmap), 2396 bmap->b_inode->i_ino)) 2397 ret = -EIO; 2398 return ret; 2399 } 2400 2401 void nilfs_btree_init_gc(struct nilfs_bmap *bmap) 2402 { 2403 bmap->b_ops = &nilfs_btree_ops_gc; 2404 bmap->b_nchildren_per_block = 2405 NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap)); 2406 } 2407