1 /* 2 * btree.c - NILFS B-tree. 3 * 4 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. 5 * 6 * This program is free software; you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License as published by 8 * the Free Software Foundation; either version 2 of the License, or 9 * (at your option) any later version. 10 * 11 * This program is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 * GNU General Public License for more details. 15 * 16 * You should have received a copy of the GNU General Public License 17 * along with this program; if not, write to the Free Software 18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 19 * 20 * Written by Koji Sato <koji@osrg.net>. 21 */ 22 23 #include <linux/slab.h> 24 #include <linux/string.h> 25 #include <linux/errno.h> 26 #include <linux/pagevec.h> 27 #include "nilfs.h" 28 #include "page.h" 29 #include "btnode.h" 30 #include "btree.h" 31 #include "alloc.h" 32 #include "dat.h" 33 34 static void __nilfs_btree_init(struct nilfs_bmap *bmap); 35 36 static struct nilfs_btree_path *nilfs_btree_alloc_path(void) 37 { 38 struct nilfs_btree_path *path; 39 int level = NILFS_BTREE_LEVEL_DATA; 40 41 path = kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS); 42 if (path == NULL) 43 goto out; 44 45 for (; level < NILFS_BTREE_LEVEL_MAX; level++) { 46 path[level].bp_bh = NULL; 47 path[level].bp_sib_bh = NULL; 48 path[level].bp_index = 0; 49 path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR; 50 path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR; 51 path[level].bp_op = NULL; 52 } 53 54 out: 55 return path; 56 } 57 58 static void nilfs_btree_free_path(struct nilfs_btree_path *path) 59 { 60 int level = NILFS_BTREE_LEVEL_DATA; 61 62 for (; level < NILFS_BTREE_LEVEL_MAX; level++) 63 brelse(path[level].bp_bh); 64 65 kmem_cache_free(nilfs_btree_path_cache, path); 66 } 67 68 /* 69 * B-tree node operations 70 */ 71 static int nilfs_btree_get_new_block(const struct nilfs_bmap *btree, 72 __u64 ptr, struct buffer_head **bhp) 73 { 74 struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache; 75 struct buffer_head *bh; 76 77 bh = nilfs_btnode_create_block(btnc, ptr); 78 if (!bh) 79 return -ENOMEM; 80 81 set_buffer_nilfs_volatile(bh); 82 *bhp = bh; 83 return 0; 84 } 85 86 static int nilfs_btree_node_get_flags(const struct nilfs_btree_node *node) 87 { 88 return node->bn_flags; 89 } 90 91 static void 92 nilfs_btree_node_set_flags(struct nilfs_btree_node *node, int flags) 93 { 94 node->bn_flags = flags; 95 } 96 97 static int nilfs_btree_node_root(const struct nilfs_btree_node *node) 98 { 99 return nilfs_btree_node_get_flags(node) & NILFS_BTREE_NODE_ROOT; 100 } 101 102 static int nilfs_btree_node_get_level(const struct nilfs_btree_node *node) 103 { 104 return node->bn_level; 105 } 106 107 static void 108 nilfs_btree_node_set_level(struct nilfs_btree_node *node, int level) 109 { 110 node->bn_level = level; 111 } 112 113 static int nilfs_btree_node_get_nchildren(const struct nilfs_btree_node *node) 114 { 115 return le16_to_cpu(node->bn_nchildren); 116 } 117 118 static void 119 nilfs_btree_node_set_nchildren(struct nilfs_btree_node *node, int nchildren) 120 { 121 node->bn_nchildren = cpu_to_le16(nchildren); 122 } 123 124 static int nilfs_btree_node_size(const struct nilfs_bmap *btree) 125 { 126 return 1 << btree->b_inode->i_blkbits; 127 } 128 129 static int nilfs_btree_nchildren_per_block(const struct nilfs_bmap *btree) 130 { 131 return btree->b_nchildren_per_block; 132 } 133 134 static __le64 * 135 nilfs_btree_node_dkeys(const struct nilfs_btree_node *node) 136 { 137 return (__le64 *)((char *)(node + 1) + 138 (nilfs_btree_node_root(node) ? 139 0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE)); 140 } 141 142 static __le64 * 143 nilfs_btree_node_dptrs(const struct nilfs_btree_node *node, int ncmax) 144 { 145 return (__le64 *)(nilfs_btree_node_dkeys(node) + ncmax); 146 } 147 148 static __u64 149 nilfs_btree_node_get_key(const struct nilfs_btree_node *node, int index) 150 { 151 return le64_to_cpu(*(nilfs_btree_node_dkeys(node) + index)); 152 } 153 154 static void 155 nilfs_btree_node_set_key(struct nilfs_btree_node *node, int index, __u64 key) 156 { 157 *(nilfs_btree_node_dkeys(node) + index) = cpu_to_le64(key); 158 } 159 160 static __u64 161 nilfs_btree_node_get_ptr(const struct nilfs_btree_node *node, int index, 162 int ncmax) 163 { 164 return le64_to_cpu(*(nilfs_btree_node_dptrs(node, ncmax) + index)); 165 } 166 167 static void 168 nilfs_btree_node_set_ptr(struct nilfs_btree_node *node, int index, __u64 ptr, 169 int ncmax) 170 { 171 *(nilfs_btree_node_dptrs(node, ncmax) + index) = cpu_to_le64(ptr); 172 } 173 174 static void nilfs_btree_node_init(struct nilfs_btree_node *node, int flags, 175 int level, int nchildren, int ncmax, 176 const __u64 *keys, const __u64 *ptrs) 177 { 178 __le64 *dkeys; 179 __le64 *dptrs; 180 int i; 181 182 nilfs_btree_node_set_flags(node, flags); 183 nilfs_btree_node_set_level(node, level); 184 nilfs_btree_node_set_nchildren(node, nchildren); 185 186 dkeys = nilfs_btree_node_dkeys(node); 187 dptrs = nilfs_btree_node_dptrs(node, ncmax); 188 for (i = 0; i < nchildren; i++) { 189 dkeys[i] = cpu_to_le64(keys[i]); 190 dptrs[i] = cpu_to_le64(ptrs[i]); 191 } 192 } 193 194 /* Assume the buffer heads corresponding to left and right are locked. */ 195 static void nilfs_btree_node_move_left(struct nilfs_btree_node *left, 196 struct nilfs_btree_node *right, 197 int n, int lncmax, int rncmax) 198 { 199 __le64 *ldkeys, *rdkeys; 200 __le64 *ldptrs, *rdptrs; 201 int lnchildren, rnchildren; 202 203 ldkeys = nilfs_btree_node_dkeys(left); 204 ldptrs = nilfs_btree_node_dptrs(left, lncmax); 205 lnchildren = nilfs_btree_node_get_nchildren(left); 206 207 rdkeys = nilfs_btree_node_dkeys(right); 208 rdptrs = nilfs_btree_node_dptrs(right, rncmax); 209 rnchildren = nilfs_btree_node_get_nchildren(right); 210 211 memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys)); 212 memcpy(ldptrs + lnchildren, rdptrs, n * sizeof(*rdptrs)); 213 memmove(rdkeys, rdkeys + n, (rnchildren - n) * sizeof(*rdkeys)); 214 memmove(rdptrs, rdptrs + n, (rnchildren - n) * sizeof(*rdptrs)); 215 216 lnchildren += n; 217 rnchildren -= n; 218 nilfs_btree_node_set_nchildren(left, lnchildren); 219 nilfs_btree_node_set_nchildren(right, rnchildren); 220 } 221 222 /* Assume that the buffer heads corresponding to left and right are locked. */ 223 static void nilfs_btree_node_move_right(struct nilfs_btree_node *left, 224 struct nilfs_btree_node *right, 225 int n, int lncmax, int rncmax) 226 { 227 __le64 *ldkeys, *rdkeys; 228 __le64 *ldptrs, *rdptrs; 229 int lnchildren, rnchildren; 230 231 ldkeys = nilfs_btree_node_dkeys(left); 232 ldptrs = nilfs_btree_node_dptrs(left, lncmax); 233 lnchildren = nilfs_btree_node_get_nchildren(left); 234 235 rdkeys = nilfs_btree_node_dkeys(right); 236 rdptrs = nilfs_btree_node_dptrs(right, rncmax); 237 rnchildren = nilfs_btree_node_get_nchildren(right); 238 239 memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys)); 240 memmove(rdptrs + n, rdptrs, rnchildren * sizeof(*rdptrs)); 241 memcpy(rdkeys, ldkeys + lnchildren - n, n * sizeof(*rdkeys)); 242 memcpy(rdptrs, ldptrs + lnchildren - n, n * sizeof(*rdptrs)); 243 244 lnchildren -= n; 245 rnchildren += n; 246 nilfs_btree_node_set_nchildren(left, lnchildren); 247 nilfs_btree_node_set_nchildren(right, rnchildren); 248 } 249 250 /* Assume that the buffer head corresponding to node is locked. */ 251 static void nilfs_btree_node_insert(struct nilfs_btree_node *node, int index, 252 __u64 key, __u64 ptr, int ncmax) 253 { 254 __le64 *dkeys; 255 __le64 *dptrs; 256 int nchildren; 257 258 dkeys = nilfs_btree_node_dkeys(node); 259 dptrs = nilfs_btree_node_dptrs(node, ncmax); 260 nchildren = nilfs_btree_node_get_nchildren(node); 261 if (index < nchildren) { 262 memmove(dkeys + index + 1, dkeys + index, 263 (nchildren - index) * sizeof(*dkeys)); 264 memmove(dptrs + index + 1, dptrs + index, 265 (nchildren - index) * sizeof(*dptrs)); 266 } 267 dkeys[index] = cpu_to_le64(key); 268 dptrs[index] = cpu_to_le64(ptr); 269 nchildren++; 270 nilfs_btree_node_set_nchildren(node, nchildren); 271 } 272 273 /* Assume that the buffer head corresponding to node is locked. */ 274 static void nilfs_btree_node_delete(struct nilfs_btree_node *node, int index, 275 __u64 *keyp, __u64 *ptrp, int ncmax) 276 { 277 __u64 key; 278 __u64 ptr; 279 __le64 *dkeys; 280 __le64 *dptrs; 281 int nchildren; 282 283 dkeys = nilfs_btree_node_dkeys(node); 284 dptrs = nilfs_btree_node_dptrs(node, ncmax); 285 key = le64_to_cpu(dkeys[index]); 286 ptr = le64_to_cpu(dptrs[index]); 287 nchildren = nilfs_btree_node_get_nchildren(node); 288 if (keyp != NULL) 289 *keyp = key; 290 if (ptrp != NULL) 291 *ptrp = ptr; 292 293 if (index < nchildren - 1) { 294 memmove(dkeys + index, dkeys + index + 1, 295 (nchildren - index - 1) * sizeof(*dkeys)); 296 memmove(dptrs + index, dptrs + index + 1, 297 (nchildren - index - 1) * sizeof(*dptrs)); 298 } 299 nchildren--; 300 nilfs_btree_node_set_nchildren(node, nchildren); 301 } 302 303 static int nilfs_btree_node_lookup(const struct nilfs_btree_node *node, 304 __u64 key, int *indexp) 305 { 306 __u64 nkey; 307 int index, low, high, s; 308 309 /* binary search */ 310 low = 0; 311 high = nilfs_btree_node_get_nchildren(node) - 1; 312 index = 0; 313 s = 0; 314 while (low <= high) { 315 index = (low + high) / 2; 316 nkey = nilfs_btree_node_get_key(node, index); 317 if (nkey == key) { 318 s = 0; 319 goto out; 320 } else if (nkey < key) { 321 low = index + 1; 322 s = -1; 323 } else { 324 high = index - 1; 325 s = 1; 326 } 327 } 328 329 /* adjust index */ 330 if (nilfs_btree_node_get_level(node) > NILFS_BTREE_LEVEL_NODE_MIN) { 331 if (s > 0 && index > 0) 332 index--; 333 } else if (s < 0) 334 index++; 335 336 out: 337 *indexp = index; 338 339 return s == 0; 340 } 341 342 /** 343 * nilfs_btree_node_broken - verify consistency of btree node 344 * @node: btree node block to be examined 345 * @size: node size (in bytes) 346 * @blocknr: block number 347 * 348 * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned. 349 */ 350 static int nilfs_btree_node_broken(const struct nilfs_btree_node *node, 351 size_t size, sector_t blocknr) 352 { 353 int level, flags, nchildren; 354 int ret = 0; 355 356 level = nilfs_btree_node_get_level(node); 357 flags = nilfs_btree_node_get_flags(node); 358 nchildren = nilfs_btree_node_get_nchildren(node); 359 360 if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN || 361 level >= NILFS_BTREE_LEVEL_MAX || 362 (flags & NILFS_BTREE_NODE_ROOT) || 363 nchildren < 0 || 364 nchildren > NILFS_BTREE_NODE_NCHILDREN_MAX(size))) { 365 printk(KERN_CRIT "NILFS: bad btree node (blocknr=%llu): " 366 "level = %d, flags = 0x%x, nchildren = %d\n", 367 (unsigned long long)blocknr, level, flags, nchildren); 368 ret = 1; 369 } 370 return ret; 371 } 372 373 /** 374 * nilfs_btree_root_broken - verify consistency of btree root node 375 * @node: btree root node to be examined 376 * @ino: inode number 377 * 378 * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned. 379 */ 380 static int nilfs_btree_root_broken(const struct nilfs_btree_node *node, 381 unsigned long ino) 382 { 383 int level, flags, nchildren; 384 int ret = 0; 385 386 level = nilfs_btree_node_get_level(node); 387 flags = nilfs_btree_node_get_flags(node); 388 nchildren = nilfs_btree_node_get_nchildren(node); 389 390 if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN || 391 level >= NILFS_BTREE_LEVEL_MAX || 392 nchildren < 0 || 393 nchildren > NILFS_BTREE_ROOT_NCHILDREN_MAX)) { 394 pr_crit("NILFS: bad btree root (inode number=%lu): level = %d, flags = 0x%x, nchildren = %d\n", 395 ino, level, flags, nchildren); 396 ret = 1; 397 } 398 return ret; 399 } 400 401 int nilfs_btree_broken_node_block(struct buffer_head *bh) 402 { 403 int ret; 404 405 if (buffer_nilfs_checked(bh)) 406 return 0; 407 408 ret = nilfs_btree_node_broken((struct nilfs_btree_node *)bh->b_data, 409 bh->b_size, bh->b_blocknr); 410 if (likely(!ret)) 411 set_buffer_nilfs_checked(bh); 412 return ret; 413 } 414 415 static struct nilfs_btree_node * 416 nilfs_btree_get_root(const struct nilfs_bmap *btree) 417 { 418 return (struct nilfs_btree_node *)btree->b_u.u_data; 419 } 420 421 static struct nilfs_btree_node * 422 nilfs_btree_get_nonroot_node(const struct nilfs_btree_path *path, int level) 423 { 424 return (struct nilfs_btree_node *)path[level].bp_bh->b_data; 425 } 426 427 static struct nilfs_btree_node * 428 nilfs_btree_get_sib_node(const struct nilfs_btree_path *path, int level) 429 { 430 return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data; 431 } 432 433 static int nilfs_btree_height(const struct nilfs_bmap *btree) 434 { 435 return nilfs_btree_node_get_level(nilfs_btree_get_root(btree)) + 1; 436 } 437 438 static struct nilfs_btree_node * 439 nilfs_btree_get_node(const struct nilfs_bmap *btree, 440 const struct nilfs_btree_path *path, 441 int level, int *ncmaxp) 442 { 443 struct nilfs_btree_node *node; 444 445 if (level == nilfs_btree_height(btree) - 1) { 446 node = nilfs_btree_get_root(btree); 447 *ncmaxp = NILFS_BTREE_ROOT_NCHILDREN_MAX; 448 } else { 449 node = nilfs_btree_get_nonroot_node(path, level); 450 *ncmaxp = nilfs_btree_nchildren_per_block(btree); 451 } 452 return node; 453 } 454 455 static int 456 nilfs_btree_bad_node(struct nilfs_btree_node *node, int level) 457 { 458 if (unlikely(nilfs_btree_node_get_level(node) != level)) { 459 dump_stack(); 460 printk(KERN_CRIT "NILFS: btree level mismatch: %d != %d\n", 461 nilfs_btree_node_get_level(node), level); 462 return 1; 463 } 464 return 0; 465 } 466 467 struct nilfs_btree_readahead_info { 468 struct nilfs_btree_node *node; /* parent node */ 469 int max_ra_blocks; /* max nof blocks to read ahead */ 470 int index; /* current index on the parent node */ 471 int ncmax; /* nof children in the parent node */ 472 }; 473 474 static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr, 475 struct buffer_head **bhp, 476 const struct nilfs_btree_readahead_info *ra) 477 { 478 struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache; 479 struct buffer_head *bh, *ra_bh; 480 sector_t submit_ptr = 0; 481 int ret; 482 483 ret = nilfs_btnode_submit_block(btnc, ptr, 0, READ, &bh, &submit_ptr); 484 if (ret) { 485 if (ret != -EEXIST) 486 return ret; 487 goto out_check; 488 } 489 490 if (ra) { 491 int i, n; 492 __u64 ptr2; 493 494 /* read ahead sibling nodes */ 495 for (n = ra->max_ra_blocks, i = ra->index + 1; 496 n > 0 && i < ra->ncmax; n--, i++) { 497 ptr2 = nilfs_btree_node_get_ptr(ra->node, i, ra->ncmax); 498 499 ret = nilfs_btnode_submit_block(btnc, ptr2, 0, READA, 500 &ra_bh, &submit_ptr); 501 if (likely(!ret || ret == -EEXIST)) 502 brelse(ra_bh); 503 else if (ret != -EBUSY) 504 break; 505 if (!buffer_locked(bh)) 506 goto out_no_wait; 507 } 508 } 509 510 wait_on_buffer(bh); 511 512 out_no_wait: 513 if (!buffer_uptodate(bh)) { 514 brelse(bh); 515 return -EIO; 516 } 517 518 out_check: 519 if (nilfs_btree_broken_node_block(bh)) { 520 clear_buffer_uptodate(bh); 521 brelse(bh); 522 return -EINVAL; 523 } 524 525 *bhp = bh; 526 return 0; 527 } 528 529 static int nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr, 530 struct buffer_head **bhp) 531 { 532 return __nilfs_btree_get_block(btree, ptr, bhp, NULL); 533 } 534 535 static int nilfs_btree_do_lookup(const struct nilfs_bmap *btree, 536 struct nilfs_btree_path *path, 537 __u64 key, __u64 *ptrp, int minlevel, 538 int readahead) 539 { 540 struct nilfs_btree_node *node; 541 struct nilfs_btree_readahead_info p, *ra; 542 __u64 ptr; 543 int level, index, found, ncmax, ret; 544 545 node = nilfs_btree_get_root(btree); 546 level = nilfs_btree_node_get_level(node); 547 if (level < minlevel || nilfs_btree_node_get_nchildren(node) <= 0) 548 return -ENOENT; 549 550 found = nilfs_btree_node_lookup(node, key, &index); 551 ptr = nilfs_btree_node_get_ptr(node, index, 552 NILFS_BTREE_ROOT_NCHILDREN_MAX); 553 path[level].bp_bh = NULL; 554 path[level].bp_index = index; 555 556 ncmax = nilfs_btree_nchildren_per_block(btree); 557 558 while (--level >= minlevel) { 559 ra = NULL; 560 if (level == NILFS_BTREE_LEVEL_NODE_MIN && readahead) { 561 p.node = nilfs_btree_get_node(btree, path, level + 1, 562 &p.ncmax); 563 p.index = index; 564 p.max_ra_blocks = 7; 565 ra = &p; 566 } 567 ret = __nilfs_btree_get_block(btree, ptr, &path[level].bp_bh, 568 ra); 569 if (ret < 0) 570 return ret; 571 572 node = nilfs_btree_get_nonroot_node(path, level); 573 if (nilfs_btree_bad_node(node, level)) 574 return -EINVAL; 575 if (!found) 576 found = nilfs_btree_node_lookup(node, key, &index); 577 else 578 index = 0; 579 if (index < ncmax) { 580 ptr = nilfs_btree_node_get_ptr(node, index, ncmax); 581 } else { 582 WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN); 583 /* insert */ 584 ptr = NILFS_BMAP_INVALID_PTR; 585 } 586 path[level].bp_index = index; 587 } 588 if (!found) 589 return -ENOENT; 590 591 if (ptrp != NULL) 592 *ptrp = ptr; 593 594 return 0; 595 } 596 597 static int nilfs_btree_do_lookup_last(const struct nilfs_bmap *btree, 598 struct nilfs_btree_path *path, 599 __u64 *keyp, __u64 *ptrp) 600 { 601 struct nilfs_btree_node *node; 602 __u64 ptr; 603 int index, level, ncmax, ret; 604 605 node = nilfs_btree_get_root(btree); 606 index = nilfs_btree_node_get_nchildren(node) - 1; 607 if (index < 0) 608 return -ENOENT; 609 level = nilfs_btree_node_get_level(node); 610 ptr = nilfs_btree_node_get_ptr(node, index, 611 NILFS_BTREE_ROOT_NCHILDREN_MAX); 612 path[level].bp_bh = NULL; 613 path[level].bp_index = index; 614 ncmax = nilfs_btree_nchildren_per_block(btree); 615 616 for (level--; level > 0; level--) { 617 ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh); 618 if (ret < 0) 619 return ret; 620 node = nilfs_btree_get_nonroot_node(path, level); 621 if (nilfs_btree_bad_node(node, level)) 622 return -EINVAL; 623 index = nilfs_btree_node_get_nchildren(node) - 1; 624 ptr = nilfs_btree_node_get_ptr(node, index, ncmax); 625 path[level].bp_index = index; 626 } 627 628 if (keyp != NULL) 629 *keyp = nilfs_btree_node_get_key(node, index); 630 if (ptrp != NULL) 631 *ptrp = ptr; 632 633 return 0; 634 } 635 636 /** 637 * nilfs_btree_get_next_key - get next valid key from btree path array 638 * @btree: bmap struct of btree 639 * @path: array of nilfs_btree_path struct 640 * @minlevel: start level 641 * @nextkey: place to store the next valid key 642 * 643 * Return Value: If a next key was found, 0 is returned. Otherwise, 644 * -ENOENT is returned. 645 */ 646 static int nilfs_btree_get_next_key(const struct nilfs_bmap *btree, 647 const struct nilfs_btree_path *path, 648 int minlevel, __u64 *nextkey) 649 { 650 struct nilfs_btree_node *node; 651 int maxlevel = nilfs_btree_height(btree) - 1; 652 int index, next_adj, level; 653 654 /* Next index is already set to bp_index for leaf nodes. */ 655 next_adj = 0; 656 for (level = minlevel; level <= maxlevel; level++) { 657 if (level == maxlevel) 658 node = nilfs_btree_get_root(btree); 659 else 660 node = nilfs_btree_get_nonroot_node(path, level); 661 662 index = path[level].bp_index + next_adj; 663 if (index < nilfs_btree_node_get_nchildren(node)) { 664 /* Next key is in this node */ 665 *nextkey = nilfs_btree_node_get_key(node, index); 666 return 0; 667 } 668 /* For non-leaf nodes, next index is stored at bp_index + 1. */ 669 next_adj = 1; 670 } 671 return -ENOENT; 672 } 673 674 static int nilfs_btree_lookup(const struct nilfs_bmap *btree, 675 __u64 key, int level, __u64 *ptrp) 676 { 677 struct nilfs_btree_path *path; 678 int ret; 679 680 path = nilfs_btree_alloc_path(); 681 if (path == NULL) 682 return -ENOMEM; 683 684 ret = nilfs_btree_do_lookup(btree, path, key, ptrp, level, 0); 685 686 nilfs_btree_free_path(path); 687 688 return ret; 689 } 690 691 static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree, 692 __u64 key, __u64 *ptrp, unsigned maxblocks) 693 { 694 struct nilfs_btree_path *path; 695 struct nilfs_btree_node *node; 696 struct inode *dat = NULL; 697 __u64 ptr, ptr2; 698 sector_t blocknr; 699 int level = NILFS_BTREE_LEVEL_NODE_MIN; 700 int ret, cnt, index, maxlevel, ncmax; 701 struct nilfs_btree_readahead_info p; 702 703 path = nilfs_btree_alloc_path(); 704 if (path == NULL) 705 return -ENOMEM; 706 707 ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level, 1); 708 if (ret < 0) 709 goto out; 710 711 if (NILFS_BMAP_USE_VBN(btree)) { 712 dat = nilfs_bmap_get_dat(btree); 713 ret = nilfs_dat_translate(dat, ptr, &blocknr); 714 if (ret < 0) 715 goto out; 716 ptr = blocknr; 717 } 718 cnt = 1; 719 if (cnt == maxblocks) 720 goto end; 721 722 maxlevel = nilfs_btree_height(btree) - 1; 723 node = nilfs_btree_get_node(btree, path, level, &ncmax); 724 index = path[level].bp_index + 1; 725 for (;;) { 726 while (index < nilfs_btree_node_get_nchildren(node)) { 727 if (nilfs_btree_node_get_key(node, index) != 728 key + cnt) 729 goto end; 730 ptr2 = nilfs_btree_node_get_ptr(node, index, ncmax); 731 if (dat) { 732 ret = nilfs_dat_translate(dat, ptr2, &blocknr); 733 if (ret < 0) 734 goto out; 735 ptr2 = blocknr; 736 } 737 if (ptr2 != ptr + cnt || ++cnt == maxblocks) 738 goto end; 739 index++; 740 continue; 741 } 742 if (level == maxlevel) 743 break; 744 745 /* look-up right sibling node */ 746 p.node = nilfs_btree_get_node(btree, path, level + 1, &p.ncmax); 747 p.index = path[level + 1].bp_index + 1; 748 p.max_ra_blocks = 7; 749 if (p.index >= nilfs_btree_node_get_nchildren(p.node) || 750 nilfs_btree_node_get_key(p.node, p.index) != key + cnt) 751 break; 752 ptr2 = nilfs_btree_node_get_ptr(p.node, p.index, p.ncmax); 753 path[level + 1].bp_index = p.index; 754 755 brelse(path[level].bp_bh); 756 path[level].bp_bh = NULL; 757 758 ret = __nilfs_btree_get_block(btree, ptr2, &path[level].bp_bh, 759 &p); 760 if (ret < 0) 761 goto out; 762 node = nilfs_btree_get_nonroot_node(path, level); 763 ncmax = nilfs_btree_nchildren_per_block(btree); 764 index = 0; 765 path[level].bp_index = index; 766 } 767 end: 768 *ptrp = ptr; 769 ret = cnt; 770 out: 771 nilfs_btree_free_path(path); 772 return ret; 773 } 774 775 static void nilfs_btree_promote_key(struct nilfs_bmap *btree, 776 struct nilfs_btree_path *path, 777 int level, __u64 key) 778 { 779 if (level < nilfs_btree_height(btree) - 1) { 780 do { 781 nilfs_btree_node_set_key( 782 nilfs_btree_get_nonroot_node(path, level), 783 path[level].bp_index, key); 784 if (!buffer_dirty(path[level].bp_bh)) 785 mark_buffer_dirty(path[level].bp_bh); 786 } while ((path[level].bp_index == 0) && 787 (++level < nilfs_btree_height(btree) - 1)); 788 } 789 790 /* root */ 791 if (level == nilfs_btree_height(btree) - 1) { 792 nilfs_btree_node_set_key(nilfs_btree_get_root(btree), 793 path[level].bp_index, key); 794 } 795 } 796 797 static void nilfs_btree_do_insert(struct nilfs_bmap *btree, 798 struct nilfs_btree_path *path, 799 int level, __u64 *keyp, __u64 *ptrp) 800 { 801 struct nilfs_btree_node *node; 802 int ncblk; 803 804 if (level < nilfs_btree_height(btree) - 1) { 805 node = nilfs_btree_get_nonroot_node(path, level); 806 ncblk = nilfs_btree_nchildren_per_block(btree); 807 nilfs_btree_node_insert(node, path[level].bp_index, 808 *keyp, *ptrp, ncblk); 809 if (!buffer_dirty(path[level].bp_bh)) 810 mark_buffer_dirty(path[level].bp_bh); 811 812 if (path[level].bp_index == 0) 813 nilfs_btree_promote_key(btree, path, level + 1, 814 nilfs_btree_node_get_key(node, 815 0)); 816 } else { 817 node = nilfs_btree_get_root(btree); 818 nilfs_btree_node_insert(node, path[level].bp_index, 819 *keyp, *ptrp, 820 NILFS_BTREE_ROOT_NCHILDREN_MAX); 821 } 822 } 823 824 static void nilfs_btree_carry_left(struct nilfs_bmap *btree, 825 struct nilfs_btree_path *path, 826 int level, __u64 *keyp, __u64 *ptrp) 827 { 828 struct nilfs_btree_node *node, *left; 829 int nchildren, lnchildren, n, move, ncblk; 830 831 node = nilfs_btree_get_nonroot_node(path, level); 832 left = nilfs_btree_get_sib_node(path, level); 833 nchildren = nilfs_btree_node_get_nchildren(node); 834 lnchildren = nilfs_btree_node_get_nchildren(left); 835 ncblk = nilfs_btree_nchildren_per_block(btree); 836 move = 0; 837 838 n = (nchildren + lnchildren + 1) / 2 - lnchildren; 839 if (n > path[level].bp_index) { 840 /* move insert point */ 841 n--; 842 move = 1; 843 } 844 845 nilfs_btree_node_move_left(left, node, n, ncblk, ncblk); 846 847 if (!buffer_dirty(path[level].bp_bh)) 848 mark_buffer_dirty(path[level].bp_bh); 849 if (!buffer_dirty(path[level].bp_sib_bh)) 850 mark_buffer_dirty(path[level].bp_sib_bh); 851 852 nilfs_btree_promote_key(btree, path, level + 1, 853 nilfs_btree_node_get_key(node, 0)); 854 855 if (move) { 856 brelse(path[level].bp_bh); 857 path[level].bp_bh = path[level].bp_sib_bh; 858 path[level].bp_sib_bh = NULL; 859 path[level].bp_index += lnchildren; 860 path[level + 1].bp_index--; 861 } else { 862 brelse(path[level].bp_sib_bh); 863 path[level].bp_sib_bh = NULL; 864 path[level].bp_index -= n; 865 } 866 867 nilfs_btree_do_insert(btree, path, level, keyp, ptrp); 868 } 869 870 static void nilfs_btree_carry_right(struct nilfs_bmap *btree, 871 struct nilfs_btree_path *path, 872 int level, __u64 *keyp, __u64 *ptrp) 873 { 874 struct nilfs_btree_node *node, *right; 875 int nchildren, rnchildren, n, move, ncblk; 876 877 node = nilfs_btree_get_nonroot_node(path, level); 878 right = nilfs_btree_get_sib_node(path, level); 879 nchildren = nilfs_btree_node_get_nchildren(node); 880 rnchildren = nilfs_btree_node_get_nchildren(right); 881 ncblk = nilfs_btree_nchildren_per_block(btree); 882 move = 0; 883 884 n = (nchildren + rnchildren + 1) / 2 - rnchildren; 885 if (n > nchildren - path[level].bp_index) { 886 /* move insert point */ 887 n--; 888 move = 1; 889 } 890 891 nilfs_btree_node_move_right(node, right, n, ncblk, ncblk); 892 893 if (!buffer_dirty(path[level].bp_bh)) 894 mark_buffer_dirty(path[level].bp_bh); 895 if (!buffer_dirty(path[level].bp_sib_bh)) 896 mark_buffer_dirty(path[level].bp_sib_bh); 897 898 path[level + 1].bp_index++; 899 nilfs_btree_promote_key(btree, path, level + 1, 900 nilfs_btree_node_get_key(right, 0)); 901 path[level + 1].bp_index--; 902 903 if (move) { 904 brelse(path[level].bp_bh); 905 path[level].bp_bh = path[level].bp_sib_bh; 906 path[level].bp_sib_bh = NULL; 907 path[level].bp_index -= nilfs_btree_node_get_nchildren(node); 908 path[level + 1].bp_index++; 909 } else { 910 brelse(path[level].bp_sib_bh); 911 path[level].bp_sib_bh = NULL; 912 } 913 914 nilfs_btree_do_insert(btree, path, level, keyp, ptrp); 915 } 916 917 static void nilfs_btree_split(struct nilfs_bmap *btree, 918 struct nilfs_btree_path *path, 919 int level, __u64 *keyp, __u64 *ptrp) 920 { 921 struct nilfs_btree_node *node, *right; 922 int nchildren, n, move, ncblk; 923 924 node = nilfs_btree_get_nonroot_node(path, level); 925 right = nilfs_btree_get_sib_node(path, level); 926 nchildren = nilfs_btree_node_get_nchildren(node); 927 ncblk = nilfs_btree_nchildren_per_block(btree); 928 move = 0; 929 930 n = (nchildren + 1) / 2; 931 if (n > nchildren - path[level].bp_index) { 932 n--; 933 move = 1; 934 } 935 936 nilfs_btree_node_move_right(node, right, n, ncblk, ncblk); 937 938 if (!buffer_dirty(path[level].bp_bh)) 939 mark_buffer_dirty(path[level].bp_bh); 940 if (!buffer_dirty(path[level].bp_sib_bh)) 941 mark_buffer_dirty(path[level].bp_sib_bh); 942 943 if (move) { 944 path[level].bp_index -= nilfs_btree_node_get_nchildren(node); 945 nilfs_btree_node_insert(right, path[level].bp_index, 946 *keyp, *ptrp, ncblk); 947 948 *keyp = nilfs_btree_node_get_key(right, 0); 949 *ptrp = path[level].bp_newreq.bpr_ptr; 950 951 brelse(path[level].bp_bh); 952 path[level].bp_bh = path[level].bp_sib_bh; 953 path[level].bp_sib_bh = NULL; 954 } else { 955 nilfs_btree_do_insert(btree, path, level, keyp, ptrp); 956 957 *keyp = nilfs_btree_node_get_key(right, 0); 958 *ptrp = path[level].bp_newreq.bpr_ptr; 959 960 brelse(path[level].bp_sib_bh); 961 path[level].bp_sib_bh = NULL; 962 } 963 964 path[level + 1].bp_index++; 965 } 966 967 static void nilfs_btree_grow(struct nilfs_bmap *btree, 968 struct nilfs_btree_path *path, 969 int level, __u64 *keyp, __u64 *ptrp) 970 { 971 struct nilfs_btree_node *root, *child; 972 int n, ncblk; 973 974 root = nilfs_btree_get_root(btree); 975 child = nilfs_btree_get_sib_node(path, level); 976 ncblk = nilfs_btree_nchildren_per_block(btree); 977 978 n = nilfs_btree_node_get_nchildren(root); 979 980 nilfs_btree_node_move_right(root, child, n, 981 NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk); 982 nilfs_btree_node_set_level(root, level + 1); 983 984 if (!buffer_dirty(path[level].bp_sib_bh)) 985 mark_buffer_dirty(path[level].bp_sib_bh); 986 987 path[level].bp_bh = path[level].bp_sib_bh; 988 path[level].bp_sib_bh = NULL; 989 990 nilfs_btree_do_insert(btree, path, level, keyp, ptrp); 991 992 *keyp = nilfs_btree_node_get_key(child, 0); 993 *ptrp = path[level].bp_newreq.bpr_ptr; 994 } 995 996 static __u64 nilfs_btree_find_near(const struct nilfs_bmap *btree, 997 const struct nilfs_btree_path *path) 998 { 999 struct nilfs_btree_node *node; 1000 int level, ncmax; 1001 1002 if (path == NULL) 1003 return NILFS_BMAP_INVALID_PTR; 1004 1005 /* left sibling */ 1006 level = NILFS_BTREE_LEVEL_NODE_MIN; 1007 if (path[level].bp_index > 0) { 1008 node = nilfs_btree_get_node(btree, path, level, &ncmax); 1009 return nilfs_btree_node_get_ptr(node, 1010 path[level].bp_index - 1, 1011 ncmax); 1012 } 1013 1014 /* parent */ 1015 level = NILFS_BTREE_LEVEL_NODE_MIN + 1; 1016 if (level <= nilfs_btree_height(btree) - 1) { 1017 node = nilfs_btree_get_node(btree, path, level, &ncmax); 1018 return nilfs_btree_node_get_ptr(node, path[level].bp_index, 1019 ncmax); 1020 } 1021 1022 return NILFS_BMAP_INVALID_PTR; 1023 } 1024 1025 static __u64 nilfs_btree_find_target_v(const struct nilfs_bmap *btree, 1026 const struct nilfs_btree_path *path, 1027 __u64 key) 1028 { 1029 __u64 ptr; 1030 1031 ptr = nilfs_bmap_find_target_seq(btree, key); 1032 if (ptr != NILFS_BMAP_INVALID_PTR) 1033 /* sequential access */ 1034 return ptr; 1035 else { 1036 ptr = nilfs_btree_find_near(btree, path); 1037 if (ptr != NILFS_BMAP_INVALID_PTR) 1038 /* near */ 1039 return ptr; 1040 } 1041 /* block group */ 1042 return nilfs_bmap_find_target_in_group(btree); 1043 } 1044 1045 static int nilfs_btree_prepare_insert(struct nilfs_bmap *btree, 1046 struct nilfs_btree_path *path, 1047 int *levelp, __u64 key, __u64 ptr, 1048 struct nilfs_bmap_stats *stats) 1049 { 1050 struct buffer_head *bh; 1051 struct nilfs_btree_node *node, *parent, *sib; 1052 __u64 sibptr; 1053 int pindex, level, ncmax, ncblk, ret; 1054 struct inode *dat = NULL; 1055 1056 stats->bs_nblocks = 0; 1057 level = NILFS_BTREE_LEVEL_DATA; 1058 1059 /* allocate a new ptr for data block */ 1060 if (NILFS_BMAP_USE_VBN(btree)) { 1061 path[level].bp_newreq.bpr_ptr = 1062 nilfs_btree_find_target_v(btree, path, key); 1063 dat = nilfs_bmap_get_dat(btree); 1064 } 1065 1066 ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat); 1067 if (ret < 0) 1068 goto err_out_data; 1069 1070 ncblk = nilfs_btree_nchildren_per_block(btree); 1071 1072 for (level = NILFS_BTREE_LEVEL_NODE_MIN; 1073 level < nilfs_btree_height(btree) - 1; 1074 level++) { 1075 node = nilfs_btree_get_nonroot_node(path, level); 1076 if (nilfs_btree_node_get_nchildren(node) < ncblk) { 1077 path[level].bp_op = nilfs_btree_do_insert; 1078 stats->bs_nblocks++; 1079 goto out; 1080 } 1081 1082 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); 1083 pindex = path[level + 1].bp_index; 1084 1085 /* left sibling */ 1086 if (pindex > 0) { 1087 sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1, 1088 ncmax); 1089 ret = nilfs_btree_get_block(btree, sibptr, &bh); 1090 if (ret < 0) 1091 goto err_out_child_node; 1092 sib = (struct nilfs_btree_node *)bh->b_data; 1093 if (nilfs_btree_node_get_nchildren(sib) < ncblk) { 1094 path[level].bp_sib_bh = bh; 1095 path[level].bp_op = nilfs_btree_carry_left; 1096 stats->bs_nblocks++; 1097 goto out; 1098 } else { 1099 brelse(bh); 1100 } 1101 } 1102 1103 /* right sibling */ 1104 if (pindex < nilfs_btree_node_get_nchildren(parent) - 1) { 1105 sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1, 1106 ncmax); 1107 ret = nilfs_btree_get_block(btree, sibptr, &bh); 1108 if (ret < 0) 1109 goto err_out_child_node; 1110 sib = (struct nilfs_btree_node *)bh->b_data; 1111 if (nilfs_btree_node_get_nchildren(sib) < ncblk) { 1112 path[level].bp_sib_bh = bh; 1113 path[level].bp_op = nilfs_btree_carry_right; 1114 stats->bs_nblocks++; 1115 goto out; 1116 } else { 1117 brelse(bh); 1118 } 1119 } 1120 1121 /* split */ 1122 path[level].bp_newreq.bpr_ptr = 1123 path[level - 1].bp_newreq.bpr_ptr + 1; 1124 ret = nilfs_bmap_prepare_alloc_ptr(btree, 1125 &path[level].bp_newreq, dat); 1126 if (ret < 0) 1127 goto err_out_child_node; 1128 ret = nilfs_btree_get_new_block(btree, 1129 path[level].bp_newreq.bpr_ptr, 1130 &bh); 1131 if (ret < 0) 1132 goto err_out_curr_node; 1133 1134 stats->bs_nblocks++; 1135 1136 sib = (struct nilfs_btree_node *)bh->b_data; 1137 nilfs_btree_node_init(sib, 0, level, 0, ncblk, NULL, NULL); 1138 path[level].bp_sib_bh = bh; 1139 path[level].bp_op = nilfs_btree_split; 1140 } 1141 1142 /* root */ 1143 node = nilfs_btree_get_root(btree); 1144 if (nilfs_btree_node_get_nchildren(node) < 1145 NILFS_BTREE_ROOT_NCHILDREN_MAX) { 1146 path[level].bp_op = nilfs_btree_do_insert; 1147 stats->bs_nblocks++; 1148 goto out; 1149 } 1150 1151 /* grow */ 1152 path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1; 1153 ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat); 1154 if (ret < 0) 1155 goto err_out_child_node; 1156 ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr, 1157 &bh); 1158 if (ret < 0) 1159 goto err_out_curr_node; 1160 1161 nilfs_btree_node_init((struct nilfs_btree_node *)bh->b_data, 1162 0, level, 0, ncblk, NULL, NULL); 1163 path[level].bp_sib_bh = bh; 1164 path[level].bp_op = nilfs_btree_grow; 1165 1166 level++; 1167 path[level].bp_op = nilfs_btree_do_insert; 1168 1169 /* a newly-created node block and a data block are added */ 1170 stats->bs_nblocks += 2; 1171 1172 /* success */ 1173 out: 1174 *levelp = level; 1175 return ret; 1176 1177 /* error */ 1178 err_out_curr_node: 1179 nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat); 1180 err_out_child_node: 1181 for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) { 1182 nilfs_btnode_delete(path[level].bp_sib_bh); 1183 nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat); 1184 1185 } 1186 1187 nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat); 1188 err_out_data: 1189 *levelp = level; 1190 stats->bs_nblocks = 0; 1191 return ret; 1192 } 1193 1194 static void nilfs_btree_commit_insert(struct nilfs_bmap *btree, 1195 struct nilfs_btree_path *path, 1196 int maxlevel, __u64 key, __u64 ptr) 1197 { 1198 struct inode *dat = NULL; 1199 int level; 1200 1201 set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr)); 1202 ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr; 1203 if (NILFS_BMAP_USE_VBN(btree)) { 1204 nilfs_bmap_set_target_v(btree, key, ptr); 1205 dat = nilfs_bmap_get_dat(btree); 1206 } 1207 1208 for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) { 1209 nilfs_bmap_commit_alloc_ptr(btree, 1210 &path[level - 1].bp_newreq, dat); 1211 path[level].bp_op(btree, path, level, &key, &ptr); 1212 } 1213 1214 if (!nilfs_bmap_dirty(btree)) 1215 nilfs_bmap_set_dirty(btree); 1216 } 1217 1218 static int nilfs_btree_insert(struct nilfs_bmap *btree, __u64 key, __u64 ptr) 1219 { 1220 struct nilfs_btree_path *path; 1221 struct nilfs_bmap_stats stats; 1222 int level, ret; 1223 1224 path = nilfs_btree_alloc_path(); 1225 if (path == NULL) 1226 return -ENOMEM; 1227 1228 ret = nilfs_btree_do_lookup(btree, path, key, NULL, 1229 NILFS_BTREE_LEVEL_NODE_MIN, 0); 1230 if (ret != -ENOENT) { 1231 if (ret == 0) 1232 ret = -EEXIST; 1233 goto out; 1234 } 1235 1236 ret = nilfs_btree_prepare_insert(btree, path, &level, key, ptr, &stats); 1237 if (ret < 0) 1238 goto out; 1239 nilfs_btree_commit_insert(btree, path, level, key, ptr); 1240 nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks); 1241 1242 out: 1243 nilfs_btree_free_path(path); 1244 return ret; 1245 } 1246 1247 static void nilfs_btree_do_delete(struct nilfs_bmap *btree, 1248 struct nilfs_btree_path *path, 1249 int level, __u64 *keyp, __u64 *ptrp) 1250 { 1251 struct nilfs_btree_node *node; 1252 int ncblk; 1253 1254 if (level < nilfs_btree_height(btree) - 1) { 1255 node = nilfs_btree_get_nonroot_node(path, level); 1256 ncblk = nilfs_btree_nchildren_per_block(btree); 1257 nilfs_btree_node_delete(node, path[level].bp_index, 1258 keyp, ptrp, ncblk); 1259 if (!buffer_dirty(path[level].bp_bh)) 1260 mark_buffer_dirty(path[level].bp_bh); 1261 if (path[level].bp_index == 0) 1262 nilfs_btree_promote_key(btree, path, level + 1, 1263 nilfs_btree_node_get_key(node, 0)); 1264 } else { 1265 node = nilfs_btree_get_root(btree); 1266 nilfs_btree_node_delete(node, path[level].bp_index, 1267 keyp, ptrp, 1268 NILFS_BTREE_ROOT_NCHILDREN_MAX); 1269 } 1270 } 1271 1272 static void nilfs_btree_borrow_left(struct nilfs_bmap *btree, 1273 struct nilfs_btree_path *path, 1274 int level, __u64 *keyp, __u64 *ptrp) 1275 { 1276 struct nilfs_btree_node *node, *left; 1277 int nchildren, lnchildren, n, ncblk; 1278 1279 nilfs_btree_do_delete(btree, path, level, keyp, ptrp); 1280 1281 node = nilfs_btree_get_nonroot_node(path, level); 1282 left = nilfs_btree_get_sib_node(path, level); 1283 nchildren = nilfs_btree_node_get_nchildren(node); 1284 lnchildren = nilfs_btree_node_get_nchildren(left); 1285 ncblk = nilfs_btree_nchildren_per_block(btree); 1286 1287 n = (nchildren + lnchildren) / 2 - nchildren; 1288 1289 nilfs_btree_node_move_right(left, node, n, ncblk, ncblk); 1290 1291 if (!buffer_dirty(path[level].bp_bh)) 1292 mark_buffer_dirty(path[level].bp_bh); 1293 if (!buffer_dirty(path[level].bp_sib_bh)) 1294 mark_buffer_dirty(path[level].bp_sib_bh); 1295 1296 nilfs_btree_promote_key(btree, path, level + 1, 1297 nilfs_btree_node_get_key(node, 0)); 1298 1299 brelse(path[level].bp_sib_bh); 1300 path[level].bp_sib_bh = NULL; 1301 path[level].bp_index += n; 1302 } 1303 1304 static void nilfs_btree_borrow_right(struct nilfs_bmap *btree, 1305 struct nilfs_btree_path *path, 1306 int level, __u64 *keyp, __u64 *ptrp) 1307 { 1308 struct nilfs_btree_node *node, *right; 1309 int nchildren, rnchildren, n, ncblk; 1310 1311 nilfs_btree_do_delete(btree, path, level, keyp, ptrp); 1312 1313 node = nilfs_btree_get_nonroot_node(path, level); 1314 right = nilfs_btree_get_sib_node(path, level); 1315 nchildren = nilfs_btree_node_get_nchildren(node); 1316 rnchildren = nilfs_btree_node_get_nchildren(right); 1317 ncblk = nilfs_btree_nchildren_per_block(btree); 1318 1319 n = (nchildren + rnchildren) / 2 - nchildren; 1320 1321 nilfs_btree_node_move_left(node, right, n, ncblk, ncblk); 1322 1323 if (!buffer_dirty(path[level].bp_bh)) 1324 mark_buffer_dirty(path[level].bp_bh); 1325 if (!buffer_dirty(path[level].bp_sib_bh)) 1326 mark_buffer_dirty(path[level].bp_sib_bh); 1327 1328 path[level + 1].bp_index++; 1329 nilfs_btree_promote_key(btree, path, level + 1, 1330 nilfs_btree_node_get_key(right, 0)); 1331 path[level + 1].bp_index--; 1332 1333 brelse(path[level].bp_sib_bh); 1334 path[level].bp_sib_bh = NULL; 1335 } 1336 1337 static void nilfs_btree_concat_left(struct nilfs_bmap *btree, 1338 struct nilfs_btree_path *path, 1339 int level, __u64 *keyp, __u64 *ptrp) 1340 { 1341 struct nilfs_btree_node *node, *left; 1342 int n, ncblk; 1343 1344 nilfs_btree_do_delete(btree, path, level, keyp, ptrp); 1345 1346 node = nilfs_btree_get_nonroot_node(path, level); 1347 left = nilfs_btree_get_sib_node(path, level); 1348 ncblk = nilfs_btree_nchildren_per_block(btree); 1349 1350 n = nilfs_btree_node_get_nchildren(node); 1351 1352 nilfs_btree_node_move_left(left, node, n, ncblk, ncblk); 1353 1354 if (!buffer_dirty(path[level].bp_sib_bh)) 1355 mark_buffer_dirty(path[level].bp_sib_bh); 1356 1357 nilfs_btnode_delete(path[level].bp_bh); 1358 path[level].bp_bh = path[level].bp_sib_bh; 1359 path[level].bp_sib_bh = NULL; 1360 path[level].bp_index += nilfs_btree_node_get_nchildren(left); 1361 } 1362 1363 static void nilfs_btree_concat_right(struct nilfs_bmap *btree, 1364 struct nilfs_btree_path *path, 1365 int level, __u64 *keyp, __u64 *ptrp) 1366 { 1367 struct nilfs_btree_node *node, *right; 1368 int n, ncblk; 1369 1370 nilfs_btree_do_delete(btree, path, level, keyp, ptrp); 1371 1372 node = nilfs_btree_get_nonroot_node(path, level); 1373 right = nilfs_btree_get_sib_node(path, level); 1374 ncblk = nilfs_btree_nchildren_per_block(btree); 1375 1376 n = nilfs_btree_node_get_nchildren(right); 1377 1378 nilfs_btree_node_move_left(node, right, n, ncblk, ncblk); 1379 1380 if (!buffer_dirty(path[level].bp_bh)) 1381 mark_buffer_dirty(path[level].bp_bh); 1382 1383 nilfs_btnode_delete(path[level].bp_sib_bh); 1384 path[level].bp_sib_bh = NULL; 1385 path[level + 1].bp_index++; 1386 } 1387 1388 static void nilfs_btree_shrink(struct nilfs_bmap *btree, 1389 struct nilfs_btree_path *path, 1390 int level, __u64 *keyp, __u64 *ptrp) 1391 { 1392 struct nilfs_btree_node *root, *child; 1393 int n, ncblk; 1394 1395 nilfs_btree_do_delete(btree, path, level, keyp, ptrp); 1396 1397 root = nilfs_btree_get_root(btree); 1398 child = nilfs_btree_get_nonroot_node(path, level); 1399 ncblk = nilfs_btree_nchildren_per_block(btree); 1400 1401 nilfs_btree_node_delete(root, 0, NULL, NULL, 1402 NILFS_BTREE_ROOT_NCHILDREN_MAX); 1403 nilfs_btree_node_set_level(root, level); 1404 n = nilfs_btree_node_get_nchildren(child); 1405 nilfs_btree_node_move_left(root, child, n, 1406 NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk); 1407 1408 nilfs_btnode_delete(path[level].bp_bh); 1409 path[level].bp_bh = NULL; 1410 } 1411 1412 static void nilfs_btree_nop(struct nilfs_bmap *btree, 1413 struct nilfs_btree_path *path, 1414 int level, __u64 *keyp, __u64 *ptrp) 1415 { 1416 } 1417 1418 static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree, 1419 struct nilfs_btree_path *path, 1420 int *levelp, 1421 struct nilfs_bmap_stats *stats, 1422 struct inode *dat) 1423 { 1424 struct buffer_head *bh; 1425 struct nilfs_btree_node *node, *parent, *sib; 1426 __u64 sibptr; 1427 int pindex, dindex, level, ncmin, ncmax, ncblk, ret; 1428 1429 ret = 0; 1430 stats->bs_nblocks = 0; 1431 ncmin = NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree)); 1432 ncblk = nilfs_btree_nchildren_per_block(btree); 1433 1434 for (level = NILFS_BTREE_LEVEL_NODE_MIN, dindex = path[level].bp_index; 1435 level < nilfs_btree_height(btree) - 1; 1436 level++) { 1437 node = nilfs_btree_get_nonroot_node(path, level); 1438 path[level].bp_oldreq.bpr_ptr = 1439 nilfs_btree_node_get_ptr(node, dindex, ncblk); 1440 ret = nilfs_bmap_prepare_end_ptr(btree, 1441 &path[level].bp_oldreq, dat); 1442 if (ret < 0) 1443 goto err_out_child_node; 1444 1445 if (nilfs_btree_node_get_nchildren(node) > ncmin) { 1446 path[level].bp_op = nilfs_btree_do_delete; 1447 stats->bs_nblocks++; 1448 goto out; 1449 } 1450 1451 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); 1452 pindex = path[level + 1].bp_index; 1453 dindex = pindex; 1454 1455 if (pindex > 0) { 1456 /* left sibling */ 1457 sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1, 1458 ncmax); 1459 ret = nilfs_btree_get_block(btree, sibptr, &bh); 1460 if (ret < 0) 1461 goto err_out_curr_node; 1462 sib = (struct nilfs_btree_node *)bh->b_data; 1463 if (nilfs_btree_node_get_nchildren(sib) > ncmin) { 1464 path[level].bp_sib_bh = bh; 1465 path[level].bp_op = nilfs_btree_borrow_left; 1466 stats->bs_nblocks++; 1467 goto out; 1468 } else { 1469 path[level].bp_sib_bh = bh; 1470 path[level].bp_op = nilfs_btree_concat_left; 1471 stats->bs_nblocks++; 1472 /* continue; */ 1473 } 1474 } else if (pindex < 1475 nilfs_btree_node_get_nchildren(parent) - 1) { 1476 /* right sibling */ 1477 sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1, 1478 ncmax); 1479 ret = nilfs_btree_get_block(btree, sibptr, &bh); 1480 if (ret < 0) 1481 goto err_out_curr_node; 1482 sib = (struct nilfs_btree_node *)bh->b_data; 1483 if (nilfs_btree_node_get_nchildren(sib) > ncmin) { 1484 path[level].bp_sib_bh = bh; 1485 path[level].bp_op = nilfs_btree_borrow_right; 1486 stats->bs_nblocks++; 1487 goto out; 1488 } else { 1489 path[level].bp_sib_bh = bh; 1490 path[level].bp_op = nilfs_btree_concat_right; 1491 stats->bs_nblocks++; 1492 /* 1493 * When merging right sibling node 1494 * into the current node, pointer to 1495 * the right sibling node must be 1496 * terminated instead. The adjustment 1497 * below is required for that. 1498 */ 1499 dindex = pindex + 1; 1500 /* continue; */ 1501 } 1502 } else { 1503 /* no siblings */ 1504 /* the only child of the root node */ 1505 WARN_ON(level != nilfs_btree_height(btree) - 2); 1506 if (nilfs_btree_node_get_nchildren(node) - 1 <= 1507 NILFS_BTREE_ROOT_NCHILDREN_MAX) { 1508 path[level].bp_op = nilfs_btree_shrink; 1509 stats->bs_nblocks += 2; 1510 level++; 1511 path[level].bp_op = nilfs_btree_nop; 1512 goto shrink_root_child; 1513 } else { 1514 path[level].bp_op = nilfs_btree_do_delete; 1515 stats->bs_nblocks++; 1516 goto out; 1517 } 1518 } 1519 } 1520 1521 /* child of the root node is deleted */ 1522 path[level].bp_op = nilfs_btree_do_delete; 1523 stats->bs_nblocks++; 1524 1525 shrink_root_child: 1526 node = nilfs_btree_get_root(btree); 1527 path[level].bp_oldreq.bpr_ptr = 1528 nilfs_btree_node_get_ptr(node, dindex, 1529 NILFS_BTREE_ROOT_NCHILDREN_MAX); 1530 1531 ret = nilfs_bmap_prepare_end_ptr(btree, &path[level].bp_oldreq, dat); 1532 if (ret < 0) 1533 goto err_out_child_node; 1534 1535 /* success */ 1536 out: 1537 *levelp = level; 1538 return ret; 1539 1540 /* error */ 1541 err_out_curr_node: 1542 nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat); 1543 err_out_child_node: 1544 for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) { 1545 brelse(path[level].bp_sib_bh); 1546 nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat); 1547 } 1548 *levelp = level; 1549 stats->bs_nblocks = 0; 1550 return ret; 1551 } 1552 1553 static void nilfs_btree_commit_delete(struct nilfs_bmap *btree, 1554 struct nilfs_btree_path *path, 1555 int maxlevel, struct inode *dat) 1556 { 1557 int level; 1558 1559 for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) { 1560 nilfs_bmap_commit_end_ptr(btree, &path[level].bp_oldreq, dat); 1561 path[level].bp_op(btree, path, level, NULL, NULL); 1562 } 1563 1564 if (!nilfs_bmap_dirty(btree)) 1565 nilfs_bmap_set_dirty(btree); 1566 } 1567 1568 static int nilfs_btree_delete(struct nilfs_bmap *btree, __u64 key) 1569 1570 { 1571 struct nilfs_btree_path *path; 1572 struct nilfs_bmap_stats stats; 1573 struct inode *dat; 1574 int level, ret; 1575 1576 path = nilfs_btree_alloc_path(); 1577 if (path == NULL) 1578 return -ENOMEM; 1579 1580 ret = nilfs_btree_do_lookup(btree, path, key, NULL, 1581 NILFS_BTREE_LEVEL_NODE_MIN, 0); 1582 if (ret < 0) 1583 goto out; 1584 1585 1586 dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL; 1587 1588 ret = nilfs_btree_prepare_delete(btree, path, &level, &stats, dat); 1589 if (ret < 0) 1590 goto out; 1591 nilfs_btree_commit_delete(btree, path, level, dat); 1592 nilfs_inode_sub_blocks(btree->b_inode, stats.bs_nblocks); 1593 1594 out: 1595 nilfs_btree_free_path(path); 1596 return ret; 1597 } 1598 1599 static int nilfs_btree_seek_key(const struct nilfs_bmap *btree, __u64 start, 1600 __u64 *keyp) 1601 { 1602 struct nilfs_btree_path *path; 1603 const int minlevel = NILFS_BTREE_LEVEL_NODE_MIN; 1604 int ret; 1605 1606 path = nilfs_btree_alloc_path(); 1607 if (!path) 1608 return -ENOMEM; 1609 1610 ret = nilfs_btree_do_lookup(btree, path, start, NULL, minlevel, 0); 1611 if (!ret) 1612 *keyp = start; 1613 else if (ret == -ENOENT) 1614 ret = nilfs_btree_get_next_key(btree, path, minlevel, keyp); 1615 1616 nilfs_btree_free_path(path); 1617 return ret; 1618 } 1619 1620 static int nilfs_btree_last_key(const struct nilfs_bmap *btree, __u64 *keyp) 1621 { 1622 struct nilfs_btree_path *path; 1623 int ret; 1624 1625 path = nilfs_btree_alloc_path(); 1626 if (path == NULL) 1627 return -ENOMEM; 1628 1629 ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL); 1630 1631 nilfs_btree_free_path(path); 1632 1633 return ret; 1634 } 1635 1636 static int nilfs_btree_check_delete(struct nilfs_bmap *btree, __u64 key) 1637 { 1638 struct buffer_head *bh; 1639 struct nilfs_btree_node *root, *node; 1640 __u64 maxkey, nextmaxkey; 1641 __u64 ptr; 1642 int nchildren, ret; 1643 1644 root = nilfs_btree_get_root(btree); 1645 switch (nilfs_btree_height(btree)) { 1646 case 2: 1647 bh = NULL; 1648 node = root; 1649 break; 1650 case 3: 1651 nchildren = nilfs_btree_node_get_nchildren(root); 1652 if (nchildren > 1) 1653 return 0; 1654 ptr = nilfs_btree_node_get_ptr(root, nchildren - 1, 1655 NILFS_BTREE_ROOT_NCHILDREN_MAX); 1656 ret = nilfs_btree_get_block(btree, ptr, &bh); 1657 if (ret < 0) 1658 return ret; 1659 node = (struct nilfs_btree_node *)bh->b_data; 1660 break; 1661 default: 1662 return 0; 1663 } 1664 1665 nchildren = nilfs_btree_node_get_nchildren(node); 1666 maxkey = nilfs_btree_node_get_key(node, nchildren - 1); 1667 nextmaxkey = (nchildren > 1) ? 1668 nilfs_btree_node_get_key(node, nchildren - 2) : 0; 1669 if (bh != NULL) 1670 brelse(bh); 1671 1672 return (maxkey == key) && (nextmaxkey < NILFS_BMAP_LARGE_LOW); 1673 } 1674 1675 static int nilfs_btree_gather_data(struct nilfs_bmap *btree, 1676 __u64 *keys, __u64 *ptrs, int nitems) 1677 { 1678 struct buffer_head *bh; 1679 struct nilfs_btree_node *node, *root; 1680 __le64 *dkeys; 1681 __le64 *dptrs; 1682 __u64 ptr; 1683 int nchildren, ncmax, i, ret; 1684 1685 root = nilfs_btree_get_root(btree); 1686 switch (nilfs_btree_height(btree)) { 1687 case 2: 1688 bh = NULL; 1689 node = root; 1690 ncmax = NILFS_BTREE_ROOT_NCHILDREN_MAX; 1691 break; 1692 case 3: 1693 nchildren = nilfs_btree_node_get_nchildren(root); 1694 WARN_ON(nchildren > 1); 1695 ptr = nilfs_btree_node_get_ptr(root, nchildren - 1, 1696 NILFS_BTREE_ROOT_NCHILDREN_MAX); 1697 ret = nilfs_btree_get_block(btree, ptr, &bh); 1698 if (ret < 0) 1699 return ret; 1700 node = (struct nilfs_btree_node *)bh->b_data; 1701 ncmax = nilfs_btree_nchildren_per_block(btree); 1702 break; 1703 default: 1704 node = NULL; 1705 return -EINVAL; 1706 } 1707 1708 nchildren = nilfs_btree_node_get_nchildren(node); 1709 if (nchildren < nitems) 1710 nitems = nchildren; 1711 dkeys = nilfs_btree_node_dkeys(node); 1712 dptrs = nilfs_btree_node_dptrs(node, ncmax); 1713 for (i = 0; i < nitems; i++) { 1714 keys[i] = le64_to_cpu(dkeys[i]); 1715 ptrs[i] = le64_to_cpu(dptrs[i]); 1716 } 1717 1718 if (bh != NULL) 1719 brelse(bh); 1720 1721 return nitems; 1722 } 1723 1724 static int 1725 nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *btree, __u64 key, 1726 union nilfs_bmap_ptr_req *dreq, 1727 union nilfs_bmap_ptr_req *nreq, 1728 struct buffer_head **bhp, 1729 struct nilfs_bmap_stats *stats) 1730 { 1731 struct buffer_head *bh; 1732 struct inode *dat = NULL; 1733 int ret; 1734 1735 stats->bs_nblocks = 0; 1736 1737 /* for data */ 1738 /* cannot find near ptr */ 1739 if (NILFS_BMAP_USE_VBN(btree)) { 1740 dreq->bpr_ptr = nilfs_btree_find_target_v(btree, NULL, key); 1741 dat = nilfs_bmap_get_dat(btree); 1742 } 1743 1744 ret = nilfs_bmap_prepare_alloc_ptr(btree, dreq, dat); 1745 if (ret < 0) 1746 return ret; 1747 1748 *bhp = NULL; 1749 stats->bs_nblocks++; 1750 if (nreq != NULL) { 1751 nreq->bpr_ptr = dreq->bpr_ptr + 1; 1752 ret = nilfs_bmap_prepare_alloc_ptr(btree, nreq, dat); 1753 if (ret < 0) 1754 goto err_out_dreq; 1755 1756 ret = nilfs_btree_get_new_block(btree, nreq->bpr_ptr, &bh); 1757 if (ret < 0) 1758 goto err_out_nreq; 1759 1760 *bhp = bh; 1761 stats->bs_nblocks++; 1762 } 1763 1764 /* success */ 1765 return 0; 1766 1767 /* error */ 1768 err_out_nreq: 1769 nilfs_bmap_abort_alloc_ptr(btree, nreq, dat); 1770 err_out_dreq: 1771 nilfs_bmap_abort_alloc_ptr(btree, dreq, dat); 1772 stats->bs_nblocks = 0; 1773 return ret; 1774 1775 } 1776 1777 static void 1778 nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *btree, 1779 __u64 key, __u64 ptr, 1780 const __u64 *keys, const __u64 *ptrs, 1781 int n, 1782 union nilfs_bmap_ptr_req *dreq, 1783 union nilfs_bmap_ptr_req *nreq, 1784 struct buffer_head *bh) 1785 { 1786 struct nilfs_btree_node *node; 1787 struct inode *dat; 1788 __u64 tmpptr; 1789 int ncblk; 1790 1791 /* free resources */ 1792 if (btree->b_ops->bop_clear != NULL) 1793 btree->b_ops->bop_clear(btree); 1794 1795 /* ptr must be a pointer to a buffer head. */ 1796 set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr)); 1797 1798 /* convert and insert */ 1799 dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL; 1800 __nilfs_btree_init(btree); 1801 if (nreq != NULL) { 1802 nilfs_bmap_commit_alloc_ptr(btree, dreq, dat); 1803 nilfs_bmap_commit_alloc_ptr(btree, nreq, dat); 1804 1805 /* create child node at level 1 */ 1806 node = (struct nilfs_btree_node *)bh->b_data; 1807 ncblk = nilfs_btree_nchildren_per_block(btree); 1808 nilfs_btree_node_init(node, 0, 1, n, ncblk, keys, ptrs); 1809 nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr, ncblk); 1810 if (!buffer_dirty(bh)) 1811 mark_buffer_dirty(bh); 1812 if (!nilfs_bmap_dirty(btree)) 1813 nilfs_bmap_set_dirty(btree); 1814 1815 brelse(bh); 1816 1817 /* create root node at level 2 */ 1818 node = nilfs_btree_get_root(btree); 1819 tmpptr = nreq->bpr_ptr; 1820 nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 2, 1, 1821 NILFS_BTREE_ROOT_NCHILDREN_MAX, 1822 &keys[0], &tmpptr); 1823 } else { 1824 nilfs_bmap_commit_alloc_ptr(btree, dreq, dat); 1825 1826 /* create root node at level 1 */ 1827 node = nilfs_btree_get_root(btree); 1828 nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 1, n, 1829 NILFS_BTREE_ROOT_NCHILDREN_MAX, 1830 keys, ptrs); 1831 nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr, 1832 NILFS_BTREE_ROOT_NCHILDREN_MAX); 1833 if (!nilfs_bmap_dirty(btree)) 1834 nilfs_bmap_set_dirty(btree); 1835 } 1836 1837 if (NILFS_BMAP_USE_VBN(btree)) 1838 nilfs_bmap_set_target_v(btree, key, dreq->bpr_ptr); 1839 } 1840 1841 /** 1842 * nilfs_btree_convert_and_insert - 1843 * @bmap: 1844 * @key: 1845 * @ptr: 1846 * @keys: 1847 * @ptrs: 1848 * @n: 1849 */ 1850 int nilfs_btree_convert_and_insert(struct nilfs_bmap *btree, 1851 __u64 key, __u64 ptr, 1852 const __u64 *keys, const __u64 *ptrs, int n) 1853 { 1854 struct buffer_head *bh = NULL; 1855 union nilfs_bmap_ptr_req dreq, nreq, *di, *ni; 1856 struct nilfs_bmap_stats stats; 1857 int ret; 1858 1859 if (n + 1 <= NILFS_BTREE_ROOT_NCHILDREN_MAX) { 1860 di = &dreq; 1861 ni = NULL; 1862 } else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX( 1863 1 << btree->b_inode->i_blkbits)) { 1864 di = &dreq; 1865 ni = &nreq; 1866 } else { 1867 di = NULL; 1868 ni = NULL; 1869 BUG(); 1870 } 1871 1872 ret = nilfs_btree_prepare_convert_and_insert(btree, key, di, ni, &bh, 1873 &stats); 1874 if (ret < 0) 1875 return ret; 1876 nilfs_btree_commit_convert_and_insert(btree, key, ptr, keys, ptrs, n, 1877 di, ni, bh); 1878 nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks); 1879 return 0; 1880 } 1881 1882 static int nilfs_btree_propagate_p(struct nilfs_bmap *btree, 1883 struct nilfs_btree_path *path, 1884 int level, 1885 struct buffer_head *bh) 1886 { 1887 while ((++level < nilfs_btree_height(btree) - 1) && 1888 !buffer_dirty(path[level].bp_bh)) 1889 mark_buffer_dirty(path[level].bp_bh); 1890 1891 return 0; 1892 } 1893 1894 static int nilfs_btree_prepare_update_v(struct nilfs_bmap *btree, 1895 struct nilfs_btree_path *path, 1896 int level, struct inode *dat) 1897 { 1898 struct nilfs_btree_node *parent; 1899 int ncmax, ret; 1900 1901 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); 1902 path[level].bp_oldreq.bpr_ptr = 1903 nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index, 1904 ncmax); 1905 path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1; 1906 ret = nilfs_dat_prepare_update(dat, &path[level].bp_oldreq.bpr_req, 1907 &path[level].bp_newreq.bpr_req); 1908 if (ret < 0) 1909 return ret; 1910 1911 if (buffer_nilfs_node(path[level].bp_bh)) { 1912 path[level].bp_ctxt.oldkey = path[level].bp_oldreq.bpr_ptr; 1913 path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr; 1914 path[level].bp_ctxt.bh = path[level].bp_bh; 1915 ret = nilfs_btnode_prepare_change_key( 1916 &NILFS_BMAP_I(btree)->i_btnode_cache, 1917 &path[level].bp_ctxt); 1918 if (ret < 0) { 1919 nilfs_dat_abort_update(dat, 1920 &path[level].bp_oldreq.bpr_req, 1921 &path[level].bp_newreq.bpr_req); 1922 return ret; 1923 } 1924 } 1925 1926 return 0; 1927 } 1928 1929 static void nilfs_btree_commit_update_v(struct nilfs_bmap *btree, 1930 struct nilfs_btree_path *path, 1931 int level, struct inode *dat) 1932 { 1933 struct nilfs_btree_node *parent; 1934 int ncmax; 1935 1936 nilfs_dat_commit_update(dat, &path[level].bp_oldreq.bpr_req, 1937 &path[level].bp_newreq.bpr_req, 1938 btree->b_ptr_type == NILFS_BMAP_PTR_VS); 1939 1940 if (buffer_nilfs_node(path[level].bp_bh)) { 1941 nilfs_btnode_commit_change_key( 1942 &NILFS_BMAP_I(btree)->i_btnode_cache, 1943 &path[level].bp_ctxt); 1944 path[level].bp_bh = path[level].bp_ctxt.bh; 1945 } 1946 set_buffer_nilfs_volatile(path[level].bp_bh); 1947 1948 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); 1949 nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index, 1950 path[level].bp_newreq.bpr_ptr, ncmax); 1951 } 1952 1953 static void nilfs_btree_abort_update_v(struct nilfs_bmap *btree, 1954 struct nilfs_btree_path *path, 1955 int level, struct inode *dat) 1956 { 1957 nilfs_dat_abort_update(dat, &path[level].bp_oldreq.bpr_req, 1958 &path[level].bp_newreq.bpr_req); 1959 if (buffer_nilfs_node(path[level].bp_bh)) 1960 nilfs_btnode_abort_change_key( 1961 &NILFS_BMAP_I(btree)->i_btnode_cache, 1962 &path[level].bp_ctxt); 1963 } 1964 1965 static int nilfs_btree_prepare_propagate_v(struct nilfs_bmap *btree, 1966 struct nilfs_btree_path *path, 1967 int minlevel, int *maxlevelp, 1968 struct inode *dat) 1969 { 1970 int level, ret; 1971 1972 level = minlevel; 1973 if (!buffer_nilfs_volatile(path[level].bp_bh)) { 1974 ret = nilfs_btree_prepare_update_v(btree, path, level, dat); 1975 if (ret < 0) 1976 return ret; 1977 } 1978 while ((++level < nilfs_btree_height(btree) - 1) && 1979 !buffer_dirty(path[level].bp_bh)) { 1980 1981 WARN_ON(buffer_nilfs_volatile(path[level].bp_bh)); 1982 ret = nilfs_btree_prepare_update_v(btree, path, level, dat); 1983 if (ret < 0) 1984 goto out; 1985 } 1986 1987 /* success */ 1988 *maxlevelp = level - 1; 1989 return 0; 1990 1991 /* error */ 1992 out: 1993 while (--level > minlevel) 1994 nilfs_btree_abort_update_v(btree, path, level, dat); 1995 if (!buffer_nilfs_volatile(path[level].bp_bh)) 1996 nilfs_btree_abort_update_v(btree, path, level, dat); 1997 return ret; 1998 } 1999 2000 static void nilfs_btree_commit_propagate_v(struct nilfs_bmap *btree, 2001 struct nilfs_btree_path *path, 2002 int minlevel, int maxlevel, 2003 struct buffer_head *bh, 2004 struct inode *dat) 2005 { 2006 int level; 2007 2008 if (!buffer_nilfs_volatile(path[minlevel].bp_bh)) 2009 nilfs_btree_commit_update_v(btree, path, minlevel, dat); 2010 2011 for (level = minlevel + 1; level <= maxlevel; level++) 2012 nilfs_btree_commit_update_v(btree, path, level, dat); 2013 } 2014 2015 static int nilfs_btree_propagate_v(struct nilfs_bmap *btree, 2016 struct nilfs_btree_path *path, 2017 int level, struct buffer_head *bh) 2018 { 2019 int maxlevel = 0, ret; 2020 struct nilfs_btree_node *parent; 2021 struct inode *dat = nilfs_bmap_get_dat(btree); 2022 __u64 ptr; 2023 int ncmax; 2024 2025 get_bh(bh); 2026 path[level].bp_bh = bh; 2027 ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel, 2028 dat); 2029 if (ret < 0) 2030 goto out; 2031 2032 if (buffer_nilfs_volatile(path[level].bp_bh)) { 2033 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); 2034 ptr = nilfs_btree_node_get_ptr(parent, 2035 path[level + 1].bp_index, 2036 ncmax); 2037 ret = nilfs_dat_mark_dirty(dat, ptr); 2038 if (ret < 0) 2039 goto out; 2040 } 2041 2042 nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh, dat); 2043 2044 out: 2045 brelse(path[level].bp_bh); 2046 path[level].bp_bh = NULL; 2047 return ret; 2048 } 2049 2050 static int nilfs_btree_propagate(struct nilfs_bmap *btree, 2051 struct buffer_head *bh) 2052 { 2053 struct nilfs_btree_path *path; 2054 struct nilfs_btree_node *node; 2055 __u64 key; 2056 int level, ret; 2057 2058 WARN_ON(!buffer_dirty(bh)); 2059 2060 path = nilfs_btree_alloc_path(); 2061 if (path == NULL) 2062 return -ENOMEM; 2063 2064 if (buffer_nilfs_node(bh)) { 2065 node = (struct nilfs_btree_node *)bh->b_data; 2066 key = nilfs_btree_node_get_key(node, 0); 2067 level = nilfs_btree_node_get_level(node); 2068 } else { 2069 key = nilfs_bmap_data_get_key(btree, bh); 2070 level = NILFS_BTREE_LEVEL_DATA; 2071 } 2072 2073 ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0); 2074 if (ret < 0) { 2075 if (unlikely(ret == -ENOENT)) 2076 printk(KERN_CRIT "%s: key = %llu, level == %d\n", 2077 __func__, (unsigned long long)key, level); 2078 goto out; 2079 } 2080 2081 ret = NILFS_BMAP_USE_VBN(btree) ? 2082 nilfs_btree_propagate_v(btree, path, level, bh) : 2083 nilfs_btree_propagate_p(btree, path, level, bh); 2084 2085 out: 2086 nilfs_btree_free_path(path); 2087 2088 return ret; 2089 } 2090 2091 static int nilfs_btree_propagate_gc(struct nilfs_bmap *btree, 2092 struct buffer_head *bh) 2093 { 2094 return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(btree), bh->b_blocknr); 2095 } 2096 2097 static void nilfs_btree_add_dirty_buffer(struct nilfs_bmap *btree, 2098 struct list_head *lists, 2099 struct buffer_head *bh) 2100 { 2101 struct list_head *head; 2102 struct buffer_head *cbh; 2103 struct nilfs_btree_node *node, *cnode; 2104 __u64 key, ckey; 2105 int level; 2106 2107 get_bh(bh); 2108 node = (struct nilfs_btree_node *)bh->b_data; 2109 key = nilfs_btree_node_get_key(node, 0); 2110 level = nilfs_btree_node_get_level(node); 2111 if (level < NILFS_BTREE_LEVEL_NODE_MIN || 2112 level >= NILFS_BTREE_LEVEL_MAX) { 2113 dump_stack(); 2114 printk(KERN_WARNING 2115 "%s: invalid btree level: %d (key=%llu, ino=%lu, " 2116 "blocknr=%llu)\n", 2117 __func__, level, (unsigned long long)key, 2118 NILFS_BMAP_I(btree)->vfs_inode.i_ino, 2119 (unsigned long long)bh->b_blocknr); 2120 return; 2121 } 2122 2123 list_for_each(head, &lists[level]) { 2124 cbh = list_entry(head, struct buffer_head, b_assoc_buffers); 2125 cnode = (struct nilfs_btree_node *)cbh->b_data; 2126 ckey = nilfs_btree_node_get_key(cnode, 0); 2127 if (key < ckey) 2128 break; 2129 } 2130 list_add_tail(&bh->b_assoc_buffers, head); 2131 } 2132 2133 static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *btree, 2134 struct list_head *listp) 2135 { 2136 struct address_space *btcache = &NILFS_BMAP_I(btree)->i_btnode_cache; 2137 struct list_head lists[NILFS_BTREE_LEVEL_MAX]; 2138 struct pagevec pvec; 2139 struct buffer_head *bh, *head; 2140 pgoff_t index = 0; 2141 int level, i; 2142 2143 for (level = NILFS_BTREE_LEVEL_NODE_MIN; 2144 level < NILFS_BTREE_LEVEL_MAX; 2145 level++) 2146 INIT_LIST_HEAD(&lists[level]); 2147 2148 pagevec_init(&pvec, 0); 2149 2150 while (pagevec_lookup_tag(&pvec, btcache, &index, PAGECACHE_TAG_DIRTY, 2151 PAGEVEC_SIZE)) { 2152 for (i = 0; i < pagevec_count(&pvec); i++) { 2153 bh = head = page_buffers(pvec.pages[i]); 2154 do { 2155 if (buffer_dirty(bh)) 2156 nilfs_btree_add_dirty_buffer(btree, 2157 lists, bh); 2158 } while ((bh = bh->b_this_page) != head); 2159 } 2160 pagevec_release(&pvec); 2161 cond_resched(); 2162 } 2163 2164 for (level = NILFS_BTREE_LEVEL_NODE_MIN; 2165 level < NILFS_BTREE_LEVEL_MAX; 2166 level++) 2167 list_splice_tail(&lists[level], listp); 2168 } 2169 2170 static int nilfs_btree_assign_p(struct nilfs_bmap *btree, 2171 struct nilfs_btree_path *path, 2172 int level, 2173 struct buffer_head **bh, 2174 sector_t blocknr, 2175 union nilfs_binfo *binfo) 2176 { 2177 struct nilfs_btree_node *parent; 2178 __u64 key; 2179 __u64 ptr; 2180 int ncmax, ret; 2181 2182 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); 2183 ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index, 2184 ncmax); 2185 if (buffer_nilfs_node(*bh)) { 2186 path[level].bp_ctxt.oldkey = ptr; 2187 path[level].bp_ctxt.newkey = blocknr; 2188 path[level].bp_ctxt.bh = *bh; 2189 ret = nilfs_btnode_prepare_change_key( 2190 &NILFS_BMAP_I(btree)->i_btnode_cache, 2191 &path[level].bp_ctxt); 2192 if (ret < 0) 2193 return ret; 2194 nilfs_btnode_commit_change_key( 2195 &NILFS_BMAP_I(btree)->i_btnode_cache, 2196 &path[level].bp_ctxt); 2197 *bh = path[level].bp_ctxt.bh; 2198 } 2199 2200 nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index, blocknr, 2201 ncmax); 2202 2203 key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index); 2204 /* on-disk format */ 2205 binfo->bi_dat.bi_blkoff = cpu_to_le64(key); 2206 binfo->bi_dat.bi_level = level; 2207 2208 return 0; 2209 } 2210 2211 static int nilfs_btree_assign_v(struct nilfs_bmap *btree, 2212 struct nilfs_btree_path *path, 2213 int level, 2214 struct buffer_head **bh, 2215 sector_t blocknr, 2216 union nilfs_binfo *binfo) 2217 { 2218 struct nilfs_btree_node *parent; 2219 struct inode *dat = nilfs_bmap_get_dat(btree); 2220 __u64 key; 2221 __u64 ptr; 2222 union nilfs_bmap_ptr_req req; 2223 int ncmax, ret; 2224 2225 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); 2226 ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index, 2227 ncmax); 2228 req.bpr_ptr = ptr; 2229 ret = nilfs_dat_prepare_start(dat, &req.bpr_req); 2230 if (ret < 0) 2231 return ret; 2232 nilfs_dat_commit_start(dat, &req.bpr_req, blocknr); 2233 2234 key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index); 2235 /* on-disk format */ 2236 binfo->bi_v.bi_vblocknr = cpu_to_le64(ptr); 2237 binfo->bi_v.bi_blkoff = cpu_to_le64(key); 2238 2239 return 0; 2240 } 2241 2242 static int nilfs_btree_assign(struct nilfs_bmap *btree, 2243 struct buffer_head **bh, 2244 sector_t blocknr, 2245 union nilfs_binfo *binfo) 2246 { 2247 struct nilfs_btree_path *path; 2248 struct nilfs_btree_node *node; 2249 __u64 key; 2250 int level, ret; 2251 2252 path = nilfs_btree_alloc_path(); 2253 if (path == NULL) 2254 return -ENOMEM; 2255 2256 if (buffer_nilfs_node(*bh)) { 2257 node = (struct nilfs_btree_node *)(*bh)->b_data; 2258 key = nilfs_btree_node_get_key(node, 0); 2259 level = nilfs_btree_node_get_level(node); 2260 } else { 2261 key = nilfs_bmap_data_get_key(btree, *bh); 2262 level = NILFS_BTREE_LEVEL_DATA; 2263 } 2264 2265 ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0); 2266 if (ret < 0) { 2267 WARN_ON(ret == -ENOENT); 2268 goto out; 2269 } 2270 2271 ret = NILFS_BMAP_USE_VBN(btree) ? 2272 nilfs_btree_assign_v(btree, path, level, bh, blocknr, binfo) : 2273 nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo); 2274 2275 out: 2276 nilfs_btree_free_path(path); 2277 2278 return ret; 2279 } 2280 2281 static int nilfs_btree_assign_gc(struct nilfs_bmap *btree, 2282 struct buffer_head **bh, 2283 sector_t blocknr, 2284 union nilfs_binfo *binfo) 2285 { 2286 struct nilfs_btree_node *node; 2287 __u64 key; 2288 int ret; 2289 2290 ret = nilfs_dat_move(nilfs_bmap_get_dat(btree), (*bh)->b_blocknr, 2291 blocknr); 2292 if (ret < 0) 2293 return ret; 2294 2295 if (buffer_nilfs_node(*bh)) { 2296 node = (struct nilfs_btree_node *)(*bh)->b_data; 2297 key = nilfs_btree_node_get_key(node, 0); 2298 } else 2299 key = nilfs_bmap_data_get_key(btree, *bh); 2300 2301 /* on-disk format */ 2302 binfo->bi_v.bi_vblocknr = cpu_to_le64((*bh)->b_blocknr); 2303 binfo->bi_v.bi_blkoff = cpu_to_le64(key); 2304 2305 return 0; 2306 } 2307 2308 static int nilfs_btree_mark(struct nilfs_bmap *btree, __u64 key, int level) 2309 { 2310 struct buffer_head *bh; 2311 struct nilfs_btree_path *path; 2312 __u64 ptr; 2313 int ret; 2314 2315 path = nilfs_btree_alloc_path(); 2316 if (path == NULL) 2317 return -ENOMEM; 2318 2319 ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1, 0); 2320 if (ret < 0) { 2321 WARN_ON(ret == -ENOENT); 2322 goto out; 2323 } 2324 ret = nilfs_btree_get_block(btree, ptr, &bh); 2325 if (ret < 0) { 2326 WARN_ON(ret == -ENOENT); 2327 goto out; 2328 } 2329 2330 if (!buffer_dirty(bh)) 2331 mark_buffer_dirty(bh); 2332 brelse(bh); 2333 if (!nilfs_bmap_dirty(btree)) 2334 nilfs_bmap_set_dirty(btree); 2335 2336 out: 2337 nilfs_btree_free_path(path); 2338 return ret; 2339 } 2340 2341 static const struct nilfs_bmap_operations nilfs_btree_ops = { 2342 .bop_lookup = nilfs_btree_lookup, 2343 .bop_lookup_contig = nilfs_btree_lookup_contig, 2344 .bop_insert = nilfs_btree_insert, 2345 .bop_delete = nilfs_btree_delete, 2346 .bop_clear = NULL, 2347 2348 .bop_propagate = nilfs_btree_propagate, 2349 2350 .bop_lookup_dirty_buffers = nilfs_btree_lookup_dirty_buffers, 2351 2352 .bop_assign = nilfs_btree_assign, 2353 .bop_mark = nilfs_btree_mark, 2354 2355 .bop_seek_key = nilfs_btree_seek_key, 2356 .bop_last_key = nilfs_btree_last_key, 2357 2358 .bop_check_insert = NULL, 2359 .bop_check_delete = nilfs_btree_check_delete, 2360 .bop_gather_data = nilfs_btree_gather_data, 2361 }; 2362 2363 static const struct nilfs_bmap_operations nilfs_btree_ops_gc = { 2364 .bop_lookup = NULL, 2365 .bop_lookup_contig = NULL, 2366 .bop_insert = NULL, 2367 .bop_delete = NULL, 2368 .bop_clear = NULL, 2369 2370 .bop_propagate = nilfs_btree_propagate_gc, 2371 2372 .bop_lookup_dirty_buffers = nilfs_btree_lookup_dirty_buffers, 2373 2374 .bop_assign = nilfs_btree_assign_gc, 2375 .bop_mark = NULL, 2376 2377 .bop_seek_key = NULL, 2378 .bop_last_key = NULL, 2379 2380 .bop_check_insert = NULL, 2381 .bop_check_delete = NULL, 2382 .bop_gather_data = NULL, 2383 }; 2384 2385 static void __nilfs_btree_init(struct nilfs_bmap *bmap) 2386 { 2387 bmap->b_ops = &nilfs_btree_ops; 2388 bmap->b_nchildren_per_block = 2389 NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap)); 2390 } 2391 2392 int nilfs_btree_init(struct nilfs_bmap *bmap) 2393 { 2394 int ret = 0; 2395 2396 __nilfs_btree_init(bmap); 2397 2398 if (nilfs_btree_root_broken(nilfs_btree_get_root(bmap), 2399 bmap->b_inode->i_ino)) 2400 ret = -EIO; 2401 return ret; 2402 } 2403 2404 void nilfs_btree_init_gc(struct nilfs_bmap *bmap) 2405 { 2406 bmap->b_ops = &nilfs_btree_ops_gc; 2407 bmap->b_nchildren_per_block = 2408 NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap)); 2409 } 2410