1 /* 2 * btree.c - NILFS B-tree. 3 * 4 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. 5 * 6 * This program is free software; you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License as published by 8 * the Free Software Foundation; either version 2 of the License, or 9 * (at your option) any later version. 10 * 11 * This program is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 * GNU General Public License for more details. 15 * 16 * Written by Koji Sato. 17 */ 18 19 #include <linux/slab.h> 20 #include <linux/string.h> 21 #include <linux/errno.h> 22 #include <linux/pagevec.h> 23 #include "nilfs.h" 24 #include "page.h" 25 #include "btnode.h" 26 #include "btree.h" 27 #include "alloc.h" 28 #include "dat.h" 29 30 static void __nilfs_btree_init(struct nilfs_bmap *bmap); 31 32 static struct nilfs_btree_path *nilfs_btree_alloc_path(void) 33 { 34 struct nilfs_btree_path *path; 35 int level = NILFS_BTREE_LEVEL_DATA; 36 37 path = kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS); 38 if (path == NULL) 39 goto out; 40 41 for (; level < NILFS_BTREE_LEVEL_MAX; level++) { 42 path[level].bp_bh = NULL; 43 path[level].bp_sib_bh = NULL; 44 path[level].bp_index = 0; 45 path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR; 46 path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR; 47 path[level].bp_op = NULL; 48 } 49 50 out: 51 return path; 52 } 53 54 static void nilfs_btree_free_path(struct nilfs_btree_path *path) 55 { 56 int level = NILFS_BTREE_LEVEL_DATA; 57 58 for (; level < NILFS_BTREE_LEVEL_MAX; level++) 59 brelse(path[level].bp_bh); 60 61 kmem_cache_free(nilfs_btree_path_cache, path); 62 } 63 64 /* 65 * B-tree node operations 66 */ 67 static int nilfs_btree_get_new_block(const struct nilfs_bmap *btree, 68 __u64 ptr, struct buffer_head **bhp) 69 { 70 struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache; 71 struct buffer_head *bh; 72 73 bh = nilfs_btnode_create_block(btnc, ptr); 74 if (!bh) 75 return -ENOMEM; 76 77 set_buffer_nilfs_volatile(bh); 78 *bhp = bh; 79 return 0; 80 } 81 82 static int nilfs_btree_node_get_flags(const struct nilfs_btree_node *node) 83 { 84 return node->bn_flags; 85 } 86 87 static void 88 nilfs_btree_node_set_flags(struct nilfs_btree_node *node, int flags) 89 { 90 node->bn_flags = flags; 91 } 92 93 static int nilfs_btree_node_root(const struct nilfs_btree_node *node) 94 { 95 return nilfs_btree_node_get_flags(node) & NILFS_BTREE_NODE_ROOT; 96 } 97 98 static int nilfs_btree_node_get_level(const struct nilfs_btree_node *node) 99 { 100 return node->bn_level; 101 } 102 103 static void 104 nilfs_btree_node_set_level(struct nilfs_btree_node *node, int level) 105 { 106 node->bn_level = level; 107 } 108 109 static int nilfs_btree_node_get_nchildren(const struct nilfs_btree_node *node) 110 { 111 return le16_to_cpu(node->bn_nchildren); 112 } 113 114 static void 115 nilfs_btree_node_set_nchildren(struct nilfs_btree_node *node, int nchildren) 116 { 117 node->bn_nchildren = cpu_to_le16(nchildren); 118 } 119 120 static int nilfs_btree_node_size(const struct nilfs_bmap *btree) 121 { 122 return 1 << btree->b_inode->i_blkbits; 123 } 124 125 static int nilfs_btree_nchildren_per_block(const struct nilfs_bmap *btree) 126 { 127 return btree->b_nchildren_per_block; 128 } 129 130 static __le64 * 131 nilfs_btree_node_dkeys(const struct nilfs_btree_node *node) 132 { 133 return (__le64 *)((char *)(node + 1) + 134 (nilfs_btree_node_root(node) ? 135 0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE)); 136 } 137 138 static __le64 * 139 nilfs_btree_node_dptrs(const struct nilfs_btree_node *node, int ncmax) 140 { 141 return (__le64 *)(nilfs_btree_node_dkeys(node) + ncmax); 142 } 143 144 static __u64 145 nilfs_btree_node_get_key(const struct nilfs_btree_node *node, int index) 146 { 147 return le64_to_cpu(*(nilfs_btree_node_dkeys(node) + index)); 148 } 149 150 static void 151 nilfs_btree_node_set_key(struct nilfs_btree_node *node, int index, __u64 key) 152 { 153 *(nilfs_btree_node_dkeys(node) + index) = cpu_to_le64(key); 154 } 155 156 static __u64 157 nilfs_btree_node_get_ptr(const struct nilfs_btree_node *node, int index, 158 int ncmax) 159 { 160 return le64_to_cpu(*(nilfs_btree_node_dptrs(node, ncmax) + index)); 161 } 162 163 static void 164 nilfs_btree_node_set_ptr(struct nilfs_btree_node *node, int index, __u64 ptr, 165 int ncmax) 166 { 167 *(nilfs_btree_node_dptrs(node, ncmax) + index) = cpu_to_le64(ptr); 168 } 169 170 static void nilfs_btree_node_init(struct nilfs_btree_node *node, int flags, 171 int level, int nchildren, int ncmax, 172 const __u64 *keys, const __u64 *ptrs) 173 { 174 __le64 *dkeys; 175 __le64 *dptrs; 176 int i; 177 178 nilfs_btree_node_set_flags(node, flags); 179 nilfs_btree_node_set_level(node, level); 180 nilfs_btree_node_set_nchildren(node, nchildren); 181 182 dkeys = nilfs_btree_node_dkeys(node); 183 dptrs = nilfs_btree_node_dptrs(node, ncmax); 184 for (i = 0; i < nchildren; i++) { 185 dkeys[i] = cpu_to_le64(keys[i]); 186 dptrs[i] = cpu_to_le64(ptrs[i]); 187 } 188 } 189 190 /* Assume the buffer heads corresponding to left and right are locked. */ 191 static void nilfs_btree_node_move_left(struct nilfs_btree_node *left, 192 struct nilfs_btree_node *right, 193 int n, int lncmax, int rncmax) 194 { 195 __le64 *ldkeys, *rdkeys; 196 __le64 *ldptrs, *rdptrs; 197 int lnchildren, rnchildren; 198 199 ldkeys = nilfs_btree_node_dkeys(left); 200 ldptrs = nilfs_btree_node_dptrs(left, lncmax); 201 lnchildren = nilfs_btree_node_get_nchildren(left); 202 203 rdkeys = nilfs_btree_node_dkeys(right); 204 rdptrs = nilfs_btree_node_dptrs(right, rncmax); 205 rnchildren = nilfs_btree_node_get_nchildren(right); 206 207 memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys)); 208 memcpy(ldptrs + lnchildren, rdptrs, n * sizeof(*rdptrs)); 209 memmove(rdkeys, rdkeys + n, (rnchildren - n) * sizeof(*rdkeys)); 210 memmove(rdptrs, rdptrs + n, (rnchildren - n) * sizeof(*rdptrs)); 211 212 lnchildren += n; 213 rnchildren -= n; 214 nilfs_btree_node_set_nchildren(left, lnchildren); 215 nilfs_btree_node_set_nchildren(right, rnchildren); 216 } 217 218 /* Assume that the buffer heads corresponding to left and right are locked. */ 219 static void nilfs_btree_node_move_right(struct nilfs_btree_node *left, 220 struct nilfs_btree_node *right, 221 int n, int lncmax, int rncmax) 222 { 223 __le64 *ldkeys, *rdkeys; 224 __le64 *ldptrs, *rdptrs; 225 int lnchildren, rnchildren; 226 227 ldkeys = nilfs_btree_node_dkeys(left); 228 ldptrs = nilfs_btree_node_dptrs(left, lncmax); 229 lnchildren = nilfs_btree_node_get_nchildren(left); 230 231 rdkeys = nilfs_btree_node_dkeys(right); 232 rdptrs = nilfs_btree_node_dptrs(right, rncmax); 233 rnchildren = nilfs_btree_node_get_nchildren(right); 234 235 memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys)); 236 memmove(rdptrs + n, rdptrs, rnchildren * sizeof(*rdptrs)); 237 memcpy(rdkeys, ldkeys + lnchildren - n, n * sizeof(*rdkeys)); 238 memcpy(rdptrs, ldptrs + lnchildren - n, n * sizeof(*rdptrs)); 239 240 lnchildren -= n; 241 rnchildren += n; 242 nilfs_btree_node_set_nchildren(left, lnchildren); 243 nilfs_btree_node_set_nchildren(right, rnchildren); 244 } 245 246 /* Assume that the buffer head corresponding to node is locked. */ 247 static void nilfs_btree_node_insert(struct nilfs_btree_node *node, int index, 248 __u64 key, __u64 ptr, int ncmax) 249 { 250 __le64 *dkeys; 251 __le64 *dptrs; 252 int nchildren; 253 254 dkeys = nilfs_btree_node_dkeys(node); 255 dptrs = nilfs_btree_node_dptrs(node, ncmax); 256 nchildren = nilfs_btree_node_get_nchildren(node); 257 if (index < nchildren) { 258 memmove(dkeys + index + 1, dkeys + index, 259 (nchildren - index) * sizeof(*dkeys)); 260 memmove(dptrs + index + 1, dptrs + index, 261 (nchildren - index) * sizeof(*dptrs)); 262 } 263 dkeys[index] = cpu_to_le64(key); 264 dptrs[index] = cpu_to_le64(ptr); 265 nchildren++; 266 nilfs_btree_node_set_nchildren(node, nchildren); 267 } 268 269 /* Assume that the buffer head corresponding to node is locked. */ 270 static void nilfs_btree_node_delete(struct nilfs_btree_node *node, int index, 271 __u64 *keyp, __u64 *ptrp, int ncmax) 272 { 273 __u64 key; 274 __u64 ptr; 275 __le64 *dkeys; 276 __le64 *dptrs; 277 int nchildren; 278 279 dkeys = nilfs_btree_node_dkeys(node); 280 dptrs = nilfs_btree_node_dptrs(node, ncmax); 281 key = le64_to_cpu(dkeys[index]); 282 ptr = le64_to_cpu(dptrs[index]); 283 nchildren = nilfs_btree_node_get_nchildren(node); 284 if (keyp != NULL) 285 *keyp = key; 286 if (ptrp != NULL) 287 *ptrp = ptr; 288 289 if (index < nchildren - 1) { 290 memmove(dkeys + index, dkeys + index + 1, 291 (nchildren - index - 1) * sizeof(*dkeys)); 292 memmove(dptrs + index, dptrs + index + 1, 293 (nchildren - index - 1) * sizeof(*dptrs)); 294 } 295 nchildren--; 296 nilfs_btree_node_set_nchildren(node, nchildren); 297 } 298 299 static int nilfs_btree_node_lookup(const struct nilfs_btree_node *node, 300 __u64 key, int *indexp) 301 { 302 __u64 nkey; 303 int index, low, high, s; 304 305 /* binary search */ 306 low = 0; 307 high = nilfs_btree_node_get_nchildren(node) - 1; 308 index = 0; 309 s = 0; 310 while (low <= high) { 311 index = (low + high) / 2; 312 nkey = nilfs_btree_node_get_key(node, index); 313 if (nkey == key) { 314 s = 0; 315 goto out; 316 } else if (nkey < key) { 317 low = index + 1; 318 s = -1; 319 } else { 320 high = index - 1; 321 s = 1; 322 } 323 } 324 325 /* adjust index */ 326 if (nilfs_btree_node_get_level(node) > NILFS_BTREE_LEVEL_NODE_MIN) { 327 if (s > 0 && index > 0) 328 index--; 329 } else if (s < 0) 330 index++; 331 332 out: 333 *indexp = index; 334 335 return s == 0; 336 } 337 338 /** 339 * nilfs_btree_node_broken - verify consistency of btree node 340 * @node: btree node block to be examined 341 * @size: node size (in bytes) 342 * @blocknr: block number 343 * 344 * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned. 345 */ 346 static int nilfs_btree_node_broken(const struct nilfs_btree_node *node, 347 size_t size, sector_t blocknr) 348 { 349 int level, flags, nchildren; 350 int ret = 0; 351 352 level = nilfs_btree_node_get_level(node); 353 flags = nilfs_btree_node_get_flags(node); 354 nchildren = nilfs_btree_node_get_nchildren(node); 355 356 if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN || 357 level >= NILFS_BTREE_LEVEL_MAX || 358 (flags & NILFS_BTREE_NODE_ROOT) || 359 nchildren < 0 || 360 nchildren > NILFS_BTREE_NODE_NCHILDREN_MAX(size))) { 361 printk(KERN_CRIT "NILFS: bad btree node (blocknr=%llu): " 362 "level = %d, flags = 0x%x, nchildren = %d\n", 363 (unsigned long long)blocknr, level, flags, nchildren); 364 ret = 1; 365 } 366 return ret; 367 } 368 369 /** 370 * nilfs_btree_root_broken - verify consistency of btree root node 371 * @node: btree root node to be examined 372 * @ino: inode number 373 * 374 * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned. 375 */ 376 static int nilfs_btree_root_broken(const struct nilfs_btree_node *node, 377 unsigned long ino) 378 { 379 int level, flags, nchildren; 380 int ret = 0; 381 382 level = nilfs_btree_node_get_level(node); 383 flags = nilfs_btree_node_get_flags(node); 384 nchildren = nilfs_btree_node_get_nchildren(node); 385 386 if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN || 387 level >= NILFS_BTREE_LEVEL_MAX || 388 nchildren < 0 || 389 nchildren > NILFS_BTREE_ROOT_NCHILDREN_MAX)) { 390 pr_crit("NILFS: bad btree root (inode number=%lu): level = %d, flags = 0x%x, nchildren = %d\n", 391 ino, level, flags, nchildren); 392 ret = 1; 393 } 394 return ret; 395 } 396 397 int nilfs_btree_broken_node_block(struct buffer_head *bh) 398 { 399 int ret; 400 401 if (buffer_nilfs_checked(bh)) 402 return 0; 403 404 ret = nilfs_btree_node_broken((struct nilfs_btree_node *)bh->b_data, 405 bh->b_size, bh->b_blocknr); 406 if (likely(!ret)) 407 set_buffer_nilfs_checked(bh); 408 return ret; 409 } 410 411 static struct nilfs_btree_node * 412 nilfs_btree_get_root(const struct nilfs_bmap *btree) 413 { 414 return (struct nilfs_btree_node *)btree->b_u.u_data; 415 } 416 417 static struct nilfs_btree_node * 418 nilfs_btree_get_nonroot_node(const struct nilfs_btree_path *path, int level) 419 { 420 return (struct nilfs_btree_node *)path[level].bp_bh->b_data; 421 } 422 423 static struct nilfs_btree_node * 424 nilfs_btree_get_sib_node(const struct nilfs_btree_path *path, int level) 425 { 426 return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data; 427 } 428 429 static int nilfs_btree_height(const struct nilfs_bmap *btree) 430 { 431 return nilfs_btree_node_get_level(nilfs_btree_get_root(btree)) + 1; 432 } 433 434 static struct nilfs_btree_node * 435 nilfs_btree_get_node(const struct nilfs_bmap *btree, 436 const struct nilfs_btree_path *path, 437 int level, int *ncmaxp) 438 { 439 struct nilfs_btree_node *node; 440 441 if (level == nilfs_btree_height(btree) - 1) { 442 node = nilfs_btree_get_root(btree); 443 *ncmaxp = NILFS_BTREE_ROOT_NCHILDREN_MAX; 444 } else { 445 node = nilfs_btree_get_nonroot_node(path, level); 446 *ncmaxp = nilfs_btree_nchildren_per_block(btree); 447 } 448 return node; 449 } 450 451 static int 452 nilfs_btree_bad_node(struct nilfs_btree_node *node, int level) 453 { 454 if (unlikely(nilfs_btree_node_get_level(node) != level)) { 455 dump_stack(); 456 printk(KERN_CRIT "NILFS: btree level mismatch: %d != %d\n", 457 nilfs_btree_node_get_level(node), level); 458 return 1; 459 } 460 return 0; 461 } 462 463 struct nilfs_btree_readahead_info { 464 struct nilfs_btree_node *node; /* parent node */ 465 int max_ra_blocks; /* max nof blocks to read ahead */ 466 int index; /* current index on the parent node */ 467 int ncmax; /* nof children in the parent node */ 468 }; 469 470 static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr, 471 struct buffer_head **bhp, 472 const struct nilfs_btree_readahead_info *ra) 473 { 474 struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache; 475 struct buffer_head *bh, *ra_bh; 476 sector_t submit_ptr = 0; 477 int ret; 478 479 ret = nilfs_btnode_submit_block(btnc, ptr, 0, REQ_OP_READ, 0, &bh, 480 &submit_ptr); 481 if (ret) { 482 if (ret != -EEXIST) 483 return ret; 484 goto out_check; 485 } 486 487 if (ra) { 488 int i, n; 489 __u64 ptr2; 490 491 /* read ahead sibling nodes */ 492 for (n = ra->max_ra_blocks, i = ra->index + 1; 493 n > 0 && i < ra->ncmax; n--, i++) { 494 ptr2 = nilfs_btree_node_get_ptr(ra->node, i, ra->ncmax); 495 496 ret = nilfs_btnode_submit_block(btnc, ptr2, 0, 497 REQ_OP_READ, REQ_RAHEAD, 498 &ra_bh, &submit_ptr); 499 if (likely(!ret || ret == -EEXIST)) 500 brelse(ra_bh); 501 else if (ret != -EBUSY) 502 break; 503 if (!buffer_locked(bh)) 504 goto out_no_wait; 505 } 506 } 507 508 wait_on_buffer(bh); 509 510 out_no_wait: 511 if (!buffer_uptodate(bh)) { 512 brelse(bh); 513 return -EIO; 514 } 515 516 out_check: 517 if (nilfs_btree_broken_node_block(bh)) { 518 clear_buffer_uptodate(bh); 519 brelse(bh); 520 return -EINVAL; 521 } 522 523 *bhp = bh; 524 return 0; 525 } 526 527 static int nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr, 528 struct buffer_head **bhp) 529 { 530 return __nilfs_btree_get_block(btree, ptr, bhp, NULL); 531 } 532 533 static int nilfs_btree_do_lookup(const struct nilfs_bmap *btree, 534 struct nilfs_btree_path *path, 535 __u64 key, __u64 *ptrp, int minlevel, 536 int readahead) 537 { 538 struct nilfs_btree_node *node; 539 struct nilfs_btree_readahead_info p, *ra; 540 __u64 ptr; 541 int level, index, found, ncmax, ret; 542 543 node = nilfs_btree_get_root(btree); 544 level = nilfs_btree_node_get_level(node); 545 if (level < minlevel || nilfs_btree_node_get_nchildren(node) <= 0) 546 return -ENOENT; 547 548 found = nilfs_btree_node_lookup(node, key, &index); 549 ptr = nilfs_btree_node_get_ptr(node, index, 550 NILFS_BTREE_ROOT_NCHILDREN_MAX); 551 path[level].bp_bh = NULL; 552 path[level].bp_index = index; 553 554 ncmax = nilfs_btree_nchildren_per_block(btree); 555 556 while (--level >= minlevel) { 557 ra = NULL; 558 if (level == NILFS_BTREE_LEVEL_NODE_MIN && readahead) { 559 p.node = nilfs_btree_get_node(btree, path, level + 1, 560 &p.ncmax); 561 p.index = index; 562 p.max_ra_blocks = 7; 563 ra = &p; 564 } 565 ret = __nilfs_btree_get_block(btree, ptr, &path[level].bp_bh, 566 ra); 567 if (ret < 0) 568 return ret; 569 570 node = nilfs_btree_get_nonroot_node(path, level); 571 if (nilfs_btree_bad_node(node, level)) 572 return -EINVAL; 573 if (!found) 574 found = nilfs_btree_node_lookup(node, key, &index); 575 else 576 index = 0; 577 if (index < ncmax) { 578 ptr = nilfs_btree_node_get_ptr(node, index, ncmax); 579 } else { 580 WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN); 581 /* insert */ 582 ptr = NILFS_BMAP_INVALID_PTR; 583 } 584 path[level].bp_index = index; 585 } 586 if (!found) 587 return -ENOENT; 588 589 if (ptrp != NULL) 590 *ptrp = ptr; 591 592 return 0; 593 } 594 595 static int nilfs_btree_do_lookup_last(const struct nilfs_bmap *btree, 596 struct nilfs_btree_path *path, 597 __u64 *keyp, __u64 *ptrp) 598 { 599 struct nilfs_btree_node *node; 600 __u64 ptr; 601 int index, level, ncmax, ret; 602 603 node = nilfs_btree_get_root(btree); 604 index = nilfs_btree_node_get_nchildren(node) - 1; 605 if (index < 0) 606 return -ENOENT; 607 level = nilfs_btree_node_get_level(node); 608 ptr = nilfs_btree_node_get_ptr(node, index, 609 NILFS_BTREE_ROOT_NCHILDREN_MAX); 610 path[level].bp_bh = NULL; 611 path[level].bp_index = index; 612 ncmax = nilfs_btree_nchildren_per_block(btree); 613 614 for (level--; level > 0; level--) { 615 ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh); 616 if (ret < 0) 617 return ret; 618 node = nilfs_btree_get_nonroot_node(path, level); 619 if (nilfs_btree_bad_node(node, level)) 620 return -EINVAL; 621 index = nilfs_btree_node_get_nchildren(node) - 1; 622 ptr = nilfs_btree_node_get_ptr(node, index, ncmax); 623 path[level].bp_index = index; 624 } 625 626 if (keyp != NULL) 627 *keyp = nilfs_btree_node_get_key(node, index); 628 if (ptrp != NULL) 629 *ptrp = ptr; 630 631 return 0; 632 } 633 634 /** 635 * nilfs_btree_get_next_key - get next valid key from btree path array 636 * @btree: bmap struct of btree 637 * @path: array of nilfs_btree_path struct 638 * @minlevel: start level 639 * @nextkey: place to store the next valid key 640 * 641 * Return Value: If a next key was found, 0 is returned. Otherwise, 642 * -ENOENT is returned. 643 */ 644 static int nilfs_btree_get_next_key(const struct nilfs_bmap *btree, 645 const struct nilfs_btree_path *path, 646 int minlevel, __u64 *nextkey) 647 { 648 struct nilfs_btree_node *node; 649 int maxlevel = nilfs_btree_height(btree) - 1; 650 int index, next_adj, level; 651 652 /* Next index is already set to bp_index for leaf nodes. */ 653 next_adj = 0; 654 for (level = minlevel; level <= maxlevel; level++) { 655 if (level == maxlevel) 656 node = nilfs_btree_get_root(btree); 657 else 658 node = nilfs_btree_get_nonroot_node(path, level); 659 660 index = path[level].bp_index + next_adj; 661 if (index < nilfs_btree_node_get_nchildren(node)) { 662 /* Next key is in this node */ 663 *nextkey = nilfs_btree_node_get_key(node, index); 664 return 0; 665 } 666 /* For non-leaf nodes, next index is stored at bp_index + 1. */ 667 next_adj = 1; 668 } 669 return -ENOENT; 670 } 671 672 static int nilfs_btree_lookup(const struct nilfs_bmap *btree, 673 __u64 key, int level, __u64 *ptrp) 674 { 675 struct nilfs_btree_path *path; 676 int ret; 677 678 path = nilfs_btree_alloc_path(); 679 if (path == NULL) 680 return -ENOMEM; 681 682 ret = nilfs_btree_do_lookup(btree, path, key, ptrp, level, 0); 683 684 nilfs_btree_free_path(path); 685 686 return ret; 687 } 688 689 static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree, 690 __u64 key, __u64 *ptrp, 691 unsigned int maxblocks) 692 { 693 struct nilfs_btree_path *path; 694 struct nilfs_btree_node *node; 695 struct inode *dat = NULL; 696 __u64 ptr, ptr2; 697 sector_t blocknr; 698 int level = NILFS_BTREE_LEVEL_NODE_MIN; 699 int ret, cnt, index, maxlevel, ncmax; 700 struct nilfs_btree_readahead_info p; 701 702 path = nilfs_btree_alloc_path(); 703 if (path == NULL) 704 return -ENOMEM; 705 706 ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level, 1); 707 if (ret < 0) 708 goto out; 709 710 if (NILFS_BMAP_USE_VBN(btree)) { 711 dat = nilfs_bmap_get_dat(btree); 712 ret = nilfs_dat_translate(dat, ptr, &blocknr); 713 if (ret < 0) 714 goto out; 715 ptr = blocknr; 716 } 717 cnt = 1; 718 if (cnt == maxblocks) 719 goto end; 720 721 maxlevel = nilfs_btree_height(btree) - 1; 722 node = nilfs_btree_get_node(btree, path, level, &ncmax); 723 index = path[level].bp_index + 1; 724 for (;;) { 725 while (index < nilfs_btree_node_get_nchildren(node)) { 726 if (nilfs_btree_node_get_key(node, index) != 727 key + cnt) 728 goto end; 729 ptr2 = nilfs_btree_node_get_ptr(node, index, ncmax); 730 if (dat) { 731 ret = nilfs_dat_translate(dat, ptr2, &blocknr); 732 if (ret < 0) 733 goto out; 734 ptr2 = blocknr; 735 } 736 if (ptr2 != ptr + cnt || ++cnt == maxblocks) 737 goto end; 738 index++; 739 continue; 740 } 741 if (level == maxlevel) 742 break; 743 744 /* look-up right sibling node */ 745 p.node = nilfs_btree_get_node(btree, path, level + 1, &p.ncmax); 746 p.index = path[level + 1].bp_index + 1; 747 p.max_ra_blocks = 7; 748 if (p.index >= nilfs_btree_node_get_nchildren(p.node) || 749 nilfs_btree_node_get_key(p.node, p.index) != key + cnt) 750 break; 751 ptr2 = nilfs_btree_node_get_ptr(p.node, p.index, p.ncmax); 752 path[level + 1].bp_index = p.index; 753 754 brelse(path[level].bp_bh); 755 path[level].bp_bh = NULL; 756 757 ret = __nilfs_btree_get_block(btree, ptr2, &path[level].bp_bh, 758 &p); 759 if (ret < 0) 760 goto out; 761 node = nilfs_btree_get_nonroot_node(path, level); 762 ncmax = nilfs_btree_nchildren_per_block(btree); 763 index = 0; 764 path[level].bp_index = index; 765 } 766 end: 767 *ptrp = ptr; 768 ret = cnt; 769 out: 770 nilfs_btree_free_path(path); 771 return ret; 772 } 773 774 static void nilfs_btree_promote_key(struct nilfs_bmap *btree, 775 struct nilfs_btree_path *path, 776 int level, __u64 key) 777 { 778 if (level < nilfs_btree_height(btree) - 1) { 779 do { 780 nilfs_btree_node_set_key( 781 nilfs_btree_get_nonroot_node(path, level), 782 path[level].bp_index, key); 783 if (!buffer_dirty(path[level].bp_bh)) 784 mark_buffer_dirty(path[level].bp_bh); 785 } while ((path[level].bp_index == 0) && 786 (++level < nilfs_btree_height(btree) - 1)); 787 } 788 789 /* root */ 790 if (level == nilfs_btree_height(btree) - 1) { 791 nilfs_btree_node_set_key(nilfs_btree_get_root(btree), 792 path[level].bp_index, key); 793 } 794 } 795 796 static void nilfs_btree_do_insert(struct nilfs_bmap *btree, 797 struct nilfs_btree_path *path, 798 int level, __u64 *keyp, __u64 *ptrp) 799 { 800 struct nilfs_btree_node *node; 801 int ncblk; 802 803 if (level < nilfs_btree_height(btree) - 1) { 804 node = nilfs_btree_get_nonroot_node(path, level); 805 ncblk = nilfs_btree_nchildren_per_block(btree); 806 nilfs_btree_node_insert(node, path[level].bp_index, 807 *keyp, *ptrp, ncblk); 808 if (!buffer_dirty(path[level].bp_bh)) 809 mark_buffer_dirty(path[level].bp_bh); 810 811 if (path[level].bp_index == 0) 812 nilfs_btree_promote_key(btree, path, level + 1, 813 nilfs_btree_node_get_key(node, 814 0)); 815 } else { 816 node = nilfs_btree_get_root(btree); 817 nilfs_btree_node_insert(node, path[level].bp_index, 818 *keyp, *ptrp, 819 NILFS_BTREE_ROOT_NCHILDREN_MAX); 820 } 821 } 822 823 static void nilfs_btree_carry_left(struct nilfs_bmap *btree, 824 struct nilfs_btree_path *path, 825 int level, __u64 *keyp, __u64 *ptrp) 826 { 827 struct nilfs_btree_node *node, *left; 828 int nchildren, lnchildren, n, move, ncblk; 829 830 node = nilfs_btree_get_nonroot_node(path, level); 831 left = nilfs_btree_get_sib_node(path, level); 832 nchildren = nilfs_btree_node_get_nchildren(node); 833 lnchildren = nilfs_btree_node_get_nchildren(left); 834 ncblk = nilfs_btree_nchildren_per_block(btree); 835 move = 0; 836 837 n = (nchildren + lnchildren + 1) / 2 - lnchildren; 838 if (n > path[level].bp_index) { 839 /* move insert point */ 840 n--; 841 move = 1; 842 } 843 844 nilfs_btree_node_move_left(left, node, n, ncblk, ncblk); 845 846 if (!buffer_dirty(path[level].bp_bh)) 847 mark_buffer_dirty(path[level].bp_bh); 848 if (!buffer_dirty(path[level].bp_sib_bh)) 849 mark_buffer_dirty(path[level].bp_sib_bh); 850 851 nilfs_btree_promote_key(btree, path, level + 1, 852 nilfs_btree_node_get_key(node, 0)); 853 854 if (move) { 855 brelse(path[level].bp_bh); 856 path[level].bp_bh = path[level].bp_sib_bh; 857 path[level].bp_sib_bh = NULL; 858 path[level].bp_index += lnchildren; 859 path[level + 1].bp_index--; 860 } else { 861 brelse(path[level].bp_sib_bh); 862 path[level].bp_sib_bh = NULL; 863 path[level].bp_index -= n; 864 } 865 866 nilfs_btree_do_insert(btree, path, level, keyp, ptrp); 867 } 868 869 static void nilfs_btree_carry_right(struct nilfs_bmap *btree, 870 struct nilfs_btree_path *path, 871 int level, __u64 *keyp, __u64 *ptrp) 872 { 873 struct nilfs_btree_node *node, *right; 874 int nchildren, rnchildren, n, move, ncblk; 875 876 node = nilfs_btree_get_nonroot_node(path, level); 877 right = nilfs_btree_get_sib_node(path, level); 878 nchildren = nilfs_btree_node_get_nchildren(node); 879 rnchildren = nilfs_btree_node_get_nchildren(right); 880 ncblk = nilfs_btree_nchildren_per_block(btree); 881 move = 0; 882 883 n = (nchildren + rnchildren + 1) / 2 - rnchildren; 884 if (n > nchildren - path[level].bp_index) { 885 /* move insert point */ 886 n--; 887 move = 1; 888 } 889 890 nilfs_btree_node_move_right(node, right, n, ncblk, ncblk); 891 892 if (!buffer_dirty(path[level].bp_bh)) 893 mark_buffer_dirty(path[level].bp_bh); 894 if (!buffer_dirty(path[level].bp_sib_bh)) 895 mark_buffer_dirty(path[level].bp_sib_bh); 896 897 path[level + 1].bp_index++; 898 nilfs_btree_promote_key(btree, path, level + 1, 899 nilfs_btree_node_get_key(right, 0)); 900 path[level + 1].bp_index--; 901 902 if (move) { 903 brelse(path[level].bp_bh); 904 path[level].bp_bh = path[level].bp_sib_bh; 905 path[level].bp_sib_bh = NULL; 906 path[level].bp_index -= nilfs_btree_node_get_nchildren(node); 907 path[level + 1].bp_index++; 908 } else { 909 brelse(path[level].bp_sib_bh); 910 path[level].bp_sib_bh = NULL; 911 } 912 913 nilfs_btree_do_insert(btree, path, level, keyp, ptrp); 914 } 915 916 static void nilfs_btree_split(struct nilfs_bmap *btree, 917 struct nilfs_btree_path *path, 918 int level, __u64 *keyp, __u64 *ptrp) 919 { 920 struct nilfs_btree_node *node, *right; 921 int nchildren, n, move, ncblk; 922 923 node = nilfs_btree_get_nonroot_node(path, level); 924 right = nilfs_btree_get_sib_node(path, level); 925 nchildren = nilfs_btree_node_get_nchildren(node); 926 ncblk = nilfs_btree_nchildren_per_block(btree); 927 move = 0; 928 929 n = (nchildren + 1) / 2; 930 if (n > nchildren - path[level].bp_index) { 931 n--; 932 move = 1; 933 } 934 935 nilfs_btree_node_move_right(node, right, n, ncblk, ncblk); 936 937 if (!buffer_dirty(path[level].bp_bh)) 938 mark_buffer_dirty(path[level].bp_bh); 939 if (!buffer_dirty(path[level].bp_sib_bh)) 940 mark_buffer_dirty(path[level].bp_sib_bh); 941 942 if (move) { 943 path[level].bp_index -= nilfs_btree_node_get_nchildren(node); 944 nilfs_btree_node_insert(right, path[level].bp_index, 945 *keyp, *ptrp, ncblk); 946 947 *keyp = nilfs_btree_node_get_key(right, 0); 948 *ptrp = path[level].bp_newreq.bpr_ptr; 949 950 brelse(path[level].bp_bh); 951 path[level].bp_bh = path[level].bp_sib_bh; 952 path[level].bp_sib_bh = NULL; 953 } else { 954 nilfs_btree_do_insert(btree, path, level, keyp, ptrp); 955 956 *keyp = nilfs_btree_node_get_key(right, 0); 957 *ptrp = path[level].bp_newreq.bpr_ptr; 958 959 brelse(path[level].bp_sib_bh); 960 path[level].bp_sib_bh = NULL; 961 } 962 963 path[level + 1].bp_index++; 964 } 965 966 static void nilfs_btree_grow(struct nilfs_bmap *btree, 967 struct nilfs_btree_path *path, 968 int level, __u64 *keyp, __u64 *ptrp) 969 { 970 struct nilfs_btree_node *root, *child; 971 int n, ncblk; 972 973 root = nilfs_btree_get_root(btree); 974 child = nilfs_btree_get_sib_node(path, level); 975 ncblk = nilfs_btree_nchildren_per_block(btree); 976 977 n = nilfs_btree_node_get_nchildren(root); 978 979 nilfs_btree_node_move_right(root, child, n, 980 NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk); 981 nilfs_btree_node_set_level(root, level + 1); 982 983 if (!buffer_dirty(path[level].bp_sib_bh)) 984 mark_buffer_dirty(path[level].bp_sib_bh); 985 986 path[level].bp_bh = path[level].bp_sib_bh; 987 path[level].bp_sib_bh = NULL; 988 989 nilfs_btree_do_insert(btree, path, level, keyp, ptrp); 990 991 *keyp = nilfs_btree_node_get_key(child, 0); 992 *ptrp = path[level].bp_newreq.bpr_ptr; 993 } 994 995 static __u64 nilfs_btree_find_near(const struct nilfs_bmap *btree, 996 const struct nilfs_btree_path *path) 997 { 998 struct nilfs_btree_node *node; 999 int level, ncmax; 1000 1001 if (path == NULL) 1002 return NILFS_BMAP_INVALID_PTR; 1003 1004 /* left sibling */ 1005 level = NILFS_BTREE_LEVEL_NODE_MIN; 1006 if (path[level].bp_index > 0) { 1007 node = nilfs_btree_get_node(btree, path, level, &ncmax); 1008 return nilfs_btree_node_get_ptr(node, 1009 path[level].bp_index - 1, 1010 ncmax); 1011 } 1012 1013 /* parent */ 1014 level = NILFS_BTREE_LEVEL_NODE_MIN + 1; 1015 if (level <= nilfs_btree_height(btree) - 1) { 1016 node = nilfs_btree_get_node(btree, path, level, &ncmax); 1017 return nilfs_btree_node_get_ptr(node, path[level].bp_index, 1018 ncmax); 1019 } 1020 1021 return NILFS_BMAP_INVALID_PTR; 1022 } 1023 1024 static __u64 nilfs_btree_find_target_v(const struct nilfs_bmap *btree, 1025 const struct nilfs_btree_path *path, 1026 __u64 key) 1027 { 1028 __u64 ptr; 1029 1030 ptr = nilfs_bmap_find_target_seq(btree, key); 1031 if (ptr != NILFS_BMAP_INVALID_PTR) 1032 /* sequential access */ 1033 return ptr; 1034 1035 ptr = nilfs_btree_find_near(btree, path); 1036 if (ptr != NILFS_BMAP_INVALID_PTR) 1037 /* near */ 1038 return ptr; 1039 1040 /* block group */ 1041 return nilfs_bmap_find_target_in_group(btree); 1042 } 1043 1044 static int nilfs_btree_prepare_insert(struct nilfs_bmap *btree, 1045 struct nilfs_btree_path *path, 1046 int *levelp, __u64 key, __u64 ptr, 1047 struct nilfs_bmap_stats *stats) 1048 { 1049 struct buffer_head *bh; 1050 struct nilfs_btree_node *node, *parent, *sib; 1051 __u64 sibptr; 1052 int pindex, level, ncmax, ncblk, ret; 1053 struct inode *dat = NULL; 1054 1055 stats->bs_nblocks = 0; 1056 level = NILFS_BTREE_LEVEL_DATA; 1057 1058 /* allocate a new ptr for data block */ 1059 if (NILFS_BMAP_USE_VBN(btree)) { 1060 path[level].bp_newreq.bpr_ptr = 1061 nilfs_btree_find_target_v(btree, path, key); 1062 dat = nilfs_bmap_get_dat(btree); 1063 } 1064 1065 ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat); 1066 if (ret < 0) 1067 goto err_out_data; 1068 1069 ncblk = nilfs_btree_nchildren_per_block(btree); 1070 1071 for (level = NILFS_BTREE_LEVEL_NODE_MIN; 1072 level < nilfs_btree_height(btree) - 1; 1073 level++) { 1074 node = nilfs_btree_get_nonroot_node(path, level); 1075 if (nilfs_btree_node_get_nchildren(node) < ncblk) { 1076 path[level].bp_op = nilfs_btree_do_insert; 1077 stats->bs_nblocks++; 1078 goto out; 1079 } 1080 1081 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); 1082 pindex = path[level + 1].bp_index; 1083 1084 /* left sibling */ 1085 if (pindex > 0) { 1086 sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1, 1087 ncmax); 1088 ret = nilfs_btree_get_block(btree, sibptr, &bh); 1089 if (ret < 0) 1090 goto err_out_child_node; 1091 sib = (struct nilfs_btree_node *)bh->b_data; 1092 if (nilfs_btree_node_get_nchildren(sib) < ncblk) { 1093 path[level].bp_sib_bh = bh; 1094 path[level].bp_op = nilfs_btree_carry_left; 1095 stats->bs_nblocks++; 1096 goto out; 1097 } else { 1098 brelse(bh); 1099 } 1100 } 1101 1102 /* right sibling */ 1103 if (pindex < nilfs_btree_node_get_nchildren(parent) - 1) { 1104 sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1, 1105 ncmax); 1106 ret = nilfs_btree_get_block(btree, sibptr, &bh); 1107 if (ret < 0) 1108 goto err_out_child_node; 1109 sib = (struct nilfs_btree_node *)bh->b_data; 1110 if (nilfs_btree_node_get_nchildren(sib) < ncblk) { 1111 path[level].bp_sib_bh = bh; 1112 path[level].bp_op = nilfs_btree_carry_right; 1113 stats->bs_nblocks++; 1114 goto out; 1115 } else { 1116 brelse(bh); 1117 } 1118 } 1119 1120 /* split */ 1121 path[level].bp_newreq.bpr_ptr = 1122 path[level - 1].bp_newreq.bpr_ptr + 1; 1123 ret = nilfs_bmap_prepare_alloc_ptr(btree, 1124 &path[level].bp_newreq, dat); 1125 if (ret < 0) 1126 goto err_out_child_node; 1127 ret = nilfs_btree_get_new_block(btree, 1128 path[level].bp_newreq.bpr_ptr, 1129 &bh); 1130 if (ret < 0) 1131 goto err_out_curr_node; 1132 1133 stats->bs_nblocks++; 1134 1135 sib = (struct nilfs_btree_node *)bh->b_data; 1136 nilfs_btree_node_init(sib, 0, level, 0, ncblk, NULL, NULL); 1137 path[level].bp_sib_bh = bh; 1138 path[level].bp_op = nilfs_btree_split; 1139 } 1140 1141 /* root */ 1142 node = nilfs_btree_get_root(btree); 1143 if (nilfs_btree_node_get_nchildren(node) < 1144 NILFS_BTREE_ROOT_NCHILDREN_MAX) { 1145 path[level].bp_op = nilfs_btree_do_insert; 1146 stats->bs_nblocks++; 1147 goto out; 1148 } 1149 1150 /* grow */ 1151 path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1; 1152 ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat); 1153 if (ret < 0) 1154 goto err_out_child_node; 1155 ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr, 1156 &bh); 1157 if (ret < 0) 1158 goto err_out_curr_node; 1159 1160 nilfs_btree_node_init((struct nilfs_btree_node *)bh->b_data, 1161 0, level, 0, ncblk, NULL, NULL); 1162 path[level].bp_sib_bh = bh; 1163 path[level].bp_op = nilfs_btree_grow; 1164 1165 level++; 1166 path[level].bp_op = nilfs_btree_do_insert; 1167 1168 /* a newly-created node block and a data block are added */ 1169 stats->bs_nblocks += 2; 1170 1171 /* success */ 1172 out: 1173 *levelp = level; 1174 return ret; 1175 1176 /* error */ 1177 err_out_curr_node: 1178 nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat); 1179 err_out_child_node: 1180 for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) { 1181 nilfs_btnode_delete(path[level].bp_sib_bh); 1182 nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat); 1183 1184 } 1185 1186 nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat); 1187 err_out_data: 1188 *levelp = level; 1189 stats->bs_nblocks = 0; 1190 return ret; 1191 } 1192 1193 static void nilfs_btree_commit_insert(struct nilfs_bmap *btree, 1194 struct nilfs_btree_path *path, 1195 int maxlevel, __u64 key, __u64 ptr) 1196 { 1197 struct inode *dat = NULL; 1198 int level; 1199 1200 set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr)); 1201 ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr; 1202 if (NILFS_BMAP_USE_VBN(btree)) { 1203 nilfs_bmap_set_target_v(btree, key, ptr); 1204 dat = nilfs_bmap_get_dat(btree); 1205 } 1206 1207 for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) { 1208 nilfs_bmap_commit_alloc_ptr(btree, 1209 &path[level - 1].bp_newreq, dat); 1210 path[level].bp_op(btree, path, level, &key, &ptr); 1211 } 1212 1213 if (!nilfs_bmap_dirty(btree)) 1214 nilfs_bmap_set_dirty(btree); 1215 } 1216 1217 static int nilfs_btree_insert(struct nilfs_bmap *btree, __u64 key, __u64 ptr) 1218 { 1219 struct nilfs_btree_path *path; 1220 struct nilfs_bmap_stats stats; 1221 int level, ret; 1222 1223 path = nilfs_btree_alloc_path(); 1224 if (path == NULL) 1225 return -ENOMEM; 1226 1227 ret = nilfs_btree_do_lookup(btree, path, key, NULL, 1228 NILFS_BTREE_LEVEL_NODE_MIN, 0); 1229 if (ret != -ENOENT) { 1230 if (ret == 0) 1231 ret = -EEXIST; 1232 goto out; 1233 } 1234 1235 ret = nilfs_btree_prepare_insert(btree, path, &level, key, ptr, &stats); 1236 if (ret < 0) 1237 goto out; 1238 nilfs_btree_commit_insert(btree, path, level, key, ptr); 1239 nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks); 1240 1241 out: 1242 nilfs_btree_free_path(path); 1243 return ret; 1244 } 1245 1246 static void nilfs_btree_do_delete(struct nilfs_bmap *btree, 1247 struct nilfs_btree_path *path, 1248 int level, __u64 *keyp, __u64 *ptrp) 1249 { 1250 struct nilfs_btree_node *node; 1251 int ncblk; 1252 1253 if (level < nilfs_btree_height(btree) - 1) { 1254 node = nilfs_btree_get_nonroot_node(path, level); 1255 ncblk = nilfs_btree_nchildren_per_block(btree); 1256 nilfs_btree_node_delete(node, path[level].bp_index, 1257 keyp, ptrp, ncblk); 1258 if (!buffer_dirty(path[level].bp_bh)) 1259 mark_buffer_dirty(path[level].bp_bh); 1260 if (path[level].bp_index == 0) 1261 nilfs_btree_promote_key(btree, path, level + 1, 1262 nilfs_btree_node_get_key(node, 0)); 1263 } else { 1264 node = nilfs_btree_get_root(btree); 1265 nilfs_btree_node_delete(node, path[level].bp_index, 1266 keyp, ptrp, 1267 NILFS_BTREE_ROOT_NCHILDREN_MAX); 1268 } 1269 } 1270 1271 static void nilfs_btree_borrow_left(struct nilfs_bmap *btree, 1272 struct nilfs_btree_path *path, 1273 int level, __u64 *keyp, __u64 *ptrp) 1274 { 1275 struct nilfs_btree_node *node, *left; 1276 int nchildren, lnchildren, n, ncblk; 1277 1278 nilfs_btree_do_delete(btree, path, level, keyp, ptrp); 1279 1280 node = nilfs_btree_get_nonroot_node(path, level); 1281 left = nilfs_btree_get_sib_node(path, level); 1282 nchildren = nilfs_btree_node_get_nchildren(node); 1283 lnchildren = nilfs_btree_node_get_nchildren(left); 1284 ncblk = nilfs_btree_nchildren_per_block(btree); 1285 1286 n = (nchildren + lnchildren) / 2 - nchildren; 1287 1288 nilfs_btree_node_move_right(left, node, n, ncblk, ncblk); 1289 1290 if (!buffer_dirty(path[level].bp_bh)) 1291 mark_buffer_dirty(path[level].bp_bh); 1292 if (!buffer_dirty(path[level].bp_sib_bh)) 1293 mark_buffer_dirty(path[level].bp_sib_bh); 1294 1295 nilfs_btree_promote_key(btree, path, level + 1, 1296 nilfs_btree_node_get_key(node, 0)); 1297 1298 brelse(path[level].bp_sib_bh); 1299 path[level].bp_sib_bh = NULL; 1300 path[level].bp_index += n; 1301 } 1302 1303 static void nilfs_btree_borrow_right(struct nilfs_bmap *btree, 1304 struct nilfs_btree_path *path, 1305 int level, __u64 *keyp, __u64 *ptrp) 1306 { 1307 struct nilfs_btree_node *node, *right; 1308 int nchildren, rnchildren, n, ncblk; 1309 1310 nilfs_btree_do_delete(btree, path, level, keyp, ptrp); 1311 1312 node = nilfs_btree_get_nonroot_node(path, level); 1313 right = nilfs_btree_get_sib_node(path, level); 1314 nchildren = nilfs_btree_node_get_nchildren(node); 1315 rnchildren = nilfs_btree_node_get_nchildren(right); 1316 ncblk = nilfs_btree_nchildren_per_block(btree); 1317 1318 n = (nchildren + rnchildren) / 2 - nchildren; 1319 1320 nilfs_btree_node_move_left(node, right, n, ncblk, ncblk); 1321 1322 if (!buffer_dirty(path[level].bp_bh)) 1323 mark_buffer_dirty(path[level].bp_bh); 1324 if (!buffer_dirty(path[level].bp_sib_bh)) 1325 mark_buffer_dirty(path[level].bp_sib_bh); 1326 1327 path[level + 1].bp_index++; 1328 nilfs_btree_promote_key(btree, path, level + 1, 1329 nilfs_btree_node_get_key(right, 0)); 1330 path[level + 1].bp_index--; 1331 1332 brelse(path[level].bp_sib_bh); 1333 path[level].bp_sib_bh = NULL; 1334 } 1335 1336 static void nilfs_btree_concat_left(struct nilfs_bmap *btree, 1337 struct nilfs_btree_path *path, 1338 int level, __u64 *keyp, __u64 *ptrp) 1339 { 1340 struct nilfs_btree_node *node, *left; 1341 int n, ncblk; 1342 1343 nilfs_btree_do_delete(btree, path, level, keyp, ptrp); 1344 1345 node = nilfs_btree_get_nonroot_node(path, level); 1346 left = nilfs_btree_get_sib_node(path, level); 1347 ncblk = nilfs_btree_nchildren_per_block(btree); 1348 1349 n = nilfs_btree_node_get_nchildren(node); 1350 1351 nilfs_btree_node_move_left(left, node, n, ncblk, ncblk); 1352 1353 if (!buffer_dirty(path[level].bp_sib_bh)) 1354 mark_buffer_dirty(path[level].bp_sib_bh); 1355 1356 nilfs_btnode_delete(path[level].bp_bh); 1357 path[level].bp_bh = path[level].bp_sib_bh; 1358 path[level].bp_sib_bh = NULL; 1359 path[level].bp_index += nilfs_btree_node_get_nchildren(left); 1360 } 1361 1362 static void nilfs_btree_concat_right(struct nilfs_bmap *btree, 1363 struct nilfs_btree_path *path, 1364 int level, __u64 *keyp, __u64 *ptrp) 1365 { 1366 struct nilfs_btree_node *node, *right; 1367 int n, ncblk; 1368 1369 nilfs_btree_do_delete(btree, path, level, keyp, ptrp); 1370 1371 node = nilfs_btree_get_nonroot_node(path, level); 1372 right = nilfs_btree_get_sib_node(path, level); 1373 ncblk = nilfs_btree_nchildren_per_block(btree); 1374 1375 n = nilfs_btree_node_get_nchildren(right); 1376 1377 nilfs_btree_node_move_left(node, right, n, ncblk, ncblk); 1378 1379 if (!buffer_dirty(path[level].bp_bh)) 1380 mark_buffer_dirty(path[level].bp_bh); 1381 1382 nilfs_btnode_delete(path[level].bp_sib_bh); 1383 path[level].bp_sib_bh = NULL; 1384 path[level + 1].bp_index++; 1385 } 1386 1387 static void nilfs_btree_shrink(struct nilfs_bmap *btree, 1388 struct nilfs_btree_path *path, 1389 int level, __u64 *keyp, __u64 *ptrp) 1390 { 1391 struct nilfs_btree_node *root, *child; 1392 int n, ncblk; 1393 1394 nilfs_btree_do_delete(btree, path, level, keyp, ptrp); 1395 1396 root = nilfs_btree_get_root(btree); 1397 child = nilfs_btree_get_nonroot_node(path, level); 1398 ncblk = nilfs_btree_nchildren_per_block(btree); 1399 1400 nilfs_btree_node_delete(root, 0, NULL, NULL, 1401 NILFS_BTREE_ROOT_NCHILDREN_MAX); 1402 nilfs_btree_node_set_level(root, level); 1403 n = nilfs_btree_node_get_nchildren(child); 1404 nilfs_btree_node_move_left(root, child, n, 1405 NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk); 1406 1407 nilfs_btnode_delete(path[level].bp_bh); 1408 path[level].bp_bh = NULL; 1409 } 1410 1411 static void nilfs_btree_nop(struct nilfs_bmap *btree, 1412 struct nilfs_btree_path *path, 1413 int level, __u64 *keyp, __u64 *ptrp) 1414 { 1415 } 1416 1417 static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree, 1418 struct nilfs_btree_path *path, 1419 int *levelp, 1420 struct nilfs_bmap_stats *stats, 1421 struct inode *dat) 1422 { 1423 struct buffer_head *bh; 1424 struct nilfs_btree_node *node, *parent, *sib; 1425 __u64 sibptr; 1426 int pindex, dindex, level, ncmin, ncmax, ncblk, ret; 1427 1428 ret = 0; 1429 stats->bs_nblocks = 0; 1430 ncmin = NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree)); 1431 ncblk = nilfs_btree_nchildren_per_block(btree); 1432 1433 for (level = NILFS_BTREE_LEVEL_NODE_MIN, dindex = path[level].bp_index; 1434 level < nilfs_btree_height(btree) - 1; 1435 level++) { 1436 node = nilfs_btree_get_nonroot_node(path, level); 1437 path[level].bp_oldreq.bpr_ptr = 1438 nilfs_btree_node_get_ptr(node, dindex, ncblk); 1439 ret = nilfs_bmap_prepare_end_ptr(btree, 1440 &path[level].bp_oldreq, dat); 1441 if (ret < 0) 1442 goto err_out_child_node; 1443 1444 if (nilfs_btree_node_get_nchildren(node) > ncmin) { 1445 path[level].bp_op = nilfs_btree_do_delete; 1446 stats->bs_nblocks++; 1447 goto out; 1448 } 1449 1450 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); 1451 pindex = path[level + 1].bp_index; 1452 dindex = pindex; 1453 1454 if (pindex > 0) { 1455 /* left sibling */ 1456 sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1, 1457 ncmax); 1458 ret = nilfs_btree_get_block(btree, sibptr, &bh); 1459 if (ret < 0) 1460 goto err_out_curr_node; 1461 sib = (struct nilfs_btree_node *)bh->b_data; 1462 if (nilfs_btree_node_get_nchildren(sib) > ncmin) { 1463 path[level].bp_sib_bh = bh; 1464 path[level].bp_op = nilfs_btree_borrow_left; 1465 stats->bs_nblocks++; 1466 goto out; 1467 } else { 1468 path[level].bp_sib_bh = bh; 1469 path[level].bp_op = nilfs_btree_concat_left; 1470 stats->bs_nblocks++; 1471 /* continue; */ 1472 } 1473 } else if (pindex < 1474 nilfs_btree_node_get_nchildren(parent) - 1) { 1475 /* right sibling */ 1476 sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1, 1477 ncmax); 1478 ret = nilfs_btree_get_block(btree, sibptr, &bh); 1479 if (ret < 0) 1480 goto err_out_curr_node; 1481 sib = (struct nilfs_btree_node *)bh->b_data; 1482 if (nilfs_btree_node_get_nchildren(sib) > ncmin) { 1483 path[level].bp_sib_bh = bh; 1484 path[level].bp_op = nilfs_btree_borrow_right; 1485 stats->bs_nblocks++; 1486 goto out; 1487 } else { 1488 path[level].bp_sib_bh = bh; 1489 path[level].bp_op = nilfs_btree_concat_right; 1490 stats->bs_nblocks++; 1491 /* 1492 * When merging right sibling node 1493 * into the current node, pointer to 1494 * the right sibling node must be 1495 * terminated instead. The adjustment 1496 * below is required for that. 1497 */ 1498 dindex = pindex + 1; 1499 /* continue; */ 1500 } 1501 } else { 1502 /* no siblings */ 1503 /* the only child of the root node */ 1504 WARN_ON(level != nilfs_btree_height(btree) - 2); 1505 if (nilfs_btree_node_get_nchildren(node) - 1 <= 1506 NILFS_BTREE_ROOT_NCHILDREN_MAX) { 1507 path[level].bp_op = nilfs_btree_shrink; 1508 stats->bs_nblocks += 2; 1509 level++; 1510 path[level].bp_op = nilfs_btree_nop; 1511 goto shrink_root_child; 1512 } else { 1513 path[level].bp_op = nilfs_btree_do_delete; 1514 stats->bs_nblocks++; 1515 goto out; 1516 } 1517 } 1518 } 1519 1520 /* child of the root node is deleted */ 1521 path[level].bp_op = nilfs_btree_do_delete; 1522 stats->bs_nblocks++; 1523 1524 shrink_root_child: 1525 node = nilfs_btree_get_root(btree); 1526 path[level].bp_oldreq.bpr_ptr = 1527 nilfs_btree_node_get_ptr(node, dindex, 1528 NILFS_BTREE_ROOT_NCHILDREN_MAX); 1529 1530 ret = nilfs_bmap_prepare_end_ptr(btree, &path[level].bp_oldreq, dat); 1531 if (ret < 0) 1532 goto err_out_child_node; 1533 1534 /* success */ 1535 out: 1536 *levelp = level; 1537 return ret; 1538 1539 /* error */ 1540 err_out_curr_node: 1541 nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat); 1542 err_out_child_node: 1543 for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) { 1544 brelse(path[level].bp_sib_bh); 1545 nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat); 1546 } 1547 *levelp = level; 1548 stats->bs_nblocks = 0; 1549 return ret; 1550 } 1551 1552 static void nilfs_btree_commit_delete(struct nilfs_bmap *btree, 1553 struct nilfs_btree_path *path, 1554 int maxlevel, struct inode *dat) 1555 { 1556 int level; 1557 1558 for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) { 1559 nilfs_bmap_commit_end_ptr(btree, &path[level].bp_oldreq, dat); 1560 path[level].bp_op(btree, path, level, NULL, NULL); 1561 } 1562 1563 if (!nilfs_bmap_dirty(btree)) 1564 nilfs_bmap_set_dirty(btree); 1565 } 1566 1567 static int nilfs_btree_delete(struct nilfs_bmap *btree, __u64 key) 1568 1569 { 1570 struct nilfs_btree_path *path; 1571 struct nilfs_bmap_stats stats; 1572 struct inode *dat; 1573 int level, ret; 1574 1575 path = nilfs_btree_alloc_path(); 1576 if (path == NULL) 1577 return -ENOMEM; 1578 1579 ret = nilfs_btree_do_lookup(btree, path, key, NULL, 1580 NILFS_BTREE_LEVEL_NODE_MIN, 0); 1581 if (ret < 0) 1582 goto out; 1583 1584 1585 dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL; 1586 1587 ret = nilfs_btree_prepare_delete(btree, path, &level, &stats, dat); 1588 if (ret < 0) 1589 goto out; 1590 nilfs_btree_commit_delete(btree, path, level, dat); 1591 nilfs_inode_sub_blocks(btree->b_inode, stats.bs_nblocks); 1592 1593 out: 1594 nilfs_btree_free_path(path); 1595 return ret; 1596 } 1597 1598 static int nilfs_btree_seek_key(const struct nilfs_bmap *btree, __u64 start, 1599 __u64 *keyp) 1600 { 1601 struct nilfs_btree_path *path; 1602 const int minlevel = NILFS_BTREE_LEVEL_NODE_MIN; 1603 int ret; 1604 1605 path = nilfs_btree_alloc_path(); 1606 if (!path) 1607 return -ENOMEM; 1608 1609 ret = nilfs_btree_do_lookup(btree, path, start, NULL, minlevel, 0); 1610 if (!ret) 1611 *keyp = start; 1612 else if (ret == -ENOENT) 1613 ret = nilfs_btree_get_next_key(btree, path, minlevel, keyp); 1614 1615 nilfs_btree_free_path(path); 1616 return ret; 1617 } 1618 1619 static int nilfs_btree_last_key(const struct nilfs_bmap *btree, __u64 *keyp) 1620 { 1621 struct nilfs_btree_path *path; 1622 int ret; 1623 1624 path = nilfs_btree_alloc_path(); 1625 if (path == NULL) 1626 return -ENOMEM; 1627 1628 ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL); 1629 1630 nilfs_btree_free_path(path); 1631 1632 return ret; 1633 } 1634 1635 static int nilfs_btree_check_delete(struct nilfs_bmap *btree, __u64 key) 1636 { 1637 struct buffer_head *bh; 1638 struct nilfs_btree_node *root, *node; 1639 __u64 maxkey, nextmaxkey; 1640 __u64 ptr; 1641 int nchildren, ret; 1642 1643 root = nilfs_btree_get_root(btree); 1644 switch (nilfs_btree_height(btree)) { 1645 case 2: 1646 bh = NULL; 1647 node = root; 1648 break; 1649 case 3: 1650 nchildren = nilfs_btree_node_get_nchildren(root); 1651 if (nchildren > 1) 1652 return 0; 1653 ptr = nilfs_btree_node_get_ptr(root, nchildren - 1, 1654 NILFS_BTREE_ROOT_NCHILDREN_MAX); 1655 ret = nilfs_btree_get_block(btree, ptr, &bh); 1656 if (ret < 0) 1657 return ret; 1658 node = (struct nilfs_btree_node *)bh->b_data; 1659 break; 1660 default: 1661 return 0; 1662 } 1663 1664 nchildren = nilfs_btree_node_get_nchildren(node); 1665 maxkey = nilfs_btree_node_get_key(node, nchildren - 1); 1666 nextmaxkey = (nchildren > 1) ? 1667 nilfs_btree_node_get_key(node, nchildren - 2) : 0; 1668 if (bh != NULL) 1669 brelse(bh); 1670 1671 return (maxkey == key) && (nextmaxkey < NILFS_BMAP_LARGE_LOW); 1672 } 1673 1674 static int nilfs_btree_gather_data(struct nilfs_bmap *btree, 1675 __u64 *keys, __u64 *ptrs, int nitems) 1676 { 1677 struct buffer_head *bh; 1678 struct nilfs_btree_node *node, *root; 1679 __le64 *dkeys; 1680 __le64 *dptrs; 1681 __u64 ptr; 1682 int nchildren, ncmax, i, ret; 1683 1684 root = nilfs_btree_get_root(btree); 1685 switch (nilfs_btree_height(btree)) { 1686 case 2: 1687 bh = NULL; 1688 node = root; 1689 ncmax = NILFS_BTREE_ROOT_NCHILDREN_MAX; 1690 break; 1691 case 3: 1692 nchildren = nilfs_btree_node_get_nchildren(root); 1693 WARN_ON(nchildren > 1); 1694 ptr = nilfs_btree_node_get_ptr(root, nchildren - 1, 1695 NILFS_BTREE_ROOT_NCHILDREN_MAX); 1696 ret = nilfs_btree_get_block(btree, ptr, &bh); 1697 if (ret < 0) 1698 return ret; 1699 node = (struct nilfs_btree_node *)bh->b_data; 1700 ncmax = nilfs_btree_nchildren_per_block(btree); 1701 break; 1702 default: 1703 node = NULL; 1704 return -EINVAL; 1705 } 1706 1707 nchildren = nilfs_btree_node_get_nchildren(node); 1708 if (nchildren < nitems) 1709 nitems = nchildren; 1710 dkeys = nilfs_btree_node_dkeys(node); 1711 dptrs = nilfs_btree_node_dptrs(node, ncmax); 1712 for (i = 0; i < nitems; i++) { 1713 keys[i] = le64_to_cpu(dkeys[i]); 1714 ptrs[i] = le64_to_cpu(dptrs[i]); 1715 } 1716 1717 if (bh != NULL) 1718 brelse(bh); 1719 1720 return nitems; 1721 } 1722 1723 static int 1724 nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *btree, __u64 key, 1725 union nilfs_bmap_ptr_req *dreq, 1726 union nilfs_bmap_ptr_req *nreq, 1727 struct buffer_head **bhp, 1728 struct nilfs_bmap_stats *stats) 1729 { 1730 struct buffer_head *bh; 1731 struct inode *dat = NULL; 1732 int ret; 1733 1734 stats->bs_nblocks = 0; 1735 1736 /* for data */ 1737 /* cannot find near ptr */ 1738 if (NILFS_BMAP_USE_VBN(btree)) { 1739 dreq->bpr_ptr = nilfs_btree_find_target_v(btree, NULL, key); 1740 dat = nilfs_bmap_get_dat(btree); 1741 } 1742 1743 ret = nilfs_bmap_prepare_alloc_ptr(btree, dreq, dat); 1744 if (ret < 0) 1745 return ret; 1746 1747 *bhp = NULL; 1748 stats->bs_nblocks++; 1749 if (nreq != NULL) { 1750 nreq->bpr_ptr = dreq->bpr_ptr + 1; 1751 ret = nilfs_bmap_prepare_alloc_ptr(btree, nreq, dat); 1752 if (ret < 0) 1753 goto err_out_dreq; 1754 1755 ret = nilfs_btree_get_new_block(btree, nreq->bpr_ptr, &bh); 1756 if (ret < 0) 1757 goto err_out_nreq; 1758 1759 *bhp = bh; 1760 stats->bs_nblocks++; 1761 } 1762 1763 /* success */ 1764 return 0; 1765 1766 /* error */ 1767 err_out_nreq: 1768 nilfs_bmap_abort_alloc_ptr(btree, nreq, dat); 1769 err_out_dreq: 1770 nilfs_bmap_abort_alloc_ptr(btree, dreq, dat); 1771 stats->bs_nblocks = 0; 1772 return ret; 1773 1774 } 1775 1776 static void 1777 nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *btree, 1778 __u64 key, __u64 ptr, 1779 const __u64 *keys, const __u64 *ptrs, 1780 int n, 1781 union nilfs_bmap_ptr_req *dreq, 1782 union nilfs_bmap_ptr_req *nreq, 1783 struct buffer_head *bh) 1784 { 1785 struct nilfs_btree_node *node; 1786 struct inode *dat; 1787 __u64 tmpptr; 1788 int ncblk; 1789 1790 /* free resources */ 1791 if (btree->b_ops->bop_clear != NULL) 1792 btree->b_ops->bop_clear(btree); 1793 1794 /* ptr must be a pointer to a buffer head. */ 1795 set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr)); 1796 1797 /* convert and insert */ 1798 dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL; 1799 __nilfs_btree_init(btree); 1800 if (nreq != NULL) { 1801 nilfs_bmap_commit_alloc_ptr(btree, dreq, dat); 1802 nilfs_bmap_commit_alloc_ptr(btree, nreq, dat); 1803 1804 /* create child node at level 1 */ 1805 node = (struct nilfs_btree_node *)bh->b_data; 1806 ncblk = nilfs_btree_nchildren_per_block(btree); 1807 nilfs_btree_node_init(node, 0, 1, n, ncblk, keys, ptrs); 1808 nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr, ncblk); 1809 if (!buffer_dirty(bh)) 1810 mark_buffer_dirty(bh); 1811 if (!nilfs_bmap_dirty(btree)) 1812 nilfs_bmap_set_dirty(btree); 1813 1814 brelse(bh); 1815 1816 /* create root node at level 2 */ 1817 node = nilfs_btree_get_root(btree); 1818 tmpptr = nreq->bpr_ptr; 1819 nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 2, 1, 1820 NILFS_BTREE_ROOT_NCHILDREN_MAX, 1821 &keys[0], &tmpptr); 1822 } else { 1823 nilfs_bmap_commit_alloc_ptr(btree, dreq, dat); 1824 1825 /* create root node at level 1 */ 1826 node = nilfs_btree_get_root(btree); 1827 nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 1, n, 1828 NILFS_BTREE_ROOT_NCHILDREN_MAX, 1829 keys, ptrs); 1830 nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr, 1831 NILFS_BTREE_ROOT_NCHILDREN_MAX); 1832 if (!nilfs_bmap_dirty(btree)) 1833 nilfs_bmap_set_dirty(btree); 1834 } 1835 1836 if (NILFS_BMAP_USE_VBN(btree)) 1837 nilfs_bmap_set_target_v(btree, key, dreq->bpr_ptr); 1838 } 1839 1840 /** 1841 * nilfs_btree_convert_and_insert - 1842 * @bmap: 1843 * @key: 1844 * @ptr: 1845 * @keys: 1846 * @ptrs: 1847 * @n: 1848 */ 1849 int nilfs_btree_convert_and_insert(struct nilfs_bmap *btree, 1850 __u64 key, __u64 ptr, 1851 const __u64 *keys, const __u64 *ptrs, int n) 1852 { 1853 struct buffer_head *bh = NULL; 1854 union nilfs_bmap_ptr_req dreq, nreq, *di, *ni; 1855 struct nilfs_bmap_stats stats; 1856 int ret; 1857 1858 if (n + 1 <= NILFS_BTREE_ROOT_NCHILDREN_MAX) { 1859 di = &dreq; 1860 ni = NULL; 1861 } else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX( 1862 1 << btree->b_inode->i_blkbits)) { 1863 di = &dreq; 1864 ni = &nreq; 1865 } else { 1866 di = NULL; 1867 ni = NULL; 1868 BUG(); 1869 } 1870 1871 ret = nilfs_btree_prepare_convert_and_insert(btree, key, di, ni, &bh, 1872 &stats); 1873 if (ret < 0) 1874 return ret; 1875 nilfs_btree_commit_convert_and_insert(btree, key, ptr, keys, ptrs, n, 1876 di, ni, bh); 1877 nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks); 1878 return 0; 1879 } 1880 1881 static int nilfs_btree_propagate_p(struct nilfs_bmap *btree, 1882 struct nilfs_btree_path *path, 1883 int level, 1884 struct buffer_head *bh) 1885 { 1886 while ((++level < nilfs_btree_height(btree) - 1) && 1887 !buffer_dirty(path[level].bp_bh)) 1888 mark_buffer_dirty(path[level].bp_bh); 1889 1890 return 0; 1891 } 1892 1893 static int nilfs_btree_prepare_update_v(struct nilfs_bmap *btree, 1894 struct nilfs_btree_path *path, 1895 int level, struct inode *dat) 1896 { 1897 struct nilfs_btree_node *parent; 1898 int ncmax, ret; 1899 1900 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); 1901 path[level].bp_oldreq.bpr_ptr = 1902 nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index, 1903 ncmax); 1904 path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1; 1905 ret = nilfs_dat_prepare_update(dat, &path[level].bp_oldreq.bpr_req, 1906 &path[level].bp_newreq.bpr_req); 1907 if (ret < 0) 1908 return ret; 1909 1910 if (buffer_nilfs_node(path[level].bp_bh)) { 1911 path[level].bp_ctxt.oldkey = path[level].bp_oldreq.bpr_ptr; 1912 path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr; 1913 path[level].bp_ctxt.bh = path[level].bp_bh; 1914 ret = nilfs_btnode_prepare_change_key( 1915 &NILFS_BMAP_I(btree)->i_btnode_cache, 1916 &path[level].bp_ctxt); 1917 if (ret < 0) { 1918 nilfs_dat_abort_update(dat, 1919 &path[level].bp_oldreq.bpr_req, 1920 &path[level].bp_newreq.bpr_req); 1921 return ret; 1922 } 1923 } 1924 1925 return 0; 1926 } 1927 1928 static void nilfs_btree_commit_update_v(struct nilfs_bmap *btree, 1929 struct nilfs_btree_path *path, 1930 int level, struct inode *dat) 1931 { 1932 struct nilfs_btree_node *parent; 1933 int ncmax; 1934 1935 nilfs_dat_commit_update(dat, &path[level].bp_oldreq.bpr_req, 1936 &path[level].bp_newreq.bpr_req, 1937 btree->b_ptr_type == NILFS_BMAP_PTR_VS); 1938 1939 if (buffer_nilfs_node(path[level].bp_bh)) { 1940 nilfs_btnode_commit_change_key( 1941 &NILFS_BMAP_I(btree)->i_btnode_cache, 1942 &path[level].bp_ctxt); 1943 path[level].bp_bh = path[level].bp_ctxt.bh; 1944 } 1945 set_buffer_nilfs_volatile(path[level].bp_bh); 1946 1947 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); 1948 nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index, 1949 path[level].bp_newreq.bpr_ptr, ncmax); 1950 } 1951 1952 static void nilfs_btree_abort_update_v(struct nilfs_bmap *btree, 1953 struct nilfs_btree_path *path, 1954 int level, struct inode *dat) 1955 { 1956 nilfs_dat_abort_update(dat, &path[level].bp_oldreq.bpr_req, 1957 &path[level].bp_newreq.bpr_req); 1958 if (buffer_nilfs_node(path[level].bp_bh)) 1959 nilfs_btnode_abort_change_key( 1960 &NILFS_BMAP_I(btree)->i_btnode_cache, 1961 &path[level].bp_ctxt); 1962 } 1963 1964 static int nilfs_btree_prepare_propagate_v(struct nilfs_bmap *btree, 1965 struct nilfs_btree_path *path, 1966 int minlevel, int *maxlevelp, 1967 struct inode *dat) 1968 { 1969 int level, ret; 1970 1971 level = minlevel; 1972 if (!buffer_nilfs_volatile(path[level].bp_bh)) { 1973 ret = nilfs_btree_prepare_update_v(btree, path, level, dat); 1974 if (ret < 0) 1975 return ret; 1976 } 1977 while ((++level < nilfs_btree_height(btree) - 1) && 1978 !buffer_dirty(path[level].bp_bh)) { 1979 1980 WARN_ON(buffer_nilfs_volatile(path[level].bp_bh)); 1981 ret = nilfs_btree_prepare_update_v(btree, path, level, dat); 1982 if (ret < 0) 1983 goto out; 1984 } 1985 1986 /* success */ 1987 *maxlevelp = level - 1; 1988 return 0; 1989 1990 /* error */ 1991 out: 1992 while (--level > minlevel) 1993 nilfs_btree_abort_update_v(btree, path, level, dat); 1994 if (!buffer_nilfs_volatile(path[level].bp_bh)) 1995 nilfs_btree_abort_update_v(btree, path, level, dat); 1996 return ret; 1997 } 1998 1999 static void nilfs_btree_commit_propagate_v(struct nilfs_bmap *btree, 2000 struct nilfs_btree_path *path, 2001 int minlevel, int maxlevel, 2002 struct buffer_head *bh, 2003 struct inode *dat) 2004 { 2005 int level; 2006 2007 if (!buffer_nilfs_volatile(path[minlevel].bp_bh)) 2008 nilfs_btree_commit_update_v(btree, path, minlevel, dat); 2009 2010 for (level = minlevel + 1; level <= maxlevel; level++) 2011 nilfs_btree_commit_update_v(btree, path, level, dat); 2012 } 2013 2014 static int nilfs_btree_propagate_v(struct nilfs_bmap *btree, 2015 struct nilfs_btree_path *path, 2016 int level, struct buffer_head *bh) 2017 { 2018 int maxlevel = 0, ret; 2019 struct nilfs_btree_node *parent; 2020 struct inode *dat = nilfs_bmap_get_dat(btree); 2021 __u64 ptr; 2022 int ncmax; 2023 2024 get_bh(bh); 2025 path[level].bp_bh = bh; 2026 ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel, 2027 dat); 2028 if (ret < 0) 2029 goto out; 2030 2031 if (buffer_nilfs_volatile(path[level].bp_bh)) { 2032 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); 2033 ptr = nilfs_btree_node_get_ptr(parent, 2034 path[level + 1].bp_index, 2035 ncmax); 2036 ret = nilfs_dat_mark_dirty(dat, ptr); 2037 if (ret < 0) 2038 goto out; 2039 } 2040 2041 nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh, dat); 2042 2043 out: 2044 brelse(path[level].bp_bh); 2045 path[level].bp_bh = NULL; 2046 return ret; 2047 } 2048 2049 static int nilfs_btree_propagate(struct nilfs_bmap *btree, 2050 struct buffer_head *bh) 2051 { 2052 struct nilfs_btree_path *path; 2053 struct nilfs_btree_node *node; 2054 __u64 key; 2055 int level, ret; 2056 2057 WARN_ON(!buffer_dirty(bh)); 2058 2059 path = nilfs_btree_alloc_path(); 2060 if (path == NULL) 2061 return -ENOMEM; 2062 2063 if (buffer_nilfs_node(bh)) { 2064 node = (struct nilfs_btree_node *)bh->b_data; 2065 key = nilfs_btree_node_get_key(node, 0); 2066 level = nilfs_btree_node_get_level(node); 2067 } else { 2068 key = nilfs_bmap_data_get_key(btree, bh); 2069 level = NILFS_BTREE_LEVEL_DATA; 2070 } 2071 2072 ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0); 2073 if (ret < 0) { 2074 if (unlikely(ret == -ENOENT)) 2075 printk(KERN_CRIT "%s: key = %llu, level == %d\n", 2076 __func__, (unsigned long long)key, level); 2077 goto out; 2078 } 2079 2080 ret = NILFS_BMAP_USE_VBN(btree) ? 2081 nilfs_btree_propagate_v(btree, path, level, bh) : 2082 nilfs_btree_propagate_p(btree, path, level, bh); 2083 2084 out: 2085 nilfs_btree_free_path(path); 2086 2087 return ret; 2088 } 2089 2090 static int nilfs_btree_propagate_gc(struct nilfs_bmap *btree, 2091 struct buffer_head *bh) 2092 { 2093 return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(btree), bh->b_blocknr); 2094 } 2095 2096 static void nilfs_btree_add_dirty_buffer(struct nilfs_bmap *btree, 2097 struct list_head *lists, 2098 struct buffer_head *bh) 2099 { 2100 struct list_head *head; 2101 struct buffer_head *cbh; 2102 struct nilfs_btree_node *node, *cnode; 2103 __u64 key, ckey; 2104 int level; 2105 2106 get_bh(bh); 2107 node = (struct nilfs_btree_node *)bh->b_data; 2108 key = nilfs_btree_node_get_key(node, 0); 2109 level = nilfs_btree_node_get_level(node); 2110 if (level < NILFS_BTREE_LEVEL_NODE_MIN || 2111 level >= NILFS_BTREE_LEVEL_MAX) { 2112 dump_stack(); 2113 printk(KERN_WARNING 2114 "%s: invalid btree level: %d (key=%llu, ino=%lu, " 2115 "blocknr=%llu)\n", 2116 __func__, level, (unsigned long long)key, 2117 NILFS_BMAP_I(btree)->vfs_inode.i_ino, 2118 (unsigned long long)bh->b_blocknr); 2119 return; 2120 } 2121 2122 list_for_each(head, &lists[level]) { 2123 cbh = list_entry(head, struct buffer_head, b_assoc_buffers); 2124 cnode = (struct nilfs_btree_node *)cbh->b_data; 2125 ckey = nilfs_btree_node_get_key(cnode, 0); 2126 if (key < ckey) 2127 break; 2128 } 2129 list_add_tail(&bh->b_assoc_buffers, head); 2130 } 2131 2132 static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *btree, 2133 struct list_head *listp) 2134 { 2135 struct address_space *btcache = &NILFS_BMAP_I(btree)->i_btnode_cache; 2136 struct list_head lists[NILFS_BTREE_LEVEL_MAX]; 2137 struct pagevec pvec; 2138 struct buffer_head *bh, *head; 2139 pgoff_t index = 0; 2140 int level, i; 2141 2142 for (level = NILFS_BTREE_LEVEL_NODE_MIN; 2143 level < NILFS_BTREE_LEVEL_MAX; 2144 level++) 2145 INIT_LIST_HEAD(&lists[level]); 2146 2147 pagevec_init(&pvec, 0); 2148 2149 while (pagevec_lookup_tag(&pvec, btcache, &index, PAGECACHE_TAG_DIRTY, 2150 PAGEVEC_SIZE)) { 2151 for (i = 0; i < pagevec_count(&pvec); i++) { 2152 bh = head = page_buffers(pvec.pages[i]); 2153 do { 2154 if (buffer_dirty(bh)) 2155 nilfs_btree_add_dirty_buffer(btree, 2156 lists, bh); 2157 } while ((bh = bh->b_this_page) != head); 2158 } 2159 pagevec_release(&pvec); 2160 cond_resched(); 2161 } 2162 2163 for (level = NILFS_BTREE_LEVEL_NODE_MIN; 2164 level < NILFS_BTREE_LEVEL_MAX; 2165 level++) 2166 list_splice_tail(&lists[level], listp); 2167 } 2168 2169 static int nilfs_btree_assign_p(struct nilfs_bmap *btree, 2170 struct nilfs_btree_path *path, 2171 int level, 2172 struct buffer_head **bh, 2173 sector_t blocknr, 2174 union nilfs_binfo *binfo) 2175 { 2176 struct nilfs_btree_node *parent; 2177 __u64 key; 2178 __u64 ptr; 2179 int ncmax, ret; 2180 2181 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); 2182 ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index, 2183 ncmax); 2184 if (buffer_nilfs_node(*bh)) { 2185 path[level].bp_ctxt.oldkey = ptr; 2186 path[level].bp_ctxt.newkey = blocknr; 2187 path[level].bp_ctxt.bh = *bh; 2188 ret = nilfs_btnode_prepare_change_key( 2189 &NILFS_BMAP_I(btree)->i_btnode_cache, 2190 &path[level].bp_ctxt); 2191 if (ret < 0) 2192 return ret; 2193 nilfs_btnode_commit_change_key( 2194 &NILFS_BMAP_I(btree)->i_btnode_cache, 2195 &path[level].bp_ctxt); 2196 *bh = path[level].bp_ctxt.bh; 2197 } 2198 2199 nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index, blocknr, 2200 ncmax); 2201 2202 key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index); 2203 /* on-disk format */ 2204 binfo->bi_dat.bi_blkoff = cpu_to_le64(key); 2205 binfo->bi_dat.bi_level = level; 2206 2207 return 0; 2208 } 2209 2210 static int nilfs_btree_assign_v(struct nilfs_bmap *btree, 2211 struct nilfs_btree_path *path, 2212 int level, 2213 struct buffer_head **bh, 2214 sector_t blocknr, 2215 union nilfs_binfo *binfo) 2216 { 2217 struct nilfs_btree_node *parent; 2218 struct inode *dat = nilfs_bmap_get_dat(btree); 2219 __u64 key; 2220 __u64 ptr; 2221 union nilfs_bmap_ptr_req req; 2222 int ncmax, ret; 2223 2224 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); 2225 ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index, 2226 ncmax); 2227 req.bpr_ptr = ptr; 2228 ret = nilfs_dat_prepare_start(dat, &req.bpr_req); 2229 if (ret < 0) 2230 return ret; 2231 nilfs_dat_commit_start(dat, &req.bpr_req, blocknr); 2232 2233 key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index); 2234 /* on-disk format */ 2235 binfo->bi_v.bi_vblocknr = cpu_to_le64(ptr); 2236 binfo->bi_v.bi_blkoff = cpu_to_le64(key); 2237 2238 return 0; 2239 } 2240 2241 static int nilfs_btree_assign(struct nilfs_bmap *btree, 2242 struct buffer_head **bh, 2243 sector_t blocknr, 2244 union nilfs_binfo *binfo) 2245 { 2246 struct nilfs_btree_path *path; 2247 struct nilfs_btree_node *node; 2248 __u64 key; 2249 int level, ret; 2250 2251 path = nilfs_btree_alloc_path(); 2252 if (path == NULL) 2253 return -ENOMEM; 2254 2255 if (buffer_nilfs_node(*bh)) { 2256 node = (struct nilfs_btree_node *)(*bh)->b_data; 2257 key = nilfs_btree_node_get_key(node, 0); 2258 level = nilfs_btree_node_get_level(node); 2259 } else { 2260 key = nilfs_bmap_data_get_key(btree, *bh); 2261 level = NILFS_BTREE_LEVEL_DATA; 2262 } 2263 2264 ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0); 2265 if (ret < 0) { 2266 WARN_ON(ret == -ENOENT); 2267 goto out; 2268 } 2269 2270 ret = NILFS_BMAP_USE_VBN(btree) ? 2271 nilfs_btree_assign_v(btree, path, level, bh, blocknr, binfo) : 2272 nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo); 2273 2274 out: 2275 nilfs_btree_free_path(path); 2276 2277 return ret; 2278 } 2279 2280 static int nilfs_btree_assign_gc(struct nilfs_bmap *btree, 2281 struct buffer_head **bh, 2282 sector_t blocknr, 2283 union nilfs_binfo *binfo) 2284 { 2285 struct nilfs_btree_node *node; 2286 __u64 key; 2287 int ret; 2288 2289 ret = nilfs_dat_move(nilfs_bmap_get_dat(btree), (*bh)->b_blocknr, 2290 blocknr); 2291 if (ret < 0) 2292 return ret; 2293 2294 if (buffer_nilfs_node(*bh)) { 2295 node = (struct nilfs_btree_node *)(*bh)->b_data; 2296 key = nilfs_btree_node_get_key(node, 0); 2297 } else 2298 key = nilfs_bmap_data_get_key(btree, *bh); 2299 2300 /* on-disk format */ 2301 binfo->bi_v.bi_vblocknr = cpu_to_le64((*bh)->b_blocknr); 2302 binfo->bi_v.bi_blkoff = cpu_to_le64(key); 2303 2304 return 0; 2305 } 2306 2307 static int nilfs_btree_mark(struct nilfs_bmap *btree, __u64 key, int level) 2308 { 2309 struct buffer_head *bh; 2310 struct nilfs_btree_path *path; 2311 __u64 ptr; 2312 int ret; 2313 2314 path = nilfs_btree_alloc_path(); 2315 if (path == NULL) 2316 return -ENOMEM; 2317 2318 ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1, 0); 2319 if (ret < 0) { 2320 WARN_ON(ret == -ENOENT); 2321 goto out; 2322 } 2323 ret = nilfs_btree_get_block(btree, ptr, &bh); 2324 if (ret < 0) { 2325 WARN_ON(ret == -ENOENT); 2326 goto out; 2327 } 2328 2329 if (!buffer_dirty(bh)) 2330 mark_buffer_dirty(bh); 2331 brelse(bh); 2332 if (!nilfs_bmap_dirty(btree)) 2333 nilfs_bmap_set_dirty(btree); 2334 2335 out: 2336 nilfs_btree_free_path(path); 2337 return ret; 2338 } 2339 2340 static const struct nilfs_bmap_operations nilfs_btree_ops = { 2341 .bop_lookup = nilfs_btree_lookup, 2342 .bop_lookup_contig = nilfs_btree_lookup_contig, 2343 .bop_insert = nilfs_btree_insert, 2344 .bop_delete = nilfs_btree_delete, 2345 .bop_clear = NULL, 2346 2347 .bop_propagate = nilfs_btree_propagate, 2348 2349 .bop_lookup_dirty_buffers = nilfs_btree_lookup_dirty_buffers, 2350 2351 .bop_assign = nilfs_btree_assign, 2352 .bop_mark = nilfs_btree_mark, 2353 2354 .bop_seek_key = nilfs_btree_seek_key, 2355 .bop_last_key = nilfs_btree_last_key, 2356 2357 .bop_check_insert = NULL, 2358 .bop_check_delete = nilfs_btree_check_delete, 2359 .bop_gather_data = nilfs_btree_gather_data, 2360 }; 2361 2362 static const struct nilfs_bmap_operations nilfs_btree_ops_gc = { 2363 .bop_lookup = NULL, 2364 .bop_lookup_contig = NULL, 2365 .bop_insert = NULL, 2366 .bop_delete = NULL, 2367 .bop_clear = NULL, 2368 2369 .bop_propagate = nilfs_btree_propagate_gc, 2370 2371 .bop_lookup_dirty_buffers = nilfs_btree_lookup_dirty_buffers, 2372 2373 .bop_assign = nilfs_btree_assign_gc, 2374 .bop_mark = NULL, 2375 2376 .bop_seek_key = NULL, 2377 .bop_last_key = NULL, 2378 2379 .bop_check_insert = NULL, 2380 .bop_check_delete = NULL, 2381 .bop_gather_data = NULL, 2382 }; 2383 2384 static void __nilfs_btree_init(struct nilfs_bmap *bmap) 2385 { 2386 bmap->b_ops = &nilfs_btree_ops; 2387 bmap->b_nchildren_per_block = 2388 NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap)); 2389 } 2390 2391 int nilfs_btree_init(struct nilfs_bmap *bmap) 2392 { 2393 int ret = 0; 2394 2395 __nilfs_btree_init(bmap); 2396 2397 if (nilfs_btree_root_broken(nilfs_btree_get_root(bmap), 2398 bmap->b_inode->i_ino)) 2399 ret = -EIO; 2400 return ret; 2401 } 2402 2403 void nilfs_btree_init_gc(struct nilfs_bmap *bmap) 2404 { 2405 bmap->b_ops = &nilfs_btree_ops_gc; 2406 bmap->b_nchildren_per_block = 2407 NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap)); 2408 } 2409