1 // SPDX-License-Identifier: GPL-2.0+ 2 /* 3 * btree.c - NILFS B-tree. 4 * 5 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. 6 * 7 * Written by Koji Sato. 8 */ 9 10 #include <linux/slab.h> 11 #include <linux/string.h> 12 #include <linux/errno.h> 13 #include <linux/pagevec.h> 14 #include "nilfs.h" 15 #include "page.h" 16 #include "btnode.h" 17 #include "btree.h" 18 #include "alloc.h" 19 #include "dat.h" 20 21 static void __nilfs_btree_init(struct nilfs_bmap *bmap); 22 23 static struct nilfs_btree_path *nilfs_btree_alloc_path(void) 24 { 25 struct nilfs_btree_path *path; 26 int level = NILFS_BTREE_LEVEL_DATA; 27 28 path = kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS); 29 if (path == NULL) 30 goto out; 31 32 for (; level < NILFS_BTREE_LEVEL_MAX; level++) { 33 path[level].bp_bh = NULL; 34 path[level].bp_sib_bh = NULL; 35 path[level].bp_index = 0; 36 path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR; 37 path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR; 38 path[level].bp_op = NULL; 39 } 40 41 out: 42 return path; 43 } 44 45 static void nilfs_btree_free_path(struct nilfs_btree_path *path) 46 { 47 int level = NILFS_BTREE_LEVEL_DATA; 48 49 for (; level < NILFS_BTREE_LEVEL_MAX; level++) 50 brelse(path[level].bp_bh); 51 52 kmem_cache_free(nilfs_btree_path_cache, path); 53 } 54 55 /* 56 * B-tree node operations 57 */ 58 static int nilfs_btree_get_new_block(const struct nilfs_bmap *btree, 59 __u64 ptr, struct buffer_head **bhp) 60 { 61 struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache; 62 struct buffer_head *bh; 63 64 bh = nilfs_btnode_create_block(btnc, ptr); 65 if (!bh) 66 return -ENOMEM; 67 68 set_buffer_nilfs_volatile(bh); 69 *bhp = bh; 70 return 0; 71 } 72 73 static int nilfs_btree_node_get_flags(const struct nilfs_btree_node *node) 74 { 75 return node->bn_flags; 76 } 77 78 static void 79 nilfs_btree_node_set_flags(struct nilfs_btree_node *node, int flags) 80 { 81 node->bn_flags = flags; 82 } 83 84 static int nilfs_btree_node_root(const struct nilfs_btree_node *node) 85 { 86 return nilfs_btree_node_get_flags(node) & NILFS_BTREE_NODE_ROOT; 87 } 88 89 static int nilfs_btree_node_get_level(const struct nilfs_btree_node *node) 90 { 91 return node->bn_level; 92 } 93 94 static void 95 nilfs_btree_node_set_level(struct nilfs_btree_node *node, int level) 96 { 97 node->bn_level = level; 98 } 99 100 static int nilfs_btree_node_get_nchildren(const struct nilfs_btree_node *node) 101 { 102 return le16_to_cpu(node->bn_nchildren); 103 } 104 105 static void 106 nilfs_btree_node_set_nchildren(struct nilfs_btree_node *node, int nchildren) 107 { 108 node->bn_nchildren = cpu_to_le16(nchildren); 109 } 110 111 static int nilfs_btree_node_size(const struct nilfs_bmap *btree) 112 { 113 return i_blocksize(btree->b_inode); 114 } 115 116 static int nilfs_btree_nchildren_per_block(const struct nilfs_bmap *btree) 117 { 118 return btree->b_nchildren_per_block; 119 } 120 121 static __le64 * 122 nilfs_btree_node_dkeys(const struct nilfs_btree_node *node) 123 { 124 return (__le64 *)((char *)(node + 1) + 125 (nilfs_btree_node_root(node) ? 
126 0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE)); 127 } 128 129 static __le64 * 130 nilfs_btree_node_dptrs(const struct nilfs_btree_node *node, int ncmax) 131 { 132 return (__le64 *)(nilfs_btree_node_dkeys(node) + ncmax); 133 } 134 135 static __u64 136 nilfs_btree_node_get_key(const struct nilfs_btree_node *node, int index) 137 { 138 return le64_to_cpu(*(nilfs_btree_node_dkeys(node) + index)); 139 } 140 141 static void 142 nilfs_btree_node_set_key(struct nilfs_btree_node *node, int index, __u64 key) 143 { 144 *(nilfs_btree_node_dkeys(node) + index) = cpu_to_le64(key); 145 } 146 147 static __u64 148 nilfs_btree_node_get_ptr(const struct nilfs_btree_node *node, int index, 149 int ncmax) 150 { 151 return le64_to_cpu(*(nilfs_btree_node_dptrs(node, ncmax) + index)); 152 } 153 154 static void 155 nilfs_btree_node_set_ptr(struct nilfs_btree_node *node, int index, __u64 ptr, 156 int ncmax) 157 { 158 *(nilfs_btree_node_dptrs(node, ncmax) + index) = cpu_to_le64(ptr); 159 } 160 161 static void nilfs_btree_node_init(struct nilfs_btree_node *node, int flags, 162 int level, int nchildren, int ncmax, 163 const __u64 *keys, const __u64 *ptrs) 164 { 165 __le64 *dkeys; 166 __le64 *dptrs; 167 int i; 168 169 nilfs_btree_node_set_flags(node, flags); 170 nilfs_btree_node_set_level(node, level); 171 nilfs_btree_node_set_nchildren(node, nchildren); 172 173 dkeys = nilfs_btree_node_dkeys(node); 174 dptrs = nilfs_btree_node_dptrs(node, ncmax); 175 for (i = 0; i < nchildren; i++) { 176 dkeys[i] = cpu_to_le64(keys[i]); 177 dptrs[i] = cpu_to_le64(ptrs[i]); 178 } 179 } 180 181 /* Assume the buffer heads corresponding to left and right are locked. */ 182 static void nilfs_btree_node_move_left(struct nilfs_btree_node *left, 183 struct nilfs_btree_node *right, 184 int n, int lncmax, int rncmax) 185 { 186 __le64 *ldkeys, *rdkeys; 187 __le64 *ldptrs, *rdptrs; 188 int lnchildren, rnchildren; 189 190 ldkeys = nilfs_btree_node_dkeys(left); 191 ldptrs = nilfs_btree_node_dptrs(left, lncmax); 192 lnchildren = nilfs_btree_node_get_nchildren(left); 193 194 rdkeys = nilfs_btree_node_dkeys(right); 195 rdptrs = nilfs_btree_node_dptrs(right, rncmax); 196 rnchildren = nilfs_btree_node_get_nchildren(right); 197 198 memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys)); 199 memcpy(ldptrs + lnchildren, rdptrs, n * sizeof(*rdptrs)); 200 memmove(rdkeys, rdkeys + n, (rnchildren - n) * sizeof(*rdkeys)); 201 memmove(rdptrs, rdptrs + n, (rnchildren - n) * sizeof(*rdptrs)); 202 203 lnchildren += n; 204 rnchildren -= n; 205 nilfs_btree_node_set_nchildren(left, lnchildren); 206 nilfs_btree_node_set_nchildren(right, rnchildren); 207 } 208 209 /* Assume that the buffer heads corresponding to left and right are locked. 
*/ 210 static void nilfs_btree_node_move_right(struct nilfs_btree_node *left, 211 struct nilfs_btree_node *right, 212 int n, int lncmax, int rncmax) 213 { 214 __le64 *ldkeys, *rdkeys; 215 __le64 *ldptrs, *rdptrs; 216 int lnchildren, rnchildren; 217 218 ldkeys = nilfs_btree_node_dkeys(left); 219 ldptrs = nilfs_btree_node_dptrs(left, lncmax); 220 lnchildren = nilfs_btree_node_get_nchildren(left); 221 222 rdkeys = nilfs_btree_node_dkeys(right); 223 rdptrs = nilfs_btree_node_dptrs(right, rncmax); 224 rnchildren = nilfs_btree_node_get_nchildren(right); 225 226 memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys)); 227 memmove(rdptrs + n, rdptrs, rnchildren * sizeof(*rdptrs)); 228 memcpy(rdkeys, ldkeys + lnchildren - n, n * sizeof(*rdkeys)); 229 memcpy(rdptrs, ldptrs + lnchildren - n, n * sizeof(*rdptrs)); 230 231 lnchildren -= n; 232 rnchildren += n; 233 nilfs_btree_node_set_nchildren(left, lnchildren); 234 nilfs_btree_node_set_nchildren(right, rnchildren); 235 } 236 237 /* Assume that the buffer head corresponding to node is locked. */ 238 static void nilfs_btree_node_insert(struct nilfs_btree_node *node, int index, 239 __u64 key, __u64 ptr, int ncmax) 240 { 241 __le64 *dkeys; 242 __le64 *dptrs; 243 int nchildren; 244 245 dkeys = nilfs_btree_node_dkeys(node); 246 dptrs = nilfs_btree_node_dptrs(node, ncmax); 247 nchildren = nilfs_btree_node_get_nchildren(node); 248 if (index < nchildren) { 249 memmove(dkeys + index + 1, dkeys + index, 250 (nchildren - index) * sizeof(*dkeys)); 251 memmove(dptrs + index + 1, dptrs + index, 252 (nchildren - index) * sizeof(*dptrs)); 253 } 254 dkeys[index] = cpu_to_le64(key); 255 dptrs[index] = cpu_to_le64(ptr); 256 nchildren++; 257 nilfs_btree_node_set_nchildren(node, nchildren); 258 } 259 260 /* Assume that the buffer head corresponding to node is locked. 
*/ 261 static void nilfs_btree_node_delete(struct nilfs_btree_node *node, int index, 262 __u64 *keyp, __u64 *ptrp, int ncmax) 263 { 264 __u64 key; 265 __u64 ptr; 266 __le64 *dkeys; 267 __le64 *dptrs; 268 int nchildren; 269 270 dkeys = nilfs_btree_node_dkeys(node); 271 dptrs = nilfs_btree_node_dptrs(node, ncmax); 272 key = le64_to_cpu(dkeys[index]); 273 ptr = le64_to_cpu(dptrs[index]); 274 nchildren = nilfs_btree_node_get_nchildren(node); 275 if (keyp != NULL) 276 *keyp = key; 277 if (ptrp != NULL) 278 *ptrp = ptr; 279 280 if (index < nchildren - 1) { 281 memmove(dkeys + index, dkeys + index + 1, 282 (nchildren - index - 1) * sizeof(*dkeys)); 283 memmove(dptrs + index, dptrs + index + 1, 284 (nchildren - index - 1) * sizeof(*dptrs)); 285 } 286 nchildren--; 287 nilfs_btree_node_set_nchildren(node, nchildren); 288 } 289 290 static int nilfs_btree_node_lookup(const struct nilfs_btree_node *node, 291 __u64 key, int *indexp) 292 { 293 __u64 nkey; 294 int index, low, high, s; 295 296 /* binary search */ 297 low = 0; 298 high = nilfs_btree_node_get_nchildren(node) - 1; 299 index = 0; 300 s = 0; 301 while (low <= high) { 302 index = (low + high) / 2; 303 nkey = nilfs_btree_node_get_key(node, index); 304 if (nkey == key) { 305 s = 0; 306 goto out; 307 } else if (nkey < key) { 308 low = index + 1; 309 s = -1; 310 } else { 311 high = index - 1; 312 s = 1; 313 } 314 } 315 316 /* adjust index */ 317 if (nilfs_btree_node_get_level(node) > NILFS_BTREE_LEVEL_NODE_MIN) { 318 if (s > 0 && index > 0) 319 index--; 320 } else if (s < 0) 321 index++; 322 323 out: 324 *indexp = index; 325 326 return s == 0; 327 } 328 329 /** 330 * nilfs_btree_node_broken - verify consistency of btree node 331 * @node: btree node block to be examined 332 * @size: node size (in bytes) 333 * @inode: host inode of btree 334 * @blocknr: block number 335 * 336 * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned. 337 */ 338 static int nilfs_btree_node_broken(const struct nilfs_btree_node *node, 339 size_t size, struct inode *inode, 340 sector_t blocknr) 341 { 342 int level, flags, nchildren; 343 int ret = 0; 344 345 level = nilfs_btree_node_get_level(node); 346 flags = nilfs_btree_node_get_flags(node); 347 nchildren = nilfs_btree_node_get_nchildren(node); 348 349 if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN || 350 level >= NILFS_BTREE_LEVEL_MAX || 351 (flags & NILFS_BTREE_NODE_ROOT) || 352 nchildren < 0 || 353 nchildren > NILFS_BTREE_NODE_NCHILDREN_MAX(size))) { 354 nilfs_crit(inode->i_sb, 355 "bad btree node (ino=%lu, blocknr=%llu): level = %d, flags = 0x%x, nchildren = %d", 356 inode->i_ino, (unsigned long long)blocknr, level, 357 flags, nchildren); 358 ret = 1; 359 } 360 return ret; 361 } 362 363 /** 364 * nilfs_btree_root_broken - verify consistency of btree root node 365 * @node: btree root node to be examined 366 * @inode: host inode of btree 367 * 368 * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned. 
369 */ 370 static int nilfs_btree_root_broken(const struct nilfs_btree_node *node, 371 struct inode *inode) 372 { 373 int level, flags, nchildren; 374 int ret = 0; 375 376 level = nilfs_btree_node_get_level(node); 377 flags = nilfs_btree_node_get_flags(node); 378 nchildren = nilfs_btree_node_get_nchildren(node); 379 380 if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN || 381 level >= NILFS_BTREE_LEVEL_MAX || 382 nchildren < 0 || 383 nchildren > NILFS_BTREE_ROOT_NCHILDREN_MAX)) { 384 nilfs_crit(inode->i_sb, 385 "bad btree root (ino=%lu): level = %d, flags = 0x%x, nchildren = %d", 386 inode->i_ino, level, flags, nchildren); 387 ret = 1; 388 } 389 return ret; 390 } 391 392 int nilfs_btree_broken_node_block(struct buffer_head *bh) 393 { 394 struct inode *inode; 395 int ret; 396 397 if (buffer_nilfs_checked(bh)) 398 return 0; 399 400 inode = bh->b_page->mapping->host; 401 ret = nilfs_btree_node_broken((struct nilfs_btree_node *)bh->b_data, 402 bh->b_size, inode, bh->b_blocknr); 403 if (likely(!ret)) 404 set_buffer_nilfs_checked(bh); 405 return ret; 406 } 407 408 static struct nilfs_btree_node * 409 nilfs_btree_get_root(const struct nilfs_bmap *btree) 410 { 411 return (struct nilfs_btree_node *)btree->b_u.u_data; 412 } 413 414 static struct nilfs_btree_node * 415 nilfs_btree_get_nonroot_node(const struct nilfs_btree_path *path, int level) 416 { 417 return (struct nilfs_btree_node *)path[level].bp_bh->b_data; 418 } 419 420 static struct nilfs_btree_node * 421 nilfs_btree_get_sib_node(const struct nilfs_btree_path *path, int level) 422 { 423 return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data; 424 } 425 426 static int nilfs_btree_height(const struct nilfs_bmap *btree) 427 { 428 return nilfs_btree_node_get_level(nilfs_btree_get_root(btree)) + 1; 429 } 430 431 static struct nilfs_btree_node * 432 nilfs_btree_get_node(const struct nilfs_bmap *btree, 433 const struct nilfs_btree_path *path, 434 int level, int *ncmaxp) 435 { 436 struct nilfs_btree_node *node; 437 438 if (level == nilfs_btree_height(btree) - 1) { 439 node = nilfs_btree_get_root(btree); 440 *ncmaxp = NILFS_BTREE_ROOT_NCHILDREN_MAX; 441 } else { 442 node = nilfs_btree_get_nonroot_node(path, level); 443 *ncmaxp = nilfs_btree_nchildren_per_block(btree); 444 } 445 return node; 446 } 447 448 static int nilfs_btree_bad_node(const struct nilfs_bmap *btree, 449 struct nilfs_btree_node *node, int level) 450 { 451 if (unlikely(nilfs_btree_node_get_level(node) != level)) { 452 dump_stack(); 453 nilfs_crit(btree->b_inode->i_sb, 454 "btree level mismatch (ino=%lu): %d != %d", 455 btree->b_inode->i_ino, 456 nilfs_btree_node_get_level(node), level); 457 return 1; 458 } 459 return 0; 460 } 461 462 struct nilfs_btree_readahead_info { 463 struct nilfs_btree_node *node; /* parent node */ 464 int max_ra_blocks; /* max nof blocks to read ahead */ 465 int index; /* current index on the parent node */ 466 int ncmax; /* nof children in the parent node */ 467 }; 468 469 static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr, 470 struct buffer_head **bhp, 471 const struct nilfs_btree_readahead_info *ra) 472 { 473 struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache; 474 struct buffer_head *bh, *ra_bh; 475 sector_t submit_ptr = 0; 476 int ret; 477 478 ret = nilfs_btnode_submit_block(btnc, ptr, 0, REQ_OP_READ, 0, &bh, 479 &submit_ptr); 480 if (ret) { 481 if (ret != -EEXIST) 482 return ret; 483 goto out_check; 484 } 485 486 if (ra) { 487 int i, n; 488 __u64 ptr2; 489 490 /* read ahead sibling nodes */ 491 for (n = 
ra->max_ra_blocks, i = ra->index + 1; 492 n > 0 && i < ra->ncmax; n--, i++) { 493 ptr2 = nilfs_btree_node_get_ptr(ra->node, i, ra->ncmax); 494 495 ret = nilfs_btnode_submit_block(btnc, ptr2, 0, 496 REQ_OP_READ, REQ_RAHEAD, 497 &ra_bh, &submit_ptr); 498 if (likely(!ret || ret == -EEXIST)) 499 brelse(ra_bh); 500 else if (ret != -EBUSY) 501 break; 502 if (!buffer_locked(bh)) 503 goto out_no_wait; 504 } 505 } 506 507 wait_on_buffer(bh); 508 509 out_no_wait: 510 if (!buffer_uptodate(bh)) { 511 nilfs_err(btree->b_inode->i_sb, 512 "I/O error reading b-tree node block (ino=%lu, blocknr=%llu)", 513 btree->b_inode->i_ino, (unsigned long long)ptr); 514 brelse(bh); 515 return -EIO; 516 } 517 518 out_check: 519 if (nilfs_btree_broken_node_block(bh)) { 520 clear_buffer_uptodate(bh); 521 brelse(bh); 522 return -EINVAL; 523 } 524 525 *bhp = bh; 526 return 0; 527 } 528 529 static int nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr, 530 struct buffer_head **bhp) 531 { 532 return __nilfs_btree_get_block(btree, ptr, bhp, NULL); 533 } 534 535 static int nilfs_btree_do_lookup(const struct nilfs_bmap *btree, 536 struct nilfs_btree_path *path, 537 __u64 key, __u64 *ptrp, int minlevel, 538 int readahead) 539 { 540 struct nilfs_btree_node *node; 541 struct nilfs_btree_readahead_info p, *ra; 542 __u64 ptr; 543 int level, index, found, ncmax, ret; 544 545 node = nilfs_btree_get_root(btree); 546 level = nilfs_btree_node_get_level(node); 547 if (level < minlevel || nilfs_btree_node_get_nchildren(node) <= 0) 548 return -ENOENT; 549 550 found = nilfs_btree_node_lookup(node, key, &index); 551 ptr = nilfs_btree_node_get_ptr(node, index, 552 NILFS_BTREE_ROOT_NCHILDREN_MAX); 553 path[level].bp_bh = NULL; 554 path[level].bp_index = index; 555 556 ncmax = nilfs_btree_nchildren_per_block(btree); 557 558 while (--level >= minlevel) { 559 ra = NULL; 560 if (level == NILFS_BTREE_LEVEL_NODE_MIN && readahead) { 561 p.node = nilfs_btree_get_node(btree, path, level + 1, 562 &p.ncmax); 563 p.index = index; 564 p.max_ra_blocks = 7; 565 ra = &p; 566 } 567 ret = __nilfs_btree_get_block(btree, ptr, &path[level].bp_bh, 568 ra); 569 if (ret < 0) 570 return ret; 571 572 node = nilfs_btree_get_nonroot_node(path, level); 573 if (nilfs_btree_bad_node(btree, node, level)) 574 return -EINVAL; 575 if (!found) 576 found = nilfs_btree_node_lookup(node, key, &index); 577 else 578 index = 0; 579 if (index < ncmax) { 580 ptr = nilfs_btree_node_get_ptr(node, index, ncmax); 581 } else { 582 WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN); 583 /* insert */ 584 ptr = NILFS_BMAP_INVALID_PTR; 585 } 586 path[level].bp_index = index; 587 } 588 if (!found) 589 return -ENOENT; 590 591 if (ptrp != NULL) 592 *ptrp = ptr; 593 594 return 0; 595 } 596 597 static int nilfs_btree_do_lookup_last(const struct nilfs_bmap *btree, 598 struct nilfs_btree_path *path, 599 __u64 *keyp, __u64 *ptrp) 600 { 601 struct nilfs_btree_node *node; 602 __u64 ptr; 603 int index, level, ncmax, ret; 604 605 node = nilfs_btree_get_root(btree); 606 index = nilfs_btree_node_get_nchildren(node) - 1; 607 if (index < 0) 608 return -ENOENT; 609 level = nilfs_btree_node_get_level(node); 610 ptr = nilfs_btree_node_get_ptr(node, index, 611 NILFS_BTREE_ROOT_NCHILDREN_MAX); 612 path[level].bp_bh = NULL; 613 path[level].bp_index = index; 614 ncmax = nilfs_btree_nchildren_per_block(btree); 615 616 for (level--; level > 0; level--) { 617 ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh); 618 if (ret < 0) 619 return ret; 620 node = nilfs_btree_get_nonroot_node(path, level); 
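		/* validate the child, then keep following its last (rightmost) entry */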
621 if (nilfs_btree_bad_node(btree, node, level)) 622 return -EINVAL; 623 index = nilfs_btree_node_get_nchildren(node) - 1; 624 ptr = nilfs_btree_node_get_ptr(node, index, ncmax); 625 path[level].bp_index = index; 626 } 627 628 if (keyp != NULL) 629 *keyp = nilfs_btree_node_get_key(node, index); 630 if (ptrp != NULL) 631 *ptrp = ptr; 632 633 return 0; 634 } 635 636 /** 637 * nilfs_btree_get_next_key - get next valid key from btree path array 638 * @btree: bmap struct of btree 639 * @path: array of nilfs_btree_path struct 640 * @minlevel: start level 641 * @nextkey: place to store the next valid key 642 * 643 * Return Value: If a next key was found, 0 is returned. Otherwise, 644 * -ENOENT is returned. 645 */ 646 static int nilfs_btree_get_next_key(const struct nilfs_bmap *btree, 647 const struct nilfs_btree_path *path, 648 int minlevel, __u64 *nextkey) 649 { 650 struct nilfs_btree_node *node; 651 int maxlevel = nilfs_btree_height(btree) - 1; 652 int index, next_adj, level; 653 654 /* Next index is already set to bp_index for leaf nodes. */ 655 next_adj = 0; 656 for (level = minlevel; level <= maxlevel; level++) { 657 if (level == maxlevel) 658 node = nilfs_btree_get_root(btree); 659 else 660 node = nilfs_btree_get_nonroot_node(path, level); 661 662 index = path[level].bp_index + next_adj; 663 if (index < nilfs_btree_node_get_nchildren(node)) { 664 /* Next key is in this node */ 665 *nextkey = nilfs_btree_node_get_key(node, index); 666 return 0; 667 } 668 /* For non-leaf nodes, next index is stored at bp_index + 1. */ 669 next_adj = 1; 670 } 671 return -ENOENT; 672 } 673 674 static int nilfs_btree_lookup(const struct nilfs_bmap *btree, 675 __u64 key, int level, __u64 *ptrp) 676 { 677 struct nilfs_btree_path *path; 678 int ret; 679 680 path = nilfs_btree_alloc_path(); 681 if (path == NULL) 682 return -ENOMEM; 683 684 ret = nilfs_btree_do_lookup(btree, path, key, ptrp, level, 0); 685 686 nilfs_btree_free_path(path); 687 688 return ret; 689 } 690 691 static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree, 692 __u64 key, __u64 *ptrp, 693 unsigned int maxblocks) 694 { 695 struct nilfs_btree_path *path; 696 struct nilfs_btree_node *node; 697 struct inode *dat = NULL; 698 __u64 ptr, ptr2; 699 sector_t blocknr; 700 int level = NILFS_BTREE_LEVEL_NODE_MIN; 701 int ret, cnt, index, maxlevel, ncmax; 702 struct nilfs_btree_readahead_info p; 703 704 path = nilfs_btree_alloc_path(); 705 if (path == NULL) 706 return -ENOMEM; 707 708 ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level, 1); 709 if (ret < 0) 710 goto out; 711 712 if (NILFS_BMAP_USE_VBN(btree)) { 713 dat = nilfs_bmap_get_dat(btree); 714 ret = nilfs_dat_translate(dat, ptr, &blocknr); 715 if (ret < 0) 716 goto out; 717 ptr = blocknr; 718 } 719 cnt = 1; 720 if (cnt == maxblocks) 721 goto end; 722 723 maxlevel = nilfs_btree_height(btree) - 1; 724 node = nilfs_btree_get_node(btree, path, level, &ncmax); 725 index = path[level].bp_index + 1; 726 for (;;) { 727 while (index < nilfs_btree_node_get_nchildren(node)) { 728 if (nilfs_btree_node_get_key(node, index) != 729 key + cnt) 730 goto end; 731 ptr2 = nilfs_btree_node_get_ptr(node, index, ncmax); 732 if (dat) { 733 ret = nilfs_dat_translate(dat, ptr2, &blocknr); 734 if (ret < 0) 735 goto out; 736 ptr2 = blocknr; 737 } 738 if (ptr2 != ptr + cnt || ++cnt == maxblocks) 739 goto end; 740 index++; 741 continue; 742 } 743 if (level == maxlevel) 744 break; 745 746 /* look-up right sibling node */ 747 p.node = nilfs_btree_get_node(btree, path, level + 1, &p.ncmax); 748 p.index = path[level 
+ 1].bp_index + 1; 749 p.max_ra_blocks = 7; 750 if (p.index >= nilfs_btree_node_get_nchildren(p.node) || 751 nilfs_btree_node_get_key(p.node, p.index) != key + cnt) 752 break; 753 ptr2 = nilfs_btree_node_get_ptr(p.node, p.index, p.ncmax); 754 path[level + 1].bp_index = p.index; 755 756 brelse(path[level].bp_bh); 757 path[level].bp_bh = NULL; 758 759 ret = __nilfs_btree_get_block(btree, ptr2, &path[level].bp_bh, 760 &p); 761 if (ret < 0) 762 goto out; 763 node = nilfs_btree_get_nonroot_node(path, level); 764 ncmax = nilfs_btree_nchildren_per_block(btree); 765 index = 0; 766 path[level].bp_index = index; 767 } 768 end: 769 *ptrp = ptr; 770 ret = cnt; 771 out: 772 nilfs_btree_free_path(path); 773 return ret; 774 } 775 776 static void nilfs_btree_promote_key(struct nilfs_bmap *btree, 777 struct nilfs_btree_path *path, 778 int level, __u64 key) 779 { 780 if (level < nilfs_btree_height(btree) - 1) { 781 do { 782 nilfs_btree_node_set_key( 783 nilfs_btree_get_nonroot_node(path, level), 784 path[level].bp_index, key); 785 if (!buffer_dirty(path[level].bp_bh)) 786 mark_buffer_dirty(path[level].bp_bh); 787 } while ((path[level].bp_index == 0) && 788 (++level < nilfs_btree_height(btree) - 1)); 789 } 790 791 /* root */ 792 if (level == nilfs_btree_height(btree) - 1) { 793 nilfs_btree_node_set_key(nilfs_btree_get_root(btree), 794 path[level].bp_index, key); 795 } 796 } 797 798 static void nilfs_btree_do_insert(struct nilfs_bmap *btree, 799 struct nilfs_btree_path *path, 800 int level, __u64 *keyp, __u64 *ptrp) 801 { 802 struct nilfs_btree_node *node; 803 int ncblk; 804 805 if (level < nilfs_btree_height(btree) - 1) { 806 node = nilfs_btree_get_nonroot_node(path, level); 807 ncblk = nilfs_btree_nchildren_per_block(btree); 808 nilfs_btree_node_insert(node, path[level].bp_index, 809 *keyp, *ptrp, ncblk); 810 if (!buffer_dirty(path[level].bp_bh)) 811 mark_buffer_dirty(path[level].bp_bh); 812 813 if (path[level].bp_index == 0) 814 nilfs_btree_promote_key(btree, path, level + 1, 815 nilfs_btree_node_get_key(node, 816 0)); 817 } else { 818 node = nilfs_btree_get_root(btree); 819 nilfs_btree_node_insert(node, path[level].bp_index, 820 *keyp, *ptrp, 821 NILFS_BTREE_ROOT_NCHILDREN_MAX); 822 } 823 } 824 825 static void nilfs_btree_carry_left(struct nilfs_bmap *btree, 826 struct nilfs_btree_path *path, 827 int level, __u64 *keyp, __u64 *ptrp) 828 { 829 struct nilfs_btree_node *node, *left; 830 int nchildren, lnchildren, n, move, ncblk; 831 832 node = nilfs_btree_get_nonroot_node(path, level); 833 left = nilfs_btree_get_sib_node(path, level); 834 nchildren = nilfs_btree_node_get_nchildren(node); 835 lnchildren = nilfs_btree_node_get_nchildren(left); 836 ncblk = nilfs_btree_nchildren_per_block(btree); 837 move = 0; 838 839 n = (nchildren + lnchildren + 1) / 2 - lnchildren; 840 if (n > path[level].bp_index) { 841 /* move insert point */ 842 n--; 843 move = 1; 844 } 845 846 nilfs_btree_node_move_left(left, node, n, ncblk, ncblk); 847 848 if (!buffer_dirty(path[level].bp_bh)) 849 mark_buffer_dirty(path[level].bp_bh); 850 if (!buffer_dirty(path[level].bp_sib_bh)) 851 mark_buffer_dirty(path[level].bp_sib_bh); 852 853 nilfs_btree_promote_key(btree, path, level + 1, 854 nilfs_btree_node_get_key(node, 0)); 855 856 if (move) { 857 brelse(path[level].bp_bh); 858 path[level].bp_bh = path[level].bp_sib_bh; 859 path[level].bp_sib_bh = NULL; 860 path[level].bp_index += lnchildren; 861 path[level + 1].bp_index--; 862 } else { 863 brelse(path[level].bp_sib_bh); 864 path[level].bp_sib_bh = NULL; 865 path[level].bp_index -= n; 866 } 
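	/*
	 * The path now points at the node and slot where the new entry
	 * belongs (the left sibling if the insert point moved there), so a
	 * plain insert finishes the job.
	 */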
867 868 nilfs_btree_do_insert(btree, path, level, keyp, ptrp); 869 } 870 871 static void nilfs_btree_carry_right(struct nilfs_bmap *btree, 872 struct nilfs_btree_path *path, 873 int level, __u64 *keyp, __u64 *ptrp) 874 { 875 struct nilfs_btree_node *node, *right; 876 int nchildren, rnchildren, n, move, ncblk; 877 878 node = nilfs_btree_get_nonroot_node(path, level); 879 right = nilfs_btree_get_sib_node(path, level); 880 nchildren = nilfs_btree_node_get_nchildren(node); 881 rnchildren = nilfs_btree_node_get_nchildren(right); 882 ncblk = nilfs_btree_nchildren_per_block(btree); 883 move = 0; 884 885 n = (nchildren + rnchildren + 1) / 2 - rnchildren; 886 if (n > nchildren - path[level].bp_index) { 887 /* move insert point */ 888 n--; 889 move = 1; 890 } 891 892 nilfs_btree_node_move_right(node, right, n, ncblk, ncblk); 893 894 if (!buffer_dirty(path[level].bp_bh)) 895 mark_buffer_dirty(path[level].bp_bh); 896 if (!buffer_dirty(path[level].bp_sib_bh)) 897 mark_buffer_dirty(path[level].bp_sib_bh); 898 899 path[level + 1].bp_index++; 900 nilfs_btree_promote_key(btree, path, level + 1, 901 nilfs_btree_node_get_key(right, 0)); 902 path[level + 1].bp_index--; 903 904 if (move) { 905 brelse(path[level].bp_bh); 906 path[level].bp_bh = path[level].bp_sib_bh; 907 path[level].bp_sib_bh = NULL; 908 path[level].bp_index -= nilfs_btree_node_get_nchildren(node); 909 path[level + 1].bp_index++; 910 } else { 911 brelse(path[level].bp_sib_bh); 912 path[level].bp_sib_bh = NULL; 913 } 914 915 nilfs_btree_do_insert(btree, path, level, keyp, ptrp); 916 } 917 918 static void nilfs_btree_split(struct nilfs_bmap *btree, 919 struct nilfs_btree_path *path, 920 int level, __u64 *keyp, __u64 *ptrp) 921 { 922 struct nilfs_btree_node *node, *right; 923 int nchildren, n, move, ncblk; 924 925 node = nilfs_btree_get_nonroot_node(path, level); 926 right = nilfs_btree_get_sib_node(path, level); 927 nchildren = nilfs_btree_node_get_nchildren(node); 928 ncblk = nilfs_btree_nchildren_per_block(btree); 929 move = 0; 930 931 n = (nchildren + 1) / 2; 932 if (n > nchildren - path[level].bp_index) { 933 n--; 934 move = 1; 935 } 936 937 nilfs_btree_node_move_right(node, right, n, ncblk, ncblk); 938 939 if (!buffer_dirty(path[level].bp_bh)) 940 mark_buffer_dirty(path[level].bp_bh); 941 if (!buffer_dirty(path[level].bp_sib_bh)) 942 mark_buffer_dirty(path[level].bp_sib_bh); 943 944 if (move) { 945 path[level].bp_index -= nilfs_btree_node_get_nchildren(node); 946 nilfs_btree_node_insert(right, path[level].bp_index, 947 *keyp, *ptrp, ncblk); 948 949 *keyp = nilfs_btree_node_get_key(right, 0); 950 *ptrp = path[level].bp_newreq.bpr_ptr; 951 952 brelse(path[level].bp_bh); 953 path[level].bp_bh = path[level].bp_sib_bh; 954 path[level].bp_sib_bh = NULL; 955 } else { 956 nilfs_btree_do_insert(btree, path, level, keyp, ptrp); 957 958 *keyp = nilfs_btree_node_get_key(right, 0); 959 *ptrp = path[level].bp_newreq.bpr_ptr; 960 961 brelse(path[level].bp_sib_bh); 962 path[level].bp_sib_bh = NULL; 963 } 964 965 path[level + 1].bp_index++; 966 } 967 968 static void nilfs_btree_grow(struct nilfs_bmap *btree, 969 struct nilfs_btree_path *path, 970 int level, __u64 *keyp, __u64 *ptrp) 971 { 972 struct nilfs_btree_node *root, *child; 973 int n, ncblk; 974 975 root = nilfs_btree_get_root(btree); 976 child = nilfs_btree_get_sib_node(path, level); 977 ncblk = nilfs_btree_nchildren_per_block(btree); 978 979 n = nilfs_btree_node_get_nchildren(root); 980 981 nilfs_btree_node_move_right(root, child, n, 982 NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk); 983 
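	/*
	 * All children of the old root now live in the newly allocated
	 * block; raise the root one level, insert the pending entry into
	 * that block, and report the block's first key and new pointer so
	 * the caller installs them in the emptied root.
	 */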
nilfs_btree_node_set_level(root, level + 1); 984 985 if (!buffer_dirty(path[level].bp_sib_bh)) 986 mark_buffer_dirty(path[level].bp_sib_bh); 987 988 path[level].bp_bh = path[level].bp_sib_bh; 989 path[level].bp_sib_bh = NULL; 990 991 nilfs_btree_do_insert(btree, path, level, keyp, ptrp); 992 993 *keyp = nilfs_btree_node_get_key(child, 0); 994 *ptrp = path[level].bp_newreq.bpr_ptr; 995 } 996 997 static __u64 nilfs_btree_find_near(const struct nilfs_bmap *btree, 998 const struct nilfs_btree_path *path) 999 { 1000 struct nilfs_btree_node *node; 1001 int level, ncmax; 1002 1003 if (path == NULL) 1004 return NILFS_BMAP_INVALID_PTR; 1005 1006 /* left sibling */ 1007 level = NILFS_BTREE_LEVEL_NODE_MIN; 1008 if (path[level].bp_index > 0) { 1009 node = nilfs_btree_get_node(btree, path, level, &ncmax); 1010 return nilfs_btree_node_get_ptr(node, 1011 path[level].bp_index - 1, 1012 ncmax); 1013 } 1014 1015 /* parent */ 1016 level = NILFS_BTREE_LEVEL_NODE_MIN + 1; 1017 if (level <= nilfs_btree_height(btree) - 1) { 1018 node = nilfs_btree_get_node(btree, path, level, &ncmax); 1019 return nilfs_btree_node_get_ptr(node, path[level].bp_index, 1020 ncmax); 1021 } 1022 1023 return NILFS_BMAP_INVALID_PTR; 1024 } 1025 1026 static __u64 nilfs_btree_find_target_v(const struct nilfs_bmap *btree, 1027 const struct nilfs_btree_path *path, 1028 __u64 key) 1029 { 1030 __u64 ptr; 1031 1032 ptr = nilfs_bmap_find_target_seq(btree, key); 1033 if (ptr != NILFS_BMAP_INVALID_PTR) 1034 /* sequential access */ 1035 return ptr; 1036 1037 ptr = nilfs_btree_find_near(btree, path); 1038 if (ptr != NILFS_BMAP_INVALID_PTR) 1039 /* near */ 1040 return ptr; 1041 1042 /* block group */ 1043 return nilfs_bmap_find_target_in_group(btree); 1044 } 1045 1046 static int nilfs_btree_prepare_insert(struct nilfs_bmap *btree, 1047 struct nilfs_btree_path *path, 1048 int *levelp, __u64 key, __u64 ptr, 1049 struct nilfs_bmap_stats *stats) 1050 { 1051 struct buffer_head *bh; 1052 struct nilfs_btree_node *node, *parent, *sib; 1053 __u64 sibptr; 1054 int pindex, level, ncmax, ncblk, ret; 1055 struct inode *dat = NULL; 1056 1057 stats->bs_nblocks = 0; 1058 level = NILFS_BTREE_LEVEL_DATA; 1059 1060 /* allocate a new ptr for data block */ 1061 if (NILFS_BMAP_USE_VBN(btree)) { 1062 path[level].bp_newreq.bpr_ptr = 1063 nilfs_btree_find_target_v(btree, path, key); 1064 dat = nilfs_bmap_get_dat(btree); 1065 } 1066 1067 ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat); 1068 if (ret < 0) 1069 goto err_out_data; 1070 1071 ncblk = nilfs_btree_nchildren_per_block(btree); 1072 1073 for (level = NILFS_BTREE_LEVEL_NODE_MIN; 1074 level < nilfs_btree_height(btree) - 1; 1075 level++) { 1076 node = nilfs_btree_get_nonroot_node(path, level); 1077 if (nilfs_btree_node_get_nchildren(node) < ncblk) { 1078 path[level].bp_op = nilfs_btree_do_insert; 1079 stats->bs_nblocks++; 1080 goto out; 1081 } 1082 1083 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); 1084 pindex = path[level + 1].bp_index; 1085 1086 /* left sibling */ 1087 if (pindex > 0) { 1088 sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1, 1089 ncmax); 1090 ret = nilfs_btree_get_block(btree, sibptr, &bh); 1091 if (ret < 0) 1092 goto err_out_child_node; 1093 sib = (struct nilfs_btree_node *)bh->b_data; 1094 if (nilfs_btree_node_get_nchildren(sib) < ncblk) { 1095 path[level].bp_sib_bh = bh; 1096 path[level].bp_op = nilfs_btree_carry_left; 1097 stats->bs_nblocks++; 1098 goto out; 1099 } else { 1100 brelse(bh); 1101 } 1102 } 1103 1104 /* right sibling */ 1105 if (pindex < 
nilfs_btree_node_get_nchildren(parent) - 1) { 1106 sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1, 1107 ncmax); 1108 ret = nilfs_btree_get_block(btree, sibptr, &bh); 1109 if (ret < 0) 1110 goto err_out_child_node; 1111 sib = (struct nilfs_btree_node *)bh->b_data; 1112 if (nilfs_btree_node_get_nchildren(sib) < ncblk) { 1113 path[level].bp_sib_bh = bh; 1114 path[level].bp_op = nilfs_btree_carry_right; 1115 stats->bs_nblocks++; 1116 goto out; 1117 } else { 1118 brelse(bh); 1119 } 1120 } 1121 1122 /* split */ 1123 path[level].bp_newreq.bpr_ptr = 1124 path[level - 1].bp_newreq.bpr_ptr + 1; 1125 ret = nilfs_bmap_prepare_alloc_ptr(btree, 1126 &path[level].bp_newreq, dat); 1127 if (ret < 0) 1128 goto err_out_child_node; 1129 ret = nilfs_btree_get_new_block(btree, 1130 path[level].bp_newreq.bpr_ptr, 1131 &bh); 1132 if (ret < 0) 1133 goto err_out_curr_node; 1134 1135 stats->bs_nblocks++; 1136 1137 sib = (struct nilfs_btree_node *)bh->b_data; 1138 nilfs_btree_node_init(sib, 0, level, 0, ncblk, NULL, NULL); 1139 path[level].bp_sib_bh = bh; 1140 path[level].bp_op = nilfs_btree_split; 1141 } 1142 1143 /* root */ 1144 node = nilfs_btree_get_root(btree); 1145 if (nilfs_btree_node_get_nchildren(node) < 1146 NILFS_BTREE_ROOT_NCHILDREN_MAX) { 1147 path[level].bp_op = nilfs_btree_do_insert; 1148 stats->bs_nblocks++; 1149 goto out; 1150 } 1151 1152 /* grow */ 1153 path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1; 1154 ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat); 1155 if (ret < 0) 1156 goto err_out_child_node; 1157 ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr, 1158 &bh); 1159 if (ret < 0) 1160 goto err_out_curr_node; 1161 1162 nilfs_btree_node_init((struct nilfs_btree_node *)bh->b_data, 1163 0, level, 0, ncblk, NULL, NULL); 1164 path[level].bp_sib_bh = bh; 1165 path[level].bp_op = nilfs_btree_grow; 1166 1167 level++; 1168 path[level].bp_op = nilfs_btree_do_insert; 1169 1170 /* a newly-created node block and a data block are added */ 1171 stats->bs_nblocks += 2; 1172 1173 /* success */ 1174 out: 1175 *levelp = level; 1176 return ret; 1177 1178 /* error */ 1179 err_out_curr_node: 1180 nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat); 1181 err_out_child_node: 1182 for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) { 1183 nilfs_btnode_delete(path[level].bp_sib_bh); 1184 nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat); 1185 1186 } 1187 1188 nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat); 1189 err_out_data: 1190 *levelp = level; 1191 stats->bs_nblocks = 0; 1192 return ret; 1193 } 1194 1195 static void nilfs_btree_commit_insert(struct nilfs_bmap *btree, 1196 struct nilfs_btree_path *path, 1197 int maxlevel, __u64 key, __u64 ptr) 1198 { 1199 struct inode *dat = NULL; 1200 int level; 1201 1202 set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr)); 1203 ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr; 1204 if (NILFS_BMAP_USE_VBN(btree)) { 1205 nilfs_bmap_set_target_v(btree, key, ptr); 1206 dat = nilfs_bmap_get_dat(btree); 1207 } 1208 1209 for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) { 1210 nilfs_bmap_commit_alloc_ptr(btree, 1211 &path[level - 1].bp_newreq, dat); 1212 path[level].bp_op(btree, path, level, &key, &ptr); 1213 } 1214 1215 if (!nilfs_bmap_dirty(btree)) 1216 nilfs_bmap_set_dirty(btree); 1217 } 1218 1219 static int nilfs_btree_insert(struct nilfs_bmap *btree, __u64 key, __u64 ptr) 1220 { 1221 struct nilfs_btree_path *path; 1222 struct 
nilfs_bmap_stats stats; 1223 int level, ret; 1224 1225 path = nilfs_btree_alloc_path(); 1226 if (path == NULL) 1227 return -ENOMEM; 1228 1229 ret = nilfs_btree_do_lookup(btree, path, key, NULL, 1230 NILFS_BTREE_LEVEL_NODE_MIN, 0); 1231 if (ret != -ENOENT) { 1232 if (ret == 0) 1233 ret = -EEXIST; 1234 goto out; 1235 } 1236 1237 ret = nilfs_btree_prepare_insert(btree, path, &level, key, ptr, &stats); 1238 if (ret < 0) 1239 goto out; 1240 nilfs_btree_commit_insert(btree, path, level, key, ptr); 1241 nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks); 1242 1243 out: 1244 nilfs_btree_free_path(path); 1245 return ret; 1246 } 1247 1248 static void nilfs_btree_do_delete(struct nilfs_bmap *btree, 1249 struct nilfs_btree_path *path, 1250 int level, __u64 *keyp, __u64 *ptrp) 1251 { 1252 struct nilfs_btree_node *node; 1253 int ncblk; 1254 1255 if (level < nilfs_btree_height(btree) - 1) { 1256 node = nilfs_btree_get_nonroot_node(path, level); 1257 ncblk = nilfs_btree_nchildren_per_block(btree); 1258 nilfs_btree_node_delete(node, path[level].bp_index, 1259 keyp, ptrp, ncblk); 1260 if (!buffer_dirty(path[level].bp_bh)) 1261 mark_buffer_dirty(path[level].bp_bh); 1262 if (path[level].bp_index == 0) 1263 nilfs_btree_promote_key(btree, path, level + 1, 1264 nilfs_btree_node_get_key(node, 0)); 1265 } else { 1266 node = nilfs_btree_get_root(btree); 1267 nilfs_btree_node_delete(node, path[level].bp_index, 1268 keyp, ptrp, 1269 NILFS_BTREE_ROOT_NCHILDREN_MAX); 1270 } 1271 } 1272 1273 static void nilfs_btree_borrow_left(struct nilfs_bmap *btree, 1274 struct nilfs_btree_path *path, 1275 int level, __u64 *keyp, __u64 *ptrp) 1276 { 1277 struct nilfs_btree_node *node, *left; 1278 int nchildren, lnchildren, n, ncblk; 1279 1280 nilfs_btree_do_delete(btree, path, level, keyp, ptrp); 1281 1282 node = nilfs_btree_get_nonroot_node(path, level); 1283 left = nilfs_btree_get_sib_node(path, level); 1284 nchildren = nilfs_btree_node_get_nchildren(node); 1285 lnchildren = nilfs_btree_node_get_nchildren(left); 1286 ncblk = nilfs_btree_nchildren_per_block(btree); 1287 1288 n = (nchildren + lnchildren) / 2 - nchildren; 1289 1290 nilfs_btree_node_move_right(left, node, n, ncblk, ncblk); 1291 1292 if (!buffer_dirty(path[level].bp_bh)) 1293 mark_buffer_dirty(path[level].bp_bh); 1294 if (!buffer_dirty(path[level].bp_sib_bh)) 1295 mark_buffer_dirty(path[level].bp_sib_bh); 1296 1297 nilfs_btree_promote_key(btree, path, level + 1, 1298 nilfs_btree_node_get_key(node, 0)); 1299 1300 brelse(path[level].bp_sib_bh); 1301 path[level].bp_sib_bh = NULL; 1302 path[level].bp_index += n; 1303 } 1304 1305 static void nilfs_btree_borrow_right(struct nilfs_bmap *btree, 1306 struct nilfs_btree_path *path, 1307 int level, __u64 *keyp, __u64 *ptrp) 1308 { 1309 struct nilfs_btree_node *node, *right; 1310 int nchildren, rnchildren, n, ncblk; 1311 1312 nilfs_btree_do_delete(btree, path, level, keyp, ptrp); 1313 1314 node = nilfs_btree_get_nonroot_node(path, level); 1315 right = nilfs_btree_get_sib_node(path, level); 1316 nchildren = nilfs_btree_node_get_nchildren(node); 1317 rnchildren = nilfs_btree_node_get_nchildren(right); 1318 ncblk = nilfs_btree_nchildren_per_block(btree); 1319 1320 n = (nchildren + rnchildren) / 2 - nchildren; 1321 1322 nilfs_btree_node_move_left(node, right, n, ncblk, ncblk); 1323 1324 if (!buffer_dirty(path[level].bp_bh)) 1325 mark_buffer_dirty(path[level].bp_bh); 1326 if (!buffer_dirty(path[level].bp_sib_bh)) 1327 mark_buffer_dirty(path[level].bp_sib_bh); 1328 1329 path[level + 1].bp_index++; 1330 
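	/*
	 * Temporarily point the parent index at the right sibling's slot so
	 * the promoted key (the sibling's new first key) lands there; the
	 * index is restored right after.
	 */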
nilfs_btree_promote_key(btree, path, level + 1, 1331 nilfs_btree_node_get_key(right, 0)); 1332 path[level + 1].bp_index--; 1333 1334 brelse(path[level].bp_sib_bh); 1335 path[level].bp_sib_bh = NULL; 1336 } 1337 1338 static void nilfs_btree_concat_left(struct nilfs_bmap *btree, 1339 struct nilfs_btree_path *path, 1340 int level, __u64 *keyp, __u64 *ptrp) 1341 { 1342 struct nilfs_btree_node *node, *left; 1343 int n, ncblk; 1344 1345 nilfs_btree_do_delete(btree, path, level, keyp, ptrp); 1346 1347 node = nilfs_btree_get_nonroot_node(path, level); 1348 left = nilfs_btree_get_sib_node(path, level); 1349 ncblk = nilfs_btree_nchildren_per_block(btree); 1350 1351 n = nilfs_btree_node_get_nchildren(node); 1352 1353 nilfs_btree_node_move_left(left, node, n, ncblk, ncblk); 1354 1355 if (!buffer_dirty(path[level].bp_sib_bh)) 1356 mark_buffer_dirty(path[level].bp_sib_bh); 1357 1358 nilfs_btnode_delete(path[level].bp_bh); 1359 path[level].bp_bh = path[level].bp_sib_bh; 1360 path[level].bp_sib_bh = NULL; 1361 path[level].bp_index += nilfs_btree_node_get_nchildren(left); 1362 } 1363 1364 static void nilfs_btree_concat_right(struct nilfs_bmap *btree, 1365 struct nilfs_btree_path *path, 1366 int level, __u64 *keyp, __u64 *ptrp) 1367 { 1368 struct nilfs_btree_node *node, *right; 1369 int n, ncblk; 1370 1371 nilfs_btree_do_delete(btree, path, level, keyp, ptrp); 1372 1373 node = nilfs_btree_get_nonroot_node(path, level); 1374 right = nilfs_btree_get_sib_node(path, level); 1375 ncblk = nilfs_btree_nchildren_per_block(btree); 1376 1377 n = nilfs_btree_node_get_nchildren(right); 1378 1379 nilfs_btree_node_move_left(node, right, n, ncblk, ncblk); 1380 1381 if (!buffer_dirty(path[level].bp_bh)) 1382 mark_buffer_dirty(path[level].bp_bh); 1383 1384 nilfs_btnode_delete(path[level].bp_sib_bh); 1385 path[level].bp_sib_bh = NULL; 1386 path[level + 1].bp_index++; 1387 } 1388 1389 static void nilfs_btree_shrink(struct nilfs_bmap *btree, 1390 struct nilfs_btree_path *path, 1391 int level, __u64 *keyp, __u64 *ptrp) 1392 { 1393 struct nilfs_btree_node *root, *child; 1394 int n, ncblk; 1395 1396 nilfs_btree_do_delete(btree, path, level, keyp, ptrp); 1397 1398 root = nilfs_btree_get_root(btree); 1399 child = nilfs_btree_get_nonroot_node(path, level); 1400 ncblk = nilfs_btree_nchildren_per_block(btree); 1401 1402 nilfs_btree_node_delete(root, 0, NULL, NULL, 1403 NILFS_BTREE_ROOT_NCHILDREN_MAX); 1404 nilfs_btree_node_set_level(root, level); 1405 n = nilfs_btree_node_get_nchildren(child); 1406 nilfs_btree_node_move_left(root, child, n, 1407 NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk); 1408 1409 nilfs_btnode_delete(path[level].bp_bh); 1410 path[level].bp_bh = NULL; 1411 } 1412 1413 static void nilfs_btree_nop(struct nilfs_bmap *btree, 1414 struct nilfs_btree_path *path, 1415 int level, __u64 *keyp, __u64 *ptrp) 1416 { 1417 } 1418 1419 static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree, 1420 struct nilfs_btree_path *path, 1421 int *levelp, 1422 struct nilfs_bmap_stats *stats, 1423 struct inode *dat) 1424 { 1425 struct buffer_head *bh; 1426 struct nilfs_btree_node *node, *parent, *sib; 1427 __u64 sibptr; 1428 int pindex, dindex, level, ncmin, ncmax, ncblk, ret; 1429 1430 ret = 0; 1431 stats->bs_nblocks = 0; 1432 ncmin = NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree)); 1433 ncblk = nilfs_btree_nchildren_per_block(btree); 1434 1435 for (level = NILFS_BTREE_LEVEL_NODE_MIN, dindex = path[level].bp_index; 1436 level < nilfs_btree_height(btree) - 1; 1437 level++) { 1438 node = nilfs_btree_get_nonroot_node(path, level); 
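		/*
		 * dindex is the slot whose pointer gets disposed at this
		 * level; when the child below was merged with its right
		 * sibling, it points at that sibling's slot instead (see the
		 * concat_right case below).
		 */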
1439 path[level].bp_oldreq.bpr_ptr = 1440 nilfs_btree_node_get_ptr(node, dindex, ncblk); 1441 ret = nilfs_bmap_prepare_end_ptr(btree, 1442 &path[level].bp_oldreq, dat); 1443 if (ret < 0) 1444 goto err_out_child_node; 1445 1446 if (nilfs_btree_node_get_nchildren(node) > ncmin) { 1447 path[level].bp_op = nilfs_btree_do_delete; 1448 stats->bs_nblocks++; 1449 goto out; 1450 } 1451 1452 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); 1453 pindex = path[level + 1].bp_index; 1454 dindex = pindex; 1455 1456 if (pindex > 0) { 1457 /* left sibling */ 1458 sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1, 1459 ncmax); 1460 ret = nilfs_btree_get_block(btree, sibptr, &bh); 1461 if (ret < 0) 1462 goto err_out_curr_node; 1463 sib = (struct nilfs_btree_node *)bh->b_data; 1464 if (nilfs_btree_node_get_nchildren(sib) > ncmin) { 1465 path[level].bp_sib_bh = bh; 1466 path[level].bp_op = nilfs_btree_borrow_left; 1467 stats->bs_nblocks++; 1468 goto out; 1469 } else { 1470 path[level].bp_sib_bh = bh; 1471 path[level].bp_op = nilfs_btree_concat_left; 1472 stats->bs_nblocks++; 1473 /* continue; */ 1474 } 1475 } else if (pindex < 1476 nilfs_btree_node_get_nchildren(parent) - 1) { 1477 /* right sibling */ 1478 sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1, 1479 ncmax); 1480 ret = nilfs_btree_get_block(btree, sibptr, &bh); 1481 if (ret < 0) 1482 goto err_out_curr_node; 1483 sib = (struct nilfs_btree_node *)bh->b_data; 1484 if (nilfs_btree_node_get_nchildren(sib) > ncmin) { 1485 path[level].bp_sib_bh = bh; 1486 path[level].bp_op = nilfs_btree_borrow_right; 1487 stats->bs_nblocks++; 1488 goto out; 1489 } else { 1490 path[level].bp_sib_bh = bh; 1491 path[level].bp_op = nilfs_btree_concat_right; 1492 stats->bs_nblocks++; 1493 /* 1494 * When merging right sibling node 1495 * into the current node, pointer to 1496 * the right sibling node must be 1497 * terminated instead. The adjustment 1498 * below is required for that. 
1499 */ 1500 dindex = pindex + 1; 1501 /* continue; */ 1502 } 1503 } else { 1504 /* no siblings */ 1505 /* the only child of the root node */ 1506 WARN_ON(level != nilfs_btree_height(btree) - 2); 1507 if (nilfs_btree_node_get_nchildren(node) - 1 <= 1508 NILFS_BTREE_ROOT_NCHILDREN_MAX) { 1509 path[level].bp_op = nilfs_btree_shrink; 1510 stats->bs_nblocks += 2; 1511 level++; 1512 path[level].bp_op = nilfs_btree_nop; 1513 goto shrink_root_child; 1514 } else { 1515 path[level].bp_op = nilfs_btree_do_delete; 1516 stats->bs_nblocks++; 1517 goto out; 1518 } 1519 } 1520 } 1521 1522 /* child of the root node is deleted */ 1523 path[level].bp_op = nilfs_btree_do_delete; 1524 stats->bs_nblocks++; 1525 1526 shrink_root_child: 1527 node = nilfs_btree_get_root(btree); 1528 path[level].bp_oldreq.bpr_ptr = 1529 nilfs_btree_node_get_ptr(node, dindex, 1530 NILFS_BTREE_ROOT_NCHILDREN_MAX); 1531 1532 ret = nilfs_bmap_prepare_end_ptr(btree, &path[level].bp_oldreq, dat); 1533 if (ret < 0) 1534 goto err_out_child_node; 1535 1536 /* success */ 1537 out: 1538 *levelp = level; 1539 return ret; 1540 1541 /* error */ 1542 err_out_curr_node: 1543 nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat); 1544 err_out_child_node: 1545 for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) { 1546 brelse(path[level].bp_sib_bh); 1547 nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat); 1548 } 1549 *levelp = level; 1550 stats->bs_nblocks = 0; 1551 return ret; 1552 } 1553 1554 static void nilfs_btree_commit_delete(struct nilfs_bmap *btree, 1555 struct nilfs_btree_path *path, 1556 int maxlevel, struct inode *dat) 1557 { 1558 int level; 1559 1560 for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) { 1561 nilfs_bmap_commit_end_ptr(btree, &path[level].bp_oldreq, dat); 1562 path[level].bp_op(btree, path, level, NULL, NULL); 1563 } 1564 1565 if (!nilfs_bmap_dirty(btree)) 1566 nilfs_bmap_set_dirty(btree); 1567 } 1568 1569 static int nilfs_btree_delete(struct nilfs_bmap *btree, __u64 key) 1570 1571 { 1572 struct nilfs_btree_path *path; 1573 struct nilfs_bmap_stats stats; 1574 struct inode *dat; 1575 int level, ret; 1576 1577 path = nilfs_btree_alloc_path(); 1578 if (path == NULL) 1579 return -ENOMEM; 1580 1581 ret = nilfs_btree_do_lookup(btree, path, key, NULL, 1582 NILFS_BTREE_LEVEL_NODE_MIN, 0); 1583 if (ret < 0) 1584 goto out; 1585 1586 1587 dat = NILFS_BMAP_USE_VBN(btree) ? 
nilfs_bmap_get_dat(btree) : NULL; 1588 1589 ret = nilfs_btree_prepare_delete(btree, path, &level, &stats, dat); 1590 if (ret < 0) 1591 goto out; 1592 nilfs_btree_commit_delete(btree, path, level, dat); 1593 nilfs_inode_sub_blocks(btree->b_inode, stats.bs_nblocks); 1594 1595 out: 1596 nilfs_btree_free_path(path); 1597 return ret; 1598 } 1599 1600 static int nilfs_btree_seek_key(const struct nilfs_bmap *btree, __u64 start, 1601 __u64 *keyp) 1602 { 1603 struct nilfs_btree_path *path; 1604 const int minlevel = NILFS_BTREE_LEVEL_NODE_MIN; 1605 int ret; 1606 1607 path = nilfs_btree_alloc_path(); 1608 if (!path) 1609 return -ENOMEM; 1610 1611 ret = nilfs_btree_do_lookup(btree, path, start, NULL, minlevel, 0); 1612 if (!ret) 1613 *keyp = start; 1614 else if (ret == -ENOENT) 1615 ret = nilfs_btree_get_next_key(btree, path, minlevel, keyp); 1616 1617 nilfs_btree_free_path(path); 1618 return ret; 1619 } 1620 1621 static int nilfs_btree_last_key(const struct nilfs_bmap *btree, __u64 *keyp) 1622 { 1623 struct nilfs_btree_path *path; 1624 int ret; 1625 1626 path = nilfs_btree_alloc_path(); 1627 if (path == NULL) 1628 return -ENOMEM; 1629 1630 ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL); 1631 1632 nilfs_btree_free_path(path); 1633 1634 return ret; 1635 } 1636 1637 static int nilfs_btree_check_delete(struct nilfs_bmap *btree, __u64 key) 1638 { 1639 struct buffer_head *bh; 1640 struct nilfs_btree_node *root, *node; 1641 __u64 maxkey, nextmaxkey; 1642 __u64 ptr; 1643 int nchildren, ret; 1644 1645 root = nilfs_btree_get_root(btree); 1646 switch (nilfs_btree_height(btree)) { 1647 case 2: 1648 bh = NULL; 1649 node = root; 1650 break; 1651 case 3: 1652 nchildren = nilfs_btree_node_get_nchildren(root); 1653 if (nchildren > 1) 1654 return 0; 1655 ptr = nilfs_btree_node_get_ptr(root, nchildren - 1, 1656 NILFS_BTREE_ROOT_NCHILDREN_MAX); 1657 ret = nilfs_btree_get_block(btree, ptr, &bh); 1658 if (ret < 0) 1659 return ret; 1660 node = (struct nilfs_btree_node *)bh->b_data; 1661 break; 1662 default: 1663 return 0; 1664 } 1665 1666 nchildren = nilfs_btree_node_get_nchildren(node); 1667 maxkey = nilfs_btree_node_get_key(node, nchildren - 1); 1668 nextmaxkey = (nchildren > 1) ? 
1669 nilfs_btree_node_get_key(node, nchildren - 2) : 0; 1670 if (bh != NULL) 1671 brelse(bh); 1672 1673 return (maxkey == key) && (nextmaxkey < NILFS_BMAP_LARGE_LOW); 1674 } 1675 1676 static int nilfs_btree_gather_data(struct nilfs_bmap *btree, 1677 __u64 *keys, __u64 *ptrs, int nitems) 1678 { 1679 struct buffer_head *bh; 1680 struct nilfs_btree_node *node, *root; 1681 __le64 *dkeys; 1682 __le64 *dptrs; 1683 __u64 ptr; 1684 int nchildren, ncmax, i, ret; 1685 1686 root = nilfs_btree_get_root(btree); 1687 switch (nilfs_btree_height(btree)) { 1688 case 2: 1689 bh = NULL; 1690 node = root; 1691 ncmax = NILFS_BTREE_ROOT_NCHILDREN_MAX; 1692 break; 1693 case 3: 1694 nchildren = nilfs_btree_node_get_nchildren(root); 1695 WARN_ON(nchildren > 1); 1696 ptr = nilfs_btree_node_get_ptr(root, nchildren - 1, 1697 NILFS_BTREE_ROOT_NCHILDREN_MAX); 1698 ret = nilfs_btree_get_block(btree, ptr, &bh); 1699 if (ret < 0) 1700 return ret; 1701 node = (struct nilfs_btree_node *)bh->b_data; 1702 ncmax = nilfs_btree_nchildren_per_block(btree); 1703 break; 1704 default: 1705 node = NULL; 1706 return -EINVAL; 1707 } 1708 1709 nchildren = nilfs_btree_node_get_nchildren(node); 1710 if (nchildren < nitems) 1711 nitems = nchildren; 1712 dkeys = nilfs_btree_node_dkeys(node); 1713 dptrs = nilfs_btree_node_dptrs(node, ncmax); 1714 for (i = 0; i < nitems; i++) { 1715 keys[i] = le64_to_cpu(dkeys[i]); 1716 ptrs[i] = le64_to_cpu(dptrs[i]); 1717 } 1718 1719 if (bh != NULL) 1720 brelse(bh); 1721 1722 return nitems; 1723 } 1724 1725 static int 1726 nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *btree, __u64 key, 1727 union nilfs_bmap_ptr_req *dreq, 1728 union nilfs_bmap_ptr_req *nreq, 1729 struct buffer_head **bhp, 1730 struct nilfs_bmap_stats *stats) 1731 { 1732 struct buffer_head *bh; 1733 struct inode *dat = NULL; 1734 int ret; 1735 1736 stats->bs_nblocks = 0; 1737 1738 /* for data */ 1739 /* cannot find near ptr */ 1740 if (NILFS_BMAP_USE_VBN(btree)) { 1741 dreq->bpr_ptr = nilfs_btree_find_target_v(btree, NULL, key); 1742 dat = nilfs_bmap_get_dat(btree); 1743 } 1744 1745 ret = nilfs_bmap_prepare_alloc_ptr(btree, dreq, dat); 1746 if (ret < 0) 1747 return ret; 1748 1749 *bhp = NULL; 1750 stats->bs_nblocks++; 1751 if (nreq != NULL) { 1752 nreq->bpr_ptr = dreq->bpr_ptr + 1; 1753 ret = nilfs_bmap_prepare_alloc_ptr(btree, nreq, dat); 1754 if (ret < 0) 1755 goto err_out_dreq; 1756 1757 ret = nilfs_btree_get_new_block(btree, nreq->bpr_ptr, &bh); 1758 if (ret < 0) 1759 goto err_out_nreq; 1760 1761 *bhp = bh; 1762 stats->bs_nblocks++; 1763 } 1764 1765 /* success */ 1766 return 0; 1767 1768 /* error */ 1769 err_out_nreq: 1770 nilfs_bmap_abort_alloc_ptr(btree, nreq, dat); 1771 err_out_dreq: 1772 nilfs_bmap_abort_alloc_ptr(btree, dreq, dat); 1773 stats->bs_nblocks = 0; 1774 return ret; 1775 1776 } 1777 1778 static void 1779 nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *btree, 1780 __u64 key, __u64 ptr, 1781 const __u64 *keys, const __u64 *ptrs, 1782 int n, 1783 union nilfs_bmap_ptr_req *dreq, 1784 union nilfs_bmap_ptr_req *nreq, 1785 struct buffer_head *bh) 1786 { 1787 struct nilfs_btree_node *node; 1788 struct inode *dat; 1789 __u64 tmpptr; 1790 int ncblk; 1791 1792 /* free resources */ 1793 if (btree->b_ops->bop_clear != NULL) 1794 btree->b_ops->bop_clear(btree); 1795 1796 /* ptr must be a pointer to a buffer head. */ 1797 set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr)); 1798 1799 /* convert and insert */ 1800 dat = NILFS_BMAP_USE_VBN(btree) ? 
	nilfs_bmap_get_dat(btree) : NULL;
	__nilfs_btree_init(btree);
	if (nreq != NULL) {
		nilfs_bmap_commit_alloc_ptr(btree, dreq, dat);
		nilfs_bmap_commit_alloc_ptr(btree, nreq, dat);

		/* create child node at level 1 */
		node = (struct nilfs_btree_node *)bh->b_data;
		ncblk = nilfs_btree_nchildren_per_block(btree);
		nilfs_btree_node_init(node, 0, 1, n, ncblk, keys, ptrs);
		nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr, ncblk);
		if (!buffer_dirty(bh))
			mark_buffer_dirty(bh);
		if (!nilfs_bmap_dirty(btree))
			nilfs_bmap_set_dirty(btree);

		brelse(bh);

		/* create root node at level 2 */
		node = nilfs_btree_get_root(btree);
		tmpptr = nreq->bpr_ptr;
		nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 2, 1,
				      NILFS_BTREE_ROOT_NCHILDREN_MAX,
				      &keys[0], &tmpptr);
	} else {
		nilfs_bmap_commit_alloc_ptr(btree, dreq, dat);

		/* create root node at level 1 */
		node = nilfs_btree_get_root(btree);
		nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 1, n,
				      NILFS_BTREE_ROOT_NCHILDREN_MAX,
				      keys, ptrs);
		nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr,
					NILFS_BTREE_ROOT_NCHILDREN_MAX);
		if (!nilfs_bmap_dirty(btree))
			nilfs_bmap_set_dirty(btree);
	}

	if (NILFS_BMAP_USE_VBN(btree))
		nilfs_bmap_set_target_v(btree, key, dreq->bpr_ptr);
}

/**
 * nilfs_btree_convert_and_insert - convert a bmap to a B-tree and add an entry
 * @btree: bmap struct to be converted into a B-tree in place
 * @key: key of the new entry to be inserted
 * @ptr: pointer (block address) of the new entry to be inserted
 * @keys: array of keys to seed the new B-tree with
 * @ptrs: array of pointers associated with @keys
 * @n: number of entries in @keys and @ptrs
 */
int nilfs_btree_convert_and_insert(struct nilfs_bmap *btree,
				   __u64 key, __u64 ptr,
				   const __u64 *keys, const __u64 *ptrs, int n)
{
	struct buffer_head *bh = NULL;
	union nilfs_bmap_ptr_req dreq, nreq, *di, *ni;
	struct nilfs_bmap_stats stats;
	int ret;

	if (n + 1 <= NILFS_BTREE_ROOT_NCHILDREN_MAX) {
		di = &dreq;
		ni = NULL;
	} else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX(
			nilfs_btree_node_size(btree))) {
		di = &dreq;
		ni = &nreq;
	} else {
		di = NULL;
		ni = NULL;
		BUG();
	}

	ret = nilfs_btree_prepare_convert_and_insert(btree, key, di, ni, &bh,
						     &stats);
	if (ret < 0)
		return ret;
	nilfs_btree_commit_convert_and_insert(btree, key, ptr, keys, ptrs, n,
					      di, ni, bh);
	nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks);
	return 0;
}

static int nilfs_btree_propagate_p(struct nilfs_bmap *btree,
				   struct nilfs_btree_path *path,
				   int level,
				   struct buffer_head *bh)
{
	while ((++level < nilfs_btree_height(btree) - 1) &&
	       !buffer_dirty(path[level].bp_bh))
		mark_buffer_dirty(path[level].bp_bh);

	return 0;
}

static int nilfs_btree_prepare_update_v(struct nilfs_bmap *btree,
					struct nilfs_btree_path *path,
					int level, struct inode *dat)
{
	struct nilfs_btree_node *parent;
	int ncmax, ret;

	parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
	path[level].bp_oldreq.bpr_ptr =
		nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
					 ncmax);
	path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1;
	ret = nilfs_dat_prepare_update(dat, &path[level].bp_oldreq.bpr_req,
				       &path[level].bp_newreq.bpr_req);
	if (ret < 0)
		return ret;

	if (buffer_nilfs_node(path[level].bp_bh)) {
		path[level].bp_ctxt.oldkey = path[level].bp_oldreq.bpr_ptr;
		path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr;
1915 path[level].bp_ctxt.bh = path[level].bp_bh; 1916 ret = nilfs_btnode_prepare_change_key( 1917 &NILFS_BMAP_I(btree)->i_btnode_cache, 1918 &path[level].bp_ctxt); 1919 if (ret < 0) { 1920 nilfs_dat_abort_update(dat, 1921 &path[level].bp_oldreq.bpr_req, 1922 &path[level].bp_newreq.bpr_req); 1923 return ret; 1924 } 1925 } 1926 1927 return 0; 1928 } 1929 1930 static void nilfs_btree_commit_update_v(struct nilfs_bmap *btree, 1931 struct nilfs_btree_path *path, 1932 int level, struct inode *dat) 1933 { 1934 struct nilfs_btree_node *parent; 1935 int ncmax; 1936 1937 nilfs_dat_commit_update(dat, &path[level].bp_oldreq.bpr_req, 1938 &path[level].bp_newreq.bpr_req, 1939 btree->b_ptr_type == NILFS_BMAP_PTR_VS); 1940 1941 if (buffer_nilfs_node(path[level].bp_bh)) { 1942 nilfs_btnode_commit_change_key( 1943 &NILFS_BMAP_I(btree)->i_btnode_cache, 1944 &path[level].bp_ctxt); 1945 path[level].bp_bh = path[level].bp_ctxt.bh; 1946 } 1947 set_buffer_nilfs_volatile(path[level].bp_bh); 1948 1949 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); 1950 nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index, 1951 path[level].bp_newreq.bpr_ptr, ncmax); 1952 } 1953 1954 static void nilfs_btree_abort_update_v(struct nilfs_bmap *btree, 1955 struct nilfs_btree_path *path, 1956 int level, struct inode *dat) 1957 { 1958 nilfs_dat_abort_update(dat, &path[level].bp_oldreq.bpr_req, 1959 &path[level].bp_newreq.bpr_req); 1960 if (buffer_nilfs_node(path[level].bp_bh)) 1961 nilfs_btnode_abort_change_key( 1962 &NILFS_BMAP_I(btree)->i_btnode_cache, 1963 &path[level].bp_ctxt); 1964 } 1965 1966 static int nilfs_btree_prepare_propagate_v(struct nilfs_bmap *btree, 1967 struct nilfs_btree_path *path, 1968 int minlevel, int *maxlevelp, 1969 struct inode *dat) 1970 { 1971 int level, ret; 1972 1973 level = minlevel; 1974 if (!buffer_nilfs_volatile(path[level].bp_bh)) { 1975 ret = nilfs_btree_prepare_update_v(btree, path, level, dat); 1976 if (ret < 0) 1977 return ret; 1978 } 1979 while ((++level < nilfs_btree_height(btree) - 1) && 1980 !buffer_dirty(path[level].bp_bh)) { 1981 1982 WARN_ON(buffer_nilfs_volatile(path[level].bp_bh)); 1983 ret = nilfs_btree_prepare_update_v(btree, path, level, dat); 1984 if (ret < 0) 1985 goto out; 1986 } 1987 1988 /* success */ 1989 *maxlevelp = level - 1; 1990 return 0; 1991 1992 /* error */ 1993 out: 1994 while (--level > minlevel) 1995 nilfs_btree_abort_update_v(btree, path, level, dat); 1996 if (!buffer_nilfs_volatile(path[level].bp_bh)) 1997 nilfs_btree_abort_update_v(btree, path, level, dat); 1998 return ret; 1999 } 2000 2001 static void nilfs_btree_commit_propagate_v(struct nilfs_bmap *btree, 2002 struct nilfs_btree_path *path, 2003 int minlevel, int maxlevel, 2004 struct buffer_head *bh, 2005 struct inode *dat) 2006 { 2007 int level; 2008 2009 if (!buffer_nilfs_volatile(path[minlevel].bp_bh)) 2010 nilfs_btree_commit_update_v(btree, path, minlevel, dat); 2011 2012 for (level = minlevel + 1; level <= maxlevel; level++) 2013 nilfs_btree_commit_update_v(btree, path, level, dat); 2014 } 2015 2016 static int nilfs_btree_propagate_v(struct nilfs_bmap *btree, 2017 struct nilfs_btree_path *path, 2018 int level, struct buffer_head *bh) 2019 { 2020 int maxlevel = 0, ret; 2021 struct nilfs_btree_node *parent; 2022 struct inode *dat = nilfs_bmap_get_dat(btree); 2023 __u64 ptr; 2024 int ncmax; 2025 2026 get_bh(bh); 2027 path[level].bp_bh = bh; 2028 ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel, 2029 dat); 2030 if (ret < 0) 2031 goto out; 2032 2033 if 

static int nilfs_btree_propagate_v(struct nilfs_bmap *btree,
				   struct nilfs_btree_path *path,
				   int level, struct buffer_head *bh)
{
	int maxlevel = 0, ret;
	struct nilfs_btree_node *parent;
	struct inode *dat = nilfs_bmap_get_dat(btree);
	__u64 ptr;
	int ncmax;

	get_bh(bh);
	path[level].bp_bh = bh;
	ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel,
					      dat);
	if (ret < 0)
		goto out;

	if (buffer_nilfs_volatile(path[level].bp_bh)) {
		parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
		ptr = nilfs_btree_node_get_ptr(parent,
					       path[level + 1].bp_index,
					       ncmax);
		ret = nilfs_dat_mark_dirty(dat, ptr);
		if (ret < 0)
			goto out;
	}

	nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh, dat);

 out:
	brelse(path[level].bp_bh);
	path[level].bp_bh = NULL;
	return ret;
}

static int nilfs_btree_propagate(struct nilfs_bmap *btree,
				 struct buffer_head *bh)
{
	struct nilfs_btree_path *path;
	struct nilfs_btree_node *node;
	__u64 key;
	int level, ret;

	WARN_ON(!buffer_dirty(bh));

	path = nilfs_btree_alloc_path();
	if (path == NULL)
		return -ENOMEM;

	if (buffer_nilfs_node(bh)) {
		node = (struct nilfs_btree_node *)bh->b_data;
		key = nilfs_btree_node_get_key(node, 0);
		level = nilfs_btree_node_get_level(node);
	} else {
		key = nilfs_bmap_data_get_key(btree, bh);
		level = NILFS_BTREE_LEVEL_DATA;
	}

	ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0);
	if (ret < 0) {
		if (unlikely(ret == -ENOENT))
			nilfs_crit(btree->b_inode->i_sb,
				   "writing node/leaf block does not appear in b-tree (ino=%lu) at key=%llu, level=%d",
				   btree->b_inode->i_ino,
				   (unsigned long long)key, level);
		goto out;
	}

	ret = NILFS_BMAP_USE_VBN(btree) ?
		nilfs_btree_propagate_v(btree, path, level, bh) :
		nilfs_btree_propagate_p(btree, path, level, bh);

 out:
	nilfs_btree_free_path(path);

	return ret;
}

static int nilfs_btree_propagate_gc(struct nilfs_bmap *btree,
				    struct buffer_head *bh)
{
	return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(btree), bh->b_blocknr);
}

static void nilfs_btree_add_dirty_buffer(struct nilfs_bmap *btree,
					 struct list_head *lists,
					 struct buffer_head *bh)
{
	struct list_head *head;
	struct buffer_head *cbh;
	struct nilfs_btree_node *node, *cnode;
	__u64 key, ckey;
	int level;

	get_bh(bh);
	node = (struct nilfs_btree_node *)bh->b_data;
	key = nilfs_btree_node_get_key(node, 0);
	level = nilfs_btree_node_get_level(node);
	if (level < NILFS_BTREE_LEVEL_NODE_MIN ||
	    level >= NILFS_BTREE_LEVEL_MAX) {
		dump_stack();
		nilfs_warn(btree->b_inode->i_sb,
			   "invalid btree level: %d (key=%llu, ino=%lu, blocknr=%llu)",
			   level, (unsigned long long)key,
			   btree->b_inode->i_ino,
			   (unsigned long long)bh->b_blocknr);
		return;
	}

	list_for_each(head, &lists[level]) {
		cbh = list_entry(head, struct buffer_head, b_assoc_buffers);
		cnode = (struct nilfs_btree_node *)cbh->b_data;
		ckey = nilfs_btree_node_get_key(cnode, 0);
		if (key < ckey)
			break;
	}
	list_add_tail(&bh->b_assoc_buffers, head);
}

static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *btree,
					     struct list_head *listp)
{
	struct address_space *btcache = &NILFS_BMAP_I(btree)->i_btnode_cache;
	struct list_head lists[NILFS_BTREE_LEVEL_MAX];
	struct pagevec pvec;
	struct buffer_head *bh, *head;
	pgoff_t index = 0;
	int level, i;

	for (level = NILFS_BTREE_LEVEL_NODE_MIN;
	     level < NILFS_BTREE_LEVEL_MAX;
	     level++)
		INIT_LIST_HEAD(&lists[level]);

	pagevec_init(&pvec);

	while (pagevec_lookup_tag(&pvec, btcache, &index,
				  PAGECACHE_TAG_DIRTY)) {
		for (i = 0; i < pagevec_count(&pvec); i++) {
			bh = head = page_buffers(pvec.pages[i]);
			do {
				if (buffer_dirty(bh))
					nilfs_btree_add_dirty_buffer(btree,
								     lists, bh);
			} while ((bh = bh->b_this_page) != head);
		}
		pagevec_release(&pvec);
		cond_resched();
	}

	for (level = NILFS_BTREE_LEVEL_NODE_MIN;
	     level < NILFS_BTREE_LEVEL_MAX;
	     level++)
		list_splice_tail(&lists[level], listp);
}

static int nilfs_btree_assign_p(struct nilfs_bmap *btree,
				struct nilfs_btree_path *path,
				int level,
				struct buffer_head **bh,
				sector_t blocknr,
				union nilfs_binfo *binfo)
{
	struct nilfs_btree_node *parent;
	__u64 key;
	__u64 ptr;
	int ncmax, ret;

	parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
	ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
				       ncmax);
	if (buffer_nilfs_node(*bh)) {
		path[level].bp_ctxt.oldkey = ptr;
		path[level].bp_ctxt.newkey = blocknr;
		path[level].bp_ctxt.bh = *bh;
		ret = nilfs_btnode_prepare_change_key(
			&NILFS_BMAP_I(btree)->i_btnode_cache,
			&path[level].bp_ctxt);
		if (ret < 0)
			return ret;
		nilfs_btnode_commit_change_key(
			&NILFS_BMAP_I(btree)->i_btnode_cache,
			&path[level].bp_ctxt);
		*bh = path[level].bp_ctxt.bh;
	}

	nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index, blocknr,
				 ncmax);

	key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
	/* on-disk format */
	binfo->bi_dat.bi_blkoff = cpu_to_le64(key);
	binfo->bi_dat.bi_level = level;

	return 0;
}

static int nilfs_btree_assign_v(struct nilfs_bmap *btree,
				struct nilfs_btree_path *path,
				int level,
				struct buffer_head **bh,
				sector_t blocknr,
				union nilfs_binfo *binfo)
{
	struct nilfs_btree_node *parent;
	struct inode *dat = nilfs_bmap_get_dat(btree);
	__u64 key;
	__u64 ptr;
	union nilfs_bmap_ptr_req req;
	int ncmax, ret;

	parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax);
	ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index,
				       ncmax);
	req.bpr_ptr = ptr;
	ret = nilfs_dat_prepare_start(dat, &req.bpr_req);
	if (ret < 0)
		return ret;
	nilfs_dat_commit_start(dat, &req.bpr_req, blocknr);

	key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index);
	/* on-disk format */
	binfo->bi_v.bi_vblocknr = cpu_to_le64(ptr);
	binfo->bi_v.bi_blkoff = cpu_to_le64(key);

	return 0;
}

static int nilfs_btree_assign(struct nilfs_bmap *btree,
			      struct buffer_head **bh,
			      sector_t blocknr,
			      union nilfs_binfo *binfo)
{
	struct nilfs_btree_path *path;
	struct nilfs_btree_node *node;
	__u64 key;
	int level, ret;

	path = nilfs_btree_alloc_path();
	if (path == NULL)
		return -ENOMEM;

	if (buffer_nilfs_node(*bh)) {
		node = (struct nilfs_btree_node *)(*bh)->b_data;
		key = nilfs_btree_node_get_key(node, 0);
		level = nilfs_btree_node_get_level(node);
	} else {
		key = nilfs_bmap_data_get_key(btree, *bh);
		level = NILFS_BTREE_LEVEL_DATA;
	}

	ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0);
	if (ret < 0) {
		WARN_ON(ret == -ENOENT);
		goto out;
	}

	ret = NILFS_BMAP_USE_VBN(btree) ?
		nilfs_btree_assign_v(btree, path, level, bh, blocknr, binfo) :
		nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo);

 out:
	nilfs_btree_free_path(path);

	return ret;
}

static int nilfs_btree_assign_gc(struct nilfs_bmap *btree,
				 struct buffer_head **bh,
				 sector_t blocknr,
				 union nilfs_binfo *binfo)
{
	struct nilfs_btree_node *node;
	__u64 key;
	int ret;

	ret = nilfs_dat_move(nilfs_bmap_get_dat(btree), (*bh)->b_blocknr,
			     blocknr);
	if (ret < 0)
		return ret;

	if (buffer_nilfs_node(*bh)) {
		node = (struct nilfs_btree_node *)(*bh)->b_data;
		key = nilfs_btree_node_get_key(node, 0);
	} else
		key = nilfs_bmap_data_get_key(btree, *bh);

	/* on-disk format */
	binfo->bi_v.bi_vblocknr = cpu_to_le64((*bh)->b_blocknr);
	binfo->bi_v.bi_blkoff = cpu_to_le64(key);

	return 0;
}

static int nilfs_btree_mark(struct nilfs_bmap *btree, __u64 key, int level)
{
	struct buffer_head *bh;
	struct nilfs_btree_path *path;
	__u64 ptr;
	int ret;

	path = nilfs_btree_alloc_path();
	if (path == NULL)
		return -ENOMEM;

	ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1, 0);
	if (ret < 0) {
		WARN_ON(ret == -ENOENT);
		goto out;
	}
	ret = nilfs_btree_get_block(btree, ptr, &bh);
	if (ret < 0) {
		WARN_ON(ret == -ENOENT);
		goto out;
	}

	if (!buffer_dirty(bh))
		mark_buffer_dirty(bh);
	brelse(bh);
	if (!nilfs_bmap_dirty(btree))
		nilfs_bmap_set_dirty(btree);

 out:
	nilfs_btree_free_path(path);
	return ret;
}

static const struct nilfs_bmap_operations nilfs_btree_ops = {
	.bop_lookup = nilfs_btree_lookup,
	.bop_lookup_contig = nilfs_btree_lookup_contig,
	.bop_insert = nilfs_btree_insert,
	.bop_delete = nilfs_btree_delete,
	.bop_clear = NULL,

	.bop_propagate = nilfs_btree_propagate,

	.bop_lookup_dirty_buffers = nilfs_btree_lookup_dirty_buffers,

	.bop_assign = nilfs_btree_assign,
	.bop_mark = nilfs_btree_mark,

	.bop_seek_key = nilfs_btree_seek_key,
	.bop_last_key = nilfs_btree_last_key,

	.bop_check_insert = NULL,
	.bop_check_delete = nilfs_btree_check_delete,
	.bop_gather_data = nilfs_btree_gather_data,
};

static const struct nilfs_bmap_operations nilfs_btree_ops_gc = {
	.bop_lookup = NULL,
	.bop_lookup_contig = NULL,
	.bop_insert = NULL,
	.bop_delete = NULL,
	.bop_clear = NULL,

	.bop_propagate = nilfs_btree_propagate_gc,

	.bop_lookup_dirty_buffers = nilfs_btree_lookup_dirty_buffers,

	.bop_assign = nilfs_btree_assign_gc,
	.bop_mark = NULL,

	.bop_seek_key = NULL,
	.bop_last_key = NULL,

	.bop_check_insert = NULL,
	.bop_check_delete = NULL,
	.bop_gather_data = NULL,
};

static void __nilfs_btree_init(struct nilfs_bmap *bmap)
{
	bmap->b_ops = &nilfs_btree_ops;
	bmap->b_nchildren_per_block =
		NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap));
}

int nilfs_btree_init(struct nilfs_bmap *bmap)
{
	int ret = 0;

	__nilfs_btree_init(bmap);

	if (nilfs_btree_root_broken(nilfs_btree_get_root(bmap), bmap->b_inode))
		ret = -EIO;
	return ret;
}

void nilfs_btree_init_gc(struct nilfs_bmap *bmap)
{
	bmap->b_ops = &nilfs_btree_ops_gc;
	bmap->b_nchildren_per_block =
		NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap));
}
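
/*
 * Illustrative sketch (not part of btree.c): the operation tables installed
 * by __nilfs_btree_init() and nilfs_btree_init_gc() are not called directly
 * by filesystem code; the bmap layer dispatches through bmap->b_ops under the
 * bmap semaphore.  Roughly, based on nilfs_bmap_propagate() in bmap.c
 * (simplified):
 *
 *	int nilfs_bmap_propagate(struct nilfs_bmap *bmap,
 *				 struct buffer_head *bh)
 *	{
 *		int ret;
 *
 *		down_write(&bmap->b_sem);
 *		ret = bmap->b_ops->bop_propagate(bmap, bh);
 *		up_write(&bmap->b_sem);
 *		return ret;
 *	}
 *
 * When nilfs_btree_init_gc() installs nilfs_btree_ops_gc, the same dispatch
 * sites drive the reduced set of operations used while cleaning old
 * segments; the slots left NULL are not used on that path.
 */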