1 // SPDX-License-Identifier: GPL-2.0+ 2 /* 3 * NILFS B-tree. 4 * 5 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. 6 * 7 * Written by Koji Sato. 8 */ 9 10 #include <linux/slab.h> 11 #include <linux/string.h> 12 #include <linux/errno.h> 13 #include <linux/pagevec.h> 14 #include "nilfs.h" 15 #include "page.h" 16 #include "btnode.h" 17 #include "btree.h" 18 #include "alloc.h" 19 #include "dat.h" 20 21 static void __nilfs_btree_init(struct nilfs_bmap *bmap); 22 23 static struct nilfs_btree_path *nilfs_btree_alloc_path(void) 24 { 25 struct nilfs_btree_path *path; 26 int level = NILFS_BTREE_LEVEL_DATA; 27 28 path = kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS); 29 if (path == NULL) 30 goto out; 31 32 for (; level < NILFS_BTREE_LEVEL_MAX; level++) { 33 path[level].bp_bh = NULL; 34 path[level].bp_sib_bh = NULL; 35 path[level].bp_index = 0; 36 path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR; 37 path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR; 38 path[level].bp_op = NULL; 39 } 40 41 out: 42 return path; 43 } 44 45 static void nilfs_btree_free_path(struct nilfs_btree_path *path) 46 { 47 int level = NILFS_BTREE_LEVEL_DATA; 48 49 for (; level < NILFS_BTREE_LEVEL_MAX; level++) 50 brelse(path[level].bp_bh); 51 52 kmem_cache_free(nilfs_btree_path_cache, path); 53 } 54 55 /* 56 * B-tree node operations 57 */ 58 static int nilfs_btree_get_new_block(const struct nilfs_bmap *btree, 59 __u64 ptr, struct buffer_head **bhp) 60 { 61 struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode; 62 struct address_space *btnc = btnc_inode->i_mapping; 63 struct buffer_head *bh; 64 65 bh = nilfs_btnode_create_block(btnc, ptr); 66 if (IS_ERR(bh)) 67 return PTR_ERR(bh); 68 69 set_buffer_nilfs_volatile(bh); 70 *bhp = bh; 71 return 0; 72 } 73 74 static int nilfs_btree_node_get_flags(const struct nilfs_btree_node *node) 75 { 76 return node->bn_flags; 77 } 78 79 static void 80 nilfs_btree_node_set_flags(struct nilfs_btree_node *node, int flags) 81 { 82 node->bn_flags = flags; 83 } 84 85 static int nilfs_btree_node_root(const struct nilfs_btree_node *node) 86 { 87 return nilfs_btree_node_get_flags(node) & NILFS_BTREE_NODE_ROOT; 88 } 89 90 static int nilfs_btree_node_get_level(const struct nilfs_btree_node *node) 91 { 92 return node->bn_level; 93 } 94 95 static void 96 nilfs_btree_node_set_level(struct nilfs_btree_node *node, int level) 97 { 98 node->bn_level = level; 99 } 100 101 static int nilfs_btree_node_get_nchildren(const struct nilfs_btree_node *node) 102 { 103 return le16_to_cpu(node->bn_nchildren); 104 } 105 106 static void 107 nilfs_btree_node_set_nchildren(struct nilfs_btree_node *node, int nchildren) 108 { 109 node->bn_nchildren = cpu_to_le16(nchildren); 110 } 111 112 static int nilfs_btree_node_size(const struct nilfs_bmap *btree) 113 { 114 return i_blocksize(btree->b_inode); 115 } 116 117 static int nilfs_btree_nchildren_per_block(const struct nilfs_bmap *btree) 118 { 119 return btree->b_nchildren_per_block; 120 } 121 122 static __le64 * 123 nilfs_btree_node_dkeys(const struct nilfs_btree_node *node) 124 { 125 return (__le64 *)((char *)(node + 1) + 126 (nilfs_btree_node_root(node) ? 127 0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE)); 128 } 129 130 static __le64 * 131 nilfs_btree_node_dptrs(const struct nilfs_btree_node *node, int ncmax) 132 { 133 return (__le64 *)(nilfs_btree_node_dkeys(node) + ncmax); 134 } 135 136 static __u64 137 nilfs_btree_node_get_key(const struct nilfs_btree_node *node, int index) 138 { 139 return le64_to_cpu(*(nilfs_btree_node_dkeys(node) + index)); 140 } 141 142 static void 143 nilfs_btree_node_set_key(struct nilfs_btree_node *node, int index, __u64 key) 144 { 145 *(nilfs_btree_node_dkeys(node) + index) = cpu_to_le64(key); 146 } 147 148 static __u64 149 nilfs_btree_node_get_ptr(const struct nilfs_btree_node *node, int index, 150 int ncmax) 151 { 152 return le64_to_cpu(*(nilfs_btree_node_dptrs(node, ncmax) + index)); 153 } 154 155 static void 156 nilfs_btree_node_set_ptr(struct nilfs_btree_node *node, int index, __u64 ptr, 157 int ncmax) 158 { 159 *(nilfs_btree_node_dptrs(node, ncmax) + index) = cpu_to_le64(ptr); 160 } 161 162 static void nilfs_btree_node_init(struct nilfs_btree_node *node, int flags, 163 int level, int nchildren, int ncmax, 164 const __u64 *keys, const __u64 *ptrs) 165 { 166 __le64 *dkeys; 167 __le64 *dptrs; 168 int i; 169 170 nilfs_btree_node_set_flags(node, flags); 171 nilfs_btree_node_set_level(node, level); 172 nilfs_btree_node_set_nchildren(node, nchildren); 173 174 dkeys = nilfs_btree_node_dkeys(node); 175 dptrs = nilfs_btree_node_dptrs(node, ncmax); 176 for (i = 0; i < nchildren; i++) { 177 dkeys[i] = cpu_to_le64(keys[i]); 178 dptrs[i] = cpu_to_le64(ptrs[i]); 179 } 180 } 181 182 /* Assume the buffer heads corresponding to left and right are locked. */ 183 static void nilfs_btree_node_move_left(struct nilfs_btree_node *left, 184 struct nilfs_btree_node *right, 185 int n, int lncmax, int rncmax) 186 { 187 __le64 *ldkeys, *rdkeys; 188 __le64 *ldptrs, *rdptrs; 189 int lnchildren, rnchildren; 190 191 ldkeys = nilfs_btree_node_dkeys(left); 192 ldptrs = nilfs_btree_node_dptrs(left, lncmax); 193 lnchildren = nilfs_btree_node_get_nchildren(left); 194 195 rdkeys = nilfs_btree_node_dkeys(right); 196 rdptrs = nilfs_btree_node_dptrs(right, rncmax); 197 rnchildren = nilfs_btree_node_get_nchildren(right); 198 199 memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys)); 200 memcpy(ldptrs + lnchildren, rdptrs, n * sizeof(*rdptrs)); 201 memmove(rdkeys, rdkeys + n, (rnchildren - n) * sizeof(*rdkeys)); 202 memmove(rdptrs, rdptrs + n, (rnchildren - n) * sizeof(*rdptrs)); 203 204 lnchildren += n; 205 rnchildren -= n; 206 nilfs_btree_node_set_nchildren(left, lnchildren); 207 nilfs_btree_node_set_nchildren(right, rnchildren); 208 } 209 210 /* Assume that the buffer heads corresponding to left and right are locked. */ 211 static void nilfs_btree_node_move_right(struct nilfs_btree_node *left, 212 struct nilfs_btree_node *right, 213 int n, int lncmax, int rncmax) 214 { 215 __le64 *ldkeys, *rdkeys; 216 __le64 *ldptrs, *rdptrs; 217 int lnchildren, rnchildren; 218 219 ldkeys = nilfs_btree_node_dkeys(left); 220 ldptrs = nilfs_btree_node_dptrs(left, lncmax); 221 lnchildren = nilfs_btree_node_get_nchildren(left); 222 223 rdkeys = nilfs_btree_node_dkeys(right); 224 rdptrs = nilfs_btree_node_dptrs(right, rncmax); 225 rnchildren = nilfs_btree_node_get_nchildren(right); 226 227 memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys)); 228 memmove(rdptrs + n, rdptrs, rnchildren * sizeof(*rdptrs)); 229 memcpy(rdkeys, ldkeys + lnchildren - n, n * sizeof(*rdkeys)); 230 memcpy(rdptrs, ldptrs + lnchildren - n, n * sizeof(*rdptrs)); 231 232 lnchildren -= n; 233 rnchildren += n; 234 nilfs_btree_node_set_nchildren(left, lnchildren); 235 nilfs_btree_node_set_nchildren(right, rnchildren); 236 } 237 238 /* Assume that the buffer head corresponding to node is locked. */ 239 static void nilfs_btree_node_insert(struct nilfs_btree_node *node, int index, 240 __u64 key, __u64 ptr, int ncmax) 241 { 242 __le64 *dkeys; 243 __le64 *dptrs; 244 int nchildren; 245 246 dkeys = nilfs_btree_node_dkeys(node); 247 dptrs = nilfs_btree_node_dptrs(node, ncmax); 248 nchildren = nilfs_btree_node_get_nchildren(node); 249 if (index < nchildren) { 250 memmove(dkeys + index + 1, dkeys + index, 251 (nchildren - index) * sizeof(*dkeys)); 252 memmove(dptrs + index + 1, dptrs + index, 253 (nchildren - index) * sizeof(*dptrs)); 254 } 255 dkeys[index] = cpu_to_le64(key); 256 dptrs[index] = cpu_to_le64(ptr); 257 nchildren++; 258 nilfs_btree_node_set_nchildren(node, nchildren); 259 } 260 261 /* Assume that the buffer head corresponding to node is locked. */ 262 static void nilfs_btree_node_delete(struct nilfs_btree_node *node, int index, 263 __u64 *keyp, __u64 *ptrp, int ncmax) 264 { 265 __u64 key; 266 __u64 ptr; 267 __le64 *dkeys; 268 __le64 *dptrs; 269 int nchildren; 270 271 dkeys = nilfs_btree_node_dkeys(node); 272 dptrs = nilfs_btree_node_dptrs(node, ncmax); 273 key = le64_to_cpu(dkeys[index]); 274 ptr = le64_to_cpu(dptrs[index]); 275 nchildren = nilfs_btree_node_get_nchildren(node); 276 if (keyp != NULL) 277 *keyp = key; 278 if (ptrp != NULL) 279 *ptrp = ptr; 280 281 if (index < nchildren - 1) { 282 memmove(dkeys + index, dkeys + index + 1, 283 (nchildren - index - 1) * sizeof(*dkeys)); 284 memmove(dptrs + index, dptrs + index + 1, 285 (nchildren - index - 1) * sizeof(*dptrs)); 286 } 287 nchildren--; 288 nilfs_btree_node_set_nchildren(node, nchildren); 289 } 290 291 static int nilfs_btree_node_lookup(const struct nilfs_btree_node *node, 292 __u64 key, int *indexp) 293 { 294 __u64 nkey; 295 int index, low, high, s; 296 297 /* binary search */ 298 low = 0; 299 high = nilfs_btree_node_get_nchildren(node) - 1; 300 index = 0; 301 s = 0; 302 while (low <= high) { 303 index = (low + high) / 2; 304 nkey = nilfs_btree_node_get_key(node, index); 305 if (nkey == key) { 306 s = 0; 307 goto out; 308 } else if (nkey < key) { 309 low = index + 1; 310 s = -1; 311 } else { 312 high = index - 1; 313 s = 1; 314 } 315 } 316 317 /* adjust index */ 318 if (nilfs_btree_node_get_level(node) > NILFS_BTREE_LEVEL_NODE_MIN) { 319 if (s > 0 && index > 0) 320 index--; 321 } else if (s < 0) 322 index++; 323 324 out: 325 *indexp = index; 326 327 return s == 0; 328 } 329 330 /** 331 * nilfs_btree_node_broken - verify consistency of btree node 332 * @node: btree node block to be examined 333 * @size: node size (in bytes) 334 * @inode: host inode of btree 335 * @blocknr: block number 336 * 337 * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned. 338 */ 339 static int nilfs_btree_node_broken(const struct nilfs_btree_node *node, 340 size_t size, struct inode *inode, 341 sector_t blocknr) 342 { 343 int level, flags, nchildren; 344 int ret = 0; 345 346 level = nilfs_btree_node_get_level(node); 347 flags = nilfs_btree_node_get_flags(node); 348 nchildren = nilfs_btree_node_get_nchildren(node); 349 350 if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN || 351 level >= NILFS_BTREE_LEVEL_MAX || 352 (flags & NILFS_BTREE_NODE_ROOT) || 353 nchildren <= 0 || 354 nchildren > NILFS_BTREE_NODE_NCHILDREN_MAX(size))) { 355 nilfs_crit(inode->i_sb, 356 "bad btree node (ino=%lu, blocknr=%llu): level = %d, flags = 0x%x, nchildren = %d", 357 inode->i_ino, (unsigned long long)blocknr, level, 358 flags, nchildren); 359 ret = 1; 360 } 361 return ret; 362 } 363 364 /** 365 * nilfs_btree_root_broken - verify consistency of btree root node 366 * @node: btree root node to be examined 367 * @inode: host inode of btree 368 * 369 * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned. 370 */ 371 static int nilfs_btree_root_broken(const struct nilfs_btree_node *node, 372 struct inode *inode) 373 { 374 int level, flags, nchildren; 375 int ret = 0; 376 377 level = nilfs_btree_node_get_level(node); 378 flags = nilfs_btree_node_get_flags(node); 379 nchildren = nilfs_btree_node_get_nchildren(node); 380 381 if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN || 382 level >= NILFS_BTREE_LEVEL_MAX || 383 nchildren < 0 || 384 nchildren > NILFS_BTREE_ROOT_NCHILDREN_MAX || 385 (nchildren == 0 && level > NILFS_BTREE_LEVEL_NODE_MIN))) { 386 nilfs_crit(inode->i_sb, 387 "bad btree root (ino=%lu): level = %d, flags = 0x%x, nchildren = %d", 388 inode->i_ino, level, flags, nchildren); 389 ret = 1; 390 } 391 return ret; 392 } 393 394 int nilfs_btree_broken_node_block(struct buffer_head *bh) 395 { 396 struct inode *inode; 397 int ret; 398 399 if (buffer_nilfs_checked(bh)) 400 return 0; 401 402 inode = bh->b_folio->mapping->host; 403 ret = nilfs_btree_node_broken((struct nilfs_btree_node *)bh->b_data, 404 bh->b_size, inode, bh->b_blocknr); 405 if (likely(!ret)) 406 set_buffer_nilfs_checked(bh); 407 return ret; 408 } 409 410 static struct nilfs_btree_node * 411 nilfs_btree_get_root(const struct nilfs_bmap *btree) 412 { 413 return (struct nilfs_btree_node *)btree->b_u.u_data; 414 } 415 416 static struct nilfs_btree_node * 417 nilfs_btree_get_nonroot_node(const struct nilfs_btree_path *path, int level) 418 { 419 return (struct nilfs_btree_node *)path[level].bp_bh->b_data; 420 } 421 422 static struct nilfs_btree_node * 423 nilfs_btree_get_sib_node(const struct nilfs_btree_path *path, int level) 424 { 425 return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data; 426 } 427 428 static int nilfs_btree_height(const struct nilfs_bmap *btree) 429 { 430 return nilfs_btree_node_get_level(nilfs_btree_get_root(btree)) + 1; 431 } 432 433 static struct nilfs_btree_node * 434 nilfs_btree_get_node(const struct nilfs_bmap *btree, 435 const struct nilfs_btree_path *path, 436 int level, int *ncmaxp) 437 { 438 struct nilfs_btree_node *node; 439 440 if (level == nilfs_btree_height(btree) - 1) { 441 node = nilfs_btree_get_root(btree); 442 *ncmaxp = NILFS_BTREE_ROOT_NCHILDREN_MAX; 443 } else { 444 node = nilfs_btree_get_nonroot_node(path, level); 445 *ncmaxp = nilfs_btree_nchildren_per_block(btree); 446 } 447 return node; 448 } 449 450 static int nilfs_btree_bad_node(const struct nilfs_bmap *btree, 451 struct nilfs_btree_node *node, int level) 452 { 453 if (unlikely(nilfs_btree_node_get_level(node) != level)) { 454 dump_stack(); 455 nilfs_crit(btree->b_inode->i_sb, 456 "btree level mismatch (ino=%lu): %d != %d", 457 btree->b_inode->i_ino, 458 nilfs_btree_node_get_level(node), level); 459 return 1; 460 } 461 return 0; 462 } 463 464 struct nilfs_btree_readahead_info { 465 struct nilfs_btree_node *node; /* parent node */ 466 int max_ra_blocks; /* max nof blocks to read ahead */ 467 int index; /* current index on the parent node */ 468 int ncmax; /* nof children in the parent node */ 469 }; 470 471 static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr, 472 struct buffer_head **bhp, 473 const struct nilfs_btree_readahead_info *ra) 474 { 475 struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode; 476 struct address_space *btnc = btnc_inode->i_mapping; 477 struct buffer_head *bh, *ra_bh; 478 sector_t submit_ptr = 0; 479 int ret; 480 481 ret = nilfs_btnode_submit_block(btnc, ptr, 0, REQ_OP_READ, &bh, 482 &submit_ptr); 483 if (ret) { 484 if (likely(ret == -EEXIST)) 485 goto out_check; 486 if (ret == -ENOENT) { 487 /* 488 * Block address translation failed due to invalid 489 * value of 'ptr'. In this case, return internal code 490 * -EINVAL (broken bmap) to notify bmap layer of fatal 491 * metadata corruption. 492 */ 493 ret = -EINVAL; 494 } 495 return ret; 496 } 497 498 if (ra) { 499 int i, n; 500 __u64 ptr2; 501 502 /* read ahead sibling nodes */ 503 for (n = ra->max_ra_blocks, i = ra->index + 1; 504 n > 0 && i < ra->ncmax; n--, i++) { 505 ptr2 = nilfs_btree_node_get_ptr(ra->node, i, ra->ncmax); 506 507 ret = nilfs_btnode_submit_block(btnc, ptr2, 0, 508 REQ_OP_READ | REQ_RAHEAD, 509 &ra_bh, &submit_ptr); 510 if (likely(!ret || ret == -EEXIST)) 511 brelse(ra_bh); 512 else if (ret != -EBUSY) 513 break; 514 if (!buffer_locked(bh)) 515 goto out_no_wait; 516 } 517 } 518 519 wait_on_buffer(bh); 520 521 out_no_wait: 522 if (!buffer_uptodate(bh)) { 523 nilfs_err(btree->b_inode->i_sb, 524 "I/O error reading b-tree node block (ino=%lu, blocknr=%llu)", 525 btree->b_inode->i_ino, (unsigned long long)ptr); 526 brelse(bh); 527 return -EIO; 528 } 529 530 out_check: 531 if (nilfs_btree_broken_node_block(bh)) { 532 clear_buffer_uptodate(bh); 533 brelse(bh); 534 return -EINVAL; 535 } 536 537 *bhp = bh; 538 return 0; 539 } 540 541 static int nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr, 542 struct buffer_head **bhp) 543 { 544 return __nilfs_btree_get_block(btree, ptr, bhp, NULL); 545 } 546 547 static int nilfs_btree_do_lookup(const struct nilfs_bmap *btree, 548 struct nilfs_btree_path *path, 549 __u64 key, __u64 *ptrp, int minlevel, 550 int readahead) 551 { 552 struct nilfs_btree_node *node; 553 struct nilfs_btree_readahead_info p, *ra; 554 __u64 ptr; 555 int level, index, found, ncmax, ret; 556 557 node = nilfs_btree_get_root(btree); 558 level = nilfs_btree_node_get_level(node); 559 if (level < minlevel || nilfs_btree_node_get_nchildren(node) <= 0) 560 return -ENOENT; 561 562 found = nilfs_btree_node_lookup(node, key, &index); 563 ptr = nilfs_btree_node_get_ptr(node, index, 564 NILFS_BTREE_ROOT_NCHILDREN_MAX); 565 path[level].bp_bh = NULL; 566 path[level].bp_index = index; 567 568 ncmax = nilfs_btree_nchildren_per_block(btree); 569 570 while (--level >= minlevel) { 571 ra = NULL; 572 if (level == NILFS_BTREE_LEVEL_NODE_MIN && readahead) { 573 p.node = nilfs_btree_get_node(btree, path, level + 1, 574 &p.ncmax); 575 p.index = index; 576 p.max_ra_blocks = 7; 577 ra = &p; 578 } 579 ret = __nilfs_btree_get_block(btree, ptr, &path[level].bp_bh, 580 ra); 581 if (ret < 0) 582 return ret; 583 584 node = nilfs_btree_get_nonroot_node(path, level); 585 if (nilfs_btree_bad_node(btree, node, level)) 586 return -EINVAL; 587 if (!found) 588 found = nilfs_btree_node_lookup(node, key, &index); 589 else 590 index = 0; 591 if (index < ncmax) { 592 ptr = nilfs_btree_node_get_ptr(node, index, ncmax); 593 } else { 594 WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN); 595 /* insert */ 596 ptr = NILFS_BMAP_INVALID_PTR; 597 } 598 path[level].bp_index = index; 599 } 600 if (!found) 601 return -ENOENT; 602 603 if (ptrp != NULL) 604 *ptrp = ptr; 605 606 return 0; 607 } 608 609 static int nilfs_btree_do_lookup_last(const struct nilfs_bmap *btree, 610 struct nilfs_btree_path *path, 611 __u64 *keyp, __u64 *ptrp) 612 { 613 struct nilfs_btree_node *node; 614 __u64 ptr; 615 int index, level, ncmax, ret; 616 617 node = nilfs_btree_get_root(btree); 618 index = nilfs_btree_node_get_nchildren(node) - 1; 619 if (index < 0) 620 return -ENOENT; 621 level = nilfs_btree_node_get_level(node); 622 ptr = nilfs_btree_node_get_ptr(node, index, 623 NILFS_BTREE_ROOT_NCHILDREN_MAX); 624 path[level].bp_bh = NULL; 625 path[level].bp_index = index; 626 ncmax = nilfs_btree_nchildren_per_block(btree); 627 628 for (level--; level > 0; level--) { 629 ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh); 630 if (ret < 0) 631 return ret; 632 node = nilfs_btree_get_nonroot_node(path, level); 633 if (nilfs_btree_bad_node(btree, node, level)) 634 return -EINVAL; 635 index = nilfs_btree_node_get_nchildren(node) - 1; 636 ptr = nilfs_btree_node_get_ptr(node, index, ncmax); 637 path[level].bp_index = index; 638 } 639 640 if (keyp != NULL) 641 *keyp = nilfs_btree_node_get_key(node, index); 642 if (ptrp != NULL) 643 *ptrp = ptr; 644 645 return 0; 646 } 647 648 /** 649 * nilfs_btree_get_next_key - get next valid key from btree path array 650 * @btree: bmap struct of btree 651 * @path: array of nilfs_btree_path struct 652 * @minlevel: start level 653 * @nextkey: place to store the next valid key 654 * 655 * Return Value: If a next key was found, 0 is returned. Otherwise, 656 * -ENOENT is returned. 657 */ 658 static int nilfs_btree_get_next_key(const struct nilfs_bmap *btree, 659 const struct nilfs_btree_path *path, 660 int minlevel, __u64 *nextkey) 661 { 662 struct nilfs_btree_node *node; 663 int maxlevel = nilfs_btree_height(btree) - 1; 664 int index, next_adj, level; 665 666 /* Next index is already set to bp_index for leaf nodes. */ 667 next_adj = 0; 668 for (level = minlevel; level <= maxlevel; level++) { 669 if (level == maxlevel) 670 node = nilfs_btree_get_root(btree); 671 else 672 node = nilfs_btree_get_nonroot_node(path, level); 673 674 index = path[level].bp_index + next_adj; 675 if (index < nilfs_btree_node_get_nchildren(node)) { 676 /* Next key is in this node */ 677 *nextkey = nilfs_btree_node_get_key(node, index); 678 return 0; 679 } 680 /* For non-leaf nodes, next index is stored at bp_index + 1. */ 681 next_adj = 1; 682 } 683 return -ENOENT; 684 } 685 686 static int nilfs_btree_lookup(const struct nilfs_bmap *btree, 687 __u64 key, int level, __u64 *ptrp) 688 { 689 struct nilfs_btree_path *path; 690 int ret; 691 692 path = nilfs_btree_alloc_path(); 693 if (path == NULL) 694 return -ENOMEM; 695 696 ret = nilfs_btree_do_lookup(btree, path, key, ptrp, level, 0); 697 698 nilfs_btree_free_path(path); 699 700 return ret; 701 } 702 703 static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree, 704 __u64 key, __u64 *ptrp, 705 unsigned int maxblocks) 706 { 707 struct nilfs_btree_path *path; 708 struct nilfs_btree_node *node; 709 struct inode *dat = NULL; 710 __u64 ptr, ptr2; 711 sector_t blocknr; 712 int level = NILFS_BTREE_LEVEL_NODE_MIN; 713 int ret, cnt, index, maxlevel, ncmax; 714 struct nilfs_btree_readahead_info p; 715 716 path = nilfs_btree_alloc_path(); 717 if (path == NULL) 718 return -ENOMEM; 719 720 ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level, 1); 721 if (ret < 0) 722 goto out; 723 724 if (NILFS_BMAP_USE_VBN(btree)) { 725 dat = nilfs_bmap_get_dat(btree); 726 ret = nilfs_dat_translate(dat, ptr, &blocknr); 727 if (ret < 0) 728 goto dat_error; 729 ptr = blocknr; 730 } 731 cnt = 1; 732 if (cnt == maxblocks) 733 goto end; 734 735 maxlevel = nilfs_btree_height(btree) - 1; 736 node = nilfs_btree_get_node(btree, path, level, &ncmax); 737 index = path[level].bp_index + 1; 738 for (;;) { 739 while (index < nilfs_btree_node_get_nchildren(node)) { 740 if (nilfs_btree_node_get_key(node, index) != 741 key + cnt) 742 goto end; 743 ptr2 = nilfs_btree_node_get_ptr(node, index, ncmax); 744 if (dat) { 745 ret = nilfs_dat_translate(dat, ptr2, &blocknr); 746 if (ret < 0) 747 goto dat_error; 748 ptr2 = blocknr; 749 } 750 if (ptr2 != ptr + cnt || ++cnt == maxblocks) 751 goto end; 752 index++; 753 } 754 if (level == maxlevel) 755 break; 756 757 /* look-up right sibling node */ 758 p.node = nilfs_btree_get_node(btree, path, level + 1, &p.ncmax); 759 p.index = path[level + 1].bp_index + 1; 760 p.max_ra_blocks = 7; 761 if (p.index >= nilfs_btree_node_get_nchildren(p.node) || 762 nilfs_btree_node_get_key(p.node, p.index) != key + cnt) 763 break; 764 ptr2 = nilfs_btree_node_get_ptr(p.node, p.index, p.ncmax); 765 path[level + 1].bp_index = p.index; 766 767 brelse(path[level].bp_bh); 768 path[level].bp_bh = NULL; 769 770 ret = __nilfs_btree_get_block(btree, ptr2, &path[level].bp_bh, 771 &p); 772 if (ret < 0) 773 goto out; 774 node = nilfs_btree_get_nonroot_node(path, level); 775 ncmax = nilfs_btree_nchildren_per_block(btree); 776 index = 0; 777 path[level].bp_index = index; 778 } 779 end: 780 *ptrp = ptr; 781 ret = cnt; 782 out: 783 nilfs_btree_free_path(path); 784 return ret; 785 786 dat_error: 787 if (ret == -ENOENT) 788 ret = -EINVAL; /* Notify bmap layer of metadata corruption */ 789 goto out; 790 } 791 792 static void nilfs_btree_promote_key(struct nilfs_bmap *btree, 793 struct nilfs_btree_path *path, 794 int level, __u64 key) 795 { 796 if (level < nilfs_btree_height(btree) - 1) { 797 do { 798 nilfs_btree_node_set_key( 799 nilfs_btree_get_nonroot_node(path, level), 800 path[level].bp_index, key); 801 if (!buffer_dirty(path[level].bp_bh)) 802 mark_buffer_dirty(path[level].bp_bh); 803 } while ((path[level].bp_index == 0) && 804 (++level < nilfs_btree_height(btree) - 1)); 805 } 806 807 /* root */ 808 if (level == nilfs_btree_height(btree) - 1) { 809 nilfs_btree_node_set_key(nilfs_btree_get_root(btree), 810 path[level].bp_index, key); 811 } 812 } 813 814 static void nilfs_btree_do_insert(struct nilfs_bmap *btree, 815 struct nilfs_btree_path *path, 816 int level, __u64 *keyp, __u64 *ptrp) 817 { 818 struct nilfs_btree_node *node; 819 int ncblk; 820 821 if (level < nilfs_btree_height(btree) - 1) { 822 node = nilfs_btree_get_nonroot_node(path, level); 823 ncblk = nilfs_btree_nchildren_per_block(btree); 824 nilfs_btree_node_insert(node, path[level].bp_index, 825 *keyp, *ptrp, ncblk); 826 if (!buffer_dirty(path[level].bp_bh)) 827 mark_buffer_dirty(path[level].bp_bh); 828 829 if (path[level].bp_index == 0) 830 nilfs_btree_promote_key(btree, path, level + 1, 831 nilfs_btree_node_get_key(node, 832 0)); 833 } else { 834 node = nilfs_btree_get_root(btree); 835 nilfs_btree_node_insert(node, path[level].bp_index, 836 *keyp, *ptrp, 837 NILFS_BTREE_ROOT_NCHILDREN_MAX); 838 } 839 } 840 841 static void nilfs_btree_carry_left(struct nilfs_bmap *btree, 842 struct nilfs_btree_path *path, 843 int level, __u64 *keyp, __u64 *ptrp) 844 { 845 struct nilfs_btree_node *node, *left; 846 int nchildren, lnchildren, n, move, ncblk; 847 848 node = nilfs_btree_get_nonroot_node(path, level); 849 left = nilfs_btree_get_sib_node(path, level); 850 nchildren = nilfs_btree_node_get_nchildren(node); 851 lnchildren = nilfs_btree_node_get_nchildren(left); 852 ncblk = nilfs_btree_nchildren_per_block(btree); 853 move = 0; 854 855 n = (nchildren + lnchildren + 1) / 2 - lnchildren; 856 if (n > path[level].bp_index) { 857 /* move insert point */ 858 n--; 859 move = 1; 860 } 861 862 nilfs_btree_node_move_left(left, node, n, ncblk, ncblk); 863 864 if (!buffer_dirty(path[level].bp_bh)) 865 mark_buffer_dirty(path[level].bp_bh); 866 if (!buffer_dirty(path[level].bp_sib_bh)) 867 mark_buffer_dirty(path[level].bp_sib_bh); 868 869 nilfs_btree_promote_key(btree, path, level + 1, 870 nilfs_btree_node_get_key(node, 0)); 871 872 if (move) { 873 brelse(path[level].bp_bh); 874 path[level].bp_bh = path[level].bp_sib_bh; 875 path[level].bp_sib_bh = NULL; 876 path[level].bp_index += lnchildren; 877 path[level + 1].bp_index--; 878 } else { 879 brelse(path[level].bp_sib_bh); 880 path[level].bp_sib_bh = NULL; 881 path[level].bp_index -= n; 882 } 883 884 nilfs_btree_do_insert(btree, path, level, keyp, ptrp); 885 } 886 887 static void nilfs_btree_carry_right(struct nilfs_bmap *btree, 888 struct nilfs_btree_path *path, 889 int level, __u64 *keyp, __u64 *ptrp) 890 { 891 struct nilfs_btree_node *node, *right; 892 int nchildren, rnchildren, n, move, ncblk; 893 894 node = nilfs_btree_get_nonroot_node(path, level); 895 right = nilfs_btree_get_sib_node(path, level); 896 nchildren = nilfs_btree_node_get_nchildren(node); 897 rnchildren = nilfs_btree_node_get_nchildren(right); 898 ncblk = nilfs_btree_nchildren_per_block(btree); 899 move = 0; 900 901 n = (nchildren + rnchildren + 1) / 2 - rnchildren; 902 if (n > nchildren - path[level].bp_index) { 903 /* move insert point */ 904 n--; 905 move = 1; 906 } 907 908 nilfs_btree_node_move_right(node, right, n, ncblk, ncblk); 909 910 if (!buffer_dirty(path[level].bp_bh)) 911 mark_buffer_dirty(path[level].bp_bh); 912 if (!buffer_dirty(path[level].bp_sib_bh)) 913 mark_buffer_dirty(path[level].bp_sib_bh); 914 915 path[level + 1].bp_index++; 916 nilfs_btree_promote_key(btree, path, level + 1, 917 nilfs_btree_node_get_key(right, 0)); 918 path[level + 1].bp_index--; 919 920 if (move) { 921 brelse(path[level].bp_bh); 922 path[level].bp_bh = path[level].bp_sib_bh; 923 path[level].bp_sib_bh = NULL; 924 path[level].bp_index -= nilfs_btree_node_get_nchildren(node); 925 path[level + 1].bp_index++; 926 } else { 927 brelse(path[level].bp_sib_bh); 928 path[level].bp_sib_bh = NULL; 929 } 930 931 nilfs_btree_do_insert(btree, path, level, keyp, ptrp); 932 } 933 934 static void nilfs_btree_split(struct nilfs_bmap *btree, 935 struct nilfs_btree_path *path, 936 int level, __u64 *keyp, __u64 *ptrp) 937 { 938 struct nilfs_btree_node *node, *right; 939 int nchildren, n, move, ncblk; 940 941 node = nilfs_btree_get_nonroot_node(path, level); 942 right = nilfs_btree_get_sib_node(path, level); 943 nchildren = nilfs_btree_node_get_nchildren(node); 944 ncblk = nilfs_btree_nchildren_per_block(btree); 945 move = 0; 946 947 n = (nchildren + 1) / 2; 948 if (n > nchildren - path[level].bp_index) { 949 n--; 950 move = 1; 951 } 952 953 nilfs_btree_node_move_right(node, right, n, ncblk, ncblk); 954 955 if (!buffer_dirty(path[level].bp_bh)) 956 mark_buffer_dirty(path[level].bp_bh); 957 if (!buffer_dirty(path[level].bp_sib_bh)) 958 mark_buffer_dirty(path[level].bp_sib_bh); 959 960 if (move) { 961 path[level].bp_index -= nilfs_btree_node_get_nchildren(node); 962 nilfs_btree_node_insert(right, path[level].bp_index, 963 *keyp, *ptrp, ncblk); 964 965 *keyp = nilfs_btree_node_get_key(right, 0); 966 *ptrp = path[level].bp_newreq.bpr_ptr; 967 968 brelse(path[level].bp_bh); 969 path[level].bp_bh = path[level].bp_sib_bh; 970 path[level].bp_sib_bh = NULL; 971 } else { 972 nilfs_btree_do_insert(btree, path, level, keyp, ptrp); 973 974 *keyp = nilfs_btree_node_get_key(right, 0); 975 *ptrp = path[level].bp_newreq.bpr_ptr; 976 977 brelse(path[level].bp_sib_bh); 978 path[level].bp_sib_bh = NULL; 979 } 980 981 path[level + 1].bp_index++; 982 } 983 984 static void nilfs_btree_grow(struct nilfs_bmap *btree, 985 struct nilfs_btree_path *path, 986 int level, __u64 *keyp, __u64 *ptrp) 987 { 988 struct nilfs_btree_node *root, *child; 989 int n, ncblk; 990 991 root = nilfs_btree_get_root(btree); 992 child = nilfs_btree_get_sib_node(path, level); 993 ncblk = nilfs_btree_nchildren_per_block(btree); 994 995 n = nilfs_btree_node_get_nchildren(root); 996 997 nilfs_btree_node_move_right(root, child, n, 998 NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk); 999 nilfs_btree_node_set_level(root, level + 1); 1000 1001 if (!buffer_dirty(path[level].bp_sib_bh)) 1002 mark_buffer_dirty(path[level].bp_sib_bh); 1003 1004 path[level].bp_bh = path[level].bp_sib_bh; 1005 path[level].bp_sib_bh = NULL; 1006 1007 nilfs_btree_do_insert(btree, path, level, keyp, ptrp); 1008 1009 *keyp = nilfs_btree_node_get_key(child, 0); 1010 *ptrp = path[level].bp_newreq.bpr_ptr; 1011 } 1012 1013 static __u64 nilfs_btree_find_near(const struct nilfs_bmap *btree, 1014 const struct nilfs_btree_path *path) 1015 { 1016 struct nilfs_btree_node *node; 1017 int level, ncmax; 1018 1019 if (path == NULL) 1020 return NILFS_BMAP_INVALID_PTR; 1021 1022 /* left sibling */ 1023 level = NILFS_BTREE_LEVEL_NODE_MIN; 1024 if (path[level].bp_index > 0) { 1025 node = nilfs_btree_get_node(btree, path, level, &ncmax); 1026 return nilfs_btree_node_get_ptr(node, 1027 path[level].bp_index - 1, 1028 ncmax); 1029 } 1030 1031 /* parent */ 1032 level = NILFS_BTREE_LEVEL_NODE_MIN + 1; 1033 if (level <= nilfs_btree_height(btree) - 1) { 1034 node = nilfs_btree_get_node(btree, path, level, &ncmax); 1035 return nilfs_btree_node_get_ptr(node, path[level].bp_index, 1036 ncmax); 1037 } 1038 1039 return NILFS_BMAP_INVALID_PTR; 1040 } 1041 1042 static __u64 nilfs_btree_find_target_v(const struct nilfs_bmap *btree, 1043 const struct nilfs_btree_path *path, 1044 __u64 key) 1045 { 1046 __u64 ptr; 1047 1048 ptr = nilfs_bmap_find_target_seq(btree, key); 1049 if (ptr != NILFS_BMAP_INVALID_PTR) 1050 /* sequential access */ 1051 return ptr; 1052 1053 ptr = nilfs_btree_find_near(btree, path); 1054 if (ptr != NILFS_BMAP_INVALID_PTR) 1055 /* near */ 1056 return ptr; 1057 1058 /* block group */ 1059 return nilfs_bmap_find_target_in_group(btree); 1060 } 1061 1062 static int nilfs_btree_prepare_insert(struct nilfs_bmap *btree, 1063 struct nilfs_btree_path *path, 1064 int *levelp, __u64 key, __u64 ptr, 1065 struct nilfs_bmap_stats *stats) 1066 { 1067 struct buffer_head *bh; 1068 struct nilfs_btree_node *node, *parent, *sib; 1069 __u64 sibptr; 1070 int pindex, level, ncmax, ncblk, ret; 1071 struct inode *dat = NULL; 1072 1073 stats->bs_nblocks = 0; 1074 level = NILFS_BTREE_LEVEL_DATA; 1075 1076 /* allocate a new ptr for data block */ 1077 if (NILFS_BMAP_USE_VBN(btree)) { 1078 path[level].bp_newreq.bpr_ptr = 1079 nilfs_btree_find_target_v(btree, path, key); 1080 dat = nilfs_bmap_get_dat(btree); 1081 } 1082 1083 ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat); 1084 if (ret < 0) 1085 goto err_out_data; 1086 1087 ncblk = nilfs_btree_nchildren_per_block(btree); 1088 1089 for (level = NILFS_BTREE_LEVEL_NODE_MIN; 1090 level < nilfs_btree_height(btree) - 1; 1091 level++) { 1092 node = nilfs_btree_get_nonroot_node(path, level); 1093 if (nilfs_btree_node_get_nchildren(node) < ncblk) { 1094 path[level].bp_op = nilfs_btree_do_insert; 1095 stats->bs_nblocks++; 1096 goto out; 1097 } 1098 1099 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); 1100 pindex = path[level + 1].bp_index; 1101 1102 /* left sibling */ 1103 if (pindex > 0) { 1104 sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1, 1105 ncmax); 1106 ret = nilfs_btree_get_block(btree, sibptr, &bh); 1107 if (ret < 0) 1108 goto err_out_child_node; 1109 sib = (struct nilfs_btree_node *)bh->b_data; 1110 if (nilfs_btree_node_get_nchildren(sib) < ncblk) { 1111 path[level].bp_sib_bh = bh; 1112 path[level].bp_op = nilfs_btree_carry_left; 1113 stats->bs_nblocks++; 1114 goto out; 1115 } else { 1116 brelse(bh); 1117 } 1118 } 1119 1120 /* right sibling */ 1121 if (pindex < nilfs_btree_node_get_nchildren(parent) - 1) { 1122 sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1, 1123 ncmax); 1124 ret = nilfs_btree_get_block(btree, sibptr, &bh); 1125 if (ret < 0) 1126 goto err_out_child_node; 1127 sib = (struct nilfs_btree_node *)bh->b_data; 1128 if (nilfs_btree_node_get_nchildren(sib) < ncblk) { 1129 path[level].bp_sib_bh = bh; 1130 path[level].bp_op = nilfs_btree_carry_right; 1131 stats->bs_nblocks++; 1132 goto out; 1133 } else { 1134 brelse(bh); 1135 } 1136 } 1137 1138 /* split */ 1139 path[level].bp_newreq.bpr_ptr = 1140 path[level - 1].bp_newreq.bpr_ptr + 1; 1141 ret = nilfs_bmap_prepare_alloc_ptr(btree, 1142 &path[level].bp_newreq, dat); 1143 if (ret < 0) 1144 goto err_out_child_node; 1145 ret = nilfs_btree_get_new_block(btree, 1146 path[level].bp_newreq.bpr_ptr, 1147 &bh); 1148 if (ret < 0) 1149 goto err_out_curr_node; 1150 1151 stats->bs_nblocks++; 1152 1153 sib = (struct nilfs_btree_node *)bh->b_data; 1154 nilfs_btree_node_init(sib, 0, level, 0, ncblk, NULL, NULL); 1155 path[level].bp_sib_bh = bh; 1156 path[level].bp_op = nilfs_btree_split; 1157 } 1158 1159 /* root */ 1160 node = nilfs_btree_get_root(btree); 1161 if (nilfs_btree_node_get_nchildren(node) < 1162 NILFS_BTREE_ROOT_NCHILDREN_MAX) { 1163 path[level].bp_op = nilfs_btree_do_insert; 1164 stats->bs_nblocks++; 1165 goto out; 1166 } 1167 1168 /* grow */ 1169 path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1; 1170 ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat); 1171 if (ret < 0) 1172 goto err_out_child_node; 1173 ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr, 1174 &bh); 1175 if (ret < 0) 1176 goto err_out_curr_node; 1177 1178 nilfs_btree_node_init((struct nilfs_btree_node *)bh->b_data, 1179 0, level, 0, ncblk, NULL, NULL); 1180 path[level].bp_sib_bh = bh; 1181 path[level].bp_op = nilfs_btree_grow; 1182 1183 level++; 1184 path[level].bp_op = nilfs_btree_do_insert; 1185 1186 /* a newly-created node block and a data block are added */ 1187 stats->bs_nblocks += 2; 1188 1189 /* success */ 1190 out: 1191 *levelp = level; 1192 return ret; 1193 1194 /* error */ 1195 err_out_curr_node: 1196 nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat); 1197 err_out_child_node: 1198 for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) { 1199 nilfs_btnode_delete(path[level].bp_sib_bh); 1200 nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat); 1201 1202 } 1203 1204 nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat); 1205 err_out_data: 1206 *levelp = level; 1207 stats->bs_nblocks = 0; 1208 return ret; 1209 } 1210 1211 static void nilfs_btree_commit_insert(struct nilfs_bmap *btree, 1212 struct nilfs_btree_path *path, 1213 int maxlevel, __u64 key, __u64 ptr) 1214 { 1215 struct inode *dat = NULL; 1216 int level; 1217 1218 set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr)); 1219 ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr; 1220 if (NILFS_BMAP_USE_VBN(btree)) { 1221 nilfs_bmap_set_target_v(btree, key, ptr); 1222 dat = nilfs_bmap_get_dat(btree); 1223 } 1224 1225 for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) { 1226 nilfs_bmap_commit_alloc_ptr(btree, 1227 &path[level - 1].bp_newreq, dat); 1228 path[level].bp_op(btree, path, level, &key, &ptr); 1229 } 1230 1231 if (!nilfs_bmap_dirty(btree)) 1232 nilfs_bmap_set_dirty(btree); 1233 } 1234 1235 static int nilfs_btree_insert(struct nilfs_bmap *btree, __u64 key, __u64 ptr) 1236 { 1237 struct nilfs_btree_path *path; 1238 struct nilfs_bmap_stats stats; 1239 int level, ret; 1240 1241 path = nilfs_btree_alloc_path(); 1242 if (path == NULL) 1243 return -ENOMEM; 1244 1245 ret = nilfs_btree_do_lookup(btree, path, key, NULL, 1246 NILFS_BTREE_LEVEL_NODE_MIN, 0); 1247 if (ret != -ENOENT) { 1248 if (ret == 0) 1249 ret = -EEXIST; 1250 goto out; 1251 } 1252 1253 ret = nilfs_btree_prepare_insert(btree, path, &level, key, ptr, &stats); 1254 if (ret < 0) 1255 goto out; 1256 nilfs_btree_commit_insert(btree, path, level, key, ptr); 1257 nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks); 1258 1259 out: 1260 nilfs_btree_free_path(path); 1261 return ret; 1262 } 1263 1264 static void nilfs_btree_do_delete(struct nilfs_bmap *btree, 1265 struct nilfs_btree_path *path, 1266 int level, __u64 *keyp, __u64 *ptrp) 1267 { 1268 struct nilfs_btree_node *node; 1269 int ncblk; 1270 1271 if (level < nilfs_btree_height(btree) - 1) { 1272 node = nilfs_btree_get_nonroot_node(path, level); 1273 ncblk = nilfs_btree_nchildren_per_block(btree); 1274 nilfs_btree_node_delete(node, path[level].bp_index, 1275 keyp, ptrp, ncblk); 1276 if (!buffer_dirty(path[level].bp_bh)) 1277 mark_buffer_dirty(path[level].bp_bh); 1278 if (path[level].bp_index == 0) 1279 nilfs_btree_promote_key(btree, path, level + 1, 1280 nilfs_btree_node_get_key(node, 0)); 1281 } else { 1282 node = nilfs_btree_get_root(btree); 1283 nilfs_btree_node_delete(node, path[level].bp_index, 1284 keyp, ptrp, 1285 NILFS_BTREE_ROOT_NCHILDREN_MAX); 1286 } 1287 } 1288 1289 static void nilfs_btree_borrow_left(struct nilfs_bmap *btree, 1290 struct nilfs_btree_path *path, 1291 int level, __u64 *keyp, __u64 *ptrp) 1292 { 1293 struct nilfs_btree_node *node, *left; 1294 int nchildren, lnchildren, n, ncblk; 1295 1296 nilfs_btree_do_delete(btree, path, level, keyp, ptrp); 1297 1298 node = nilfs_btree_get_nonroot_node(path, level); 1299 left = nilfs_btree_get_sib_node(path, level); 1300 nchildren = nilfs_btree_node_get_nchildren(node); 1301 lnchildren = nilfs_btree_node_get_nchildren(left); 1302 ncblk = nilfs_btree_nchildren_per_block(btree); 1303 1304 n = (nchildren + lnchildren) / 2 - nchildren; 1305 1306 nilfs_btree_node_move_right(left, node, n, ncblk, ncblk); 1307 1308 if (!buffer_dirty(path[level].bp_bh)) 1309 mark_buffer_dirty(path[level].bp_bh); 1310 if (!buffer_dirty(path[level].bp_sib_bh)) 1311 mark_buffer_dirty(path[level].bp_sib_bh); 1312 1313 nilfs_btree_promote_key(btree, path, level + 1, 1314 nilfs_btree_node_get_key(node, 0)); 1315 1316 brelse(path[level].bp_sib_bh); 1317 path[level].bp_sib_bh = NULL; 1318 path[level].bp_index += n; 1319 } 1320 1321 static void nilfs_btree_borrow_right(struct nilfs_bmap *btree, 1322 struct nilfs_btree_path *path, 1323 int level, __u64 *keyp, __u64 *ptrp) 1324 { 1325 struct nilfs_btree_node *node, *right; 1326 int nchildren, rnchildren, n, ncblk; 1327 1328 nilfs_btree_do_delete(btree, path, level, keyp, ptrp); 1329 1330 node = nilfs_btree_get_nonroot_node(path, level); 1331 right = nilfs_btree_get_sib_node(path, level); 1332 nchildren = nilfs_btree_node_get_nchildren(node); 1333 rnchildren = nilfs_btree_node_get_nchildren(right); 1334 ncblk = nilfs_btree_nchildren_per_block(btree); 1335 1336 n = (nchildren + rnchildren) / 2 - nchildren; 1337 1338 nilfs_btree_node_move_left(node, right, n, ncblk, ncblk); 1339 1340 if (!buffer_dirty(path[level].bp_bh)) 1341 mark_buffer_dirty(path[level].bp_bh); 1342 if (!buffer_dirty(path[level].bp_sib_bh)) 1343 mark_buffer_dirty(path[level].bp_sib_bh); 1344 1345 path[level + 1].bp_index++; 1346 nilfs_btree_promote_key(btree, path, level + 1, 1347 nilfs_btree_node_get_key(right, 0)); 1348 path[level + 1].bp_index--; 1349 1350 brelse(path[level].bp_sib_bh); 1351 path[level].bp_sib_bh = NULL; 1352 } 1353 1354 static void nilfs_btree_concat_left(struct nilfs_bmap *btree, 1355 struct nilfs_btree_path *path, 1356 int level, __u64 *keyp, __u64 *ptrp) 1357 { 1358 struct nilfs_btree_node *node, *left; 1359 int n, ncblk; 1360 1361 nilfs_btree_do_delete(btree, path, level, keyp, ptrp); 1362 1363 node = nilfs_btree_get_nonroot_node(path, level); 1364 left = nilfs_btree_get_sib_node(path, level); 1365 ncblk = nilfs_btree_nchildren_per_block(btree); 1366 1367 n = nilfs_btree_node_get_nchildren(node); 1368 1369 nilfs_btree_node_move_left(left, node, n, ncblk, ncblk); 1370 1371 if (!buffer_dirty(path[level].bp_sib_bh)) 1372 mark_buffer_dirty(path[level].bp_sib_bh); 1373 1374 nilfs_btnode_delete(path[level].bp_bh); 1375 path[level].bp_bh = path[level].bp_sib_bh; 1376 path[level].bp_sib_bh = NULL; 1377 path[level].bp_index += nilfs_btree_node_get_nchildren(left); 1378 } 1379 1380 static void nilfs_btree_concat_right(struct nilfs_bmap *btree, 1381 struct nilfs_btree_path *path, 1382 int level, __u64 *keyp, __u64 *ptrp) 1383 { 1384 struct nilfs_btree_node *node, *right; 1385 int n, ncblk; 1386 1387 nilfs_btree_do_delete(btree, path, level, keyp, ptrp); 1388 1389 node = nilfs_btree_get_nonroot_node(path, level); 1390 right = nilfs_btree_get_sib_node(path, level); 1391 ncblk = nilfs_btree_nchildren_per_block(btree); 1392 1393 n = nilfs_btree_node_get_nchildren(right); 1394 1395 nilfs_btree_node_move_left(node, right, n, ncblk, ncblk); 1396 1397 if (!buffer_dirty(path[level].bp_bh)) 1398 mark_buffer_dirty(path[level].bp_bh); 1399 1400 nilfs_btnode_delete(path[level].bp_sib_bh); 1401 path[level].bp_sib_bh = NULL; 1402 path[level + 1].bp_index++; 1403 } 1404 1405 static void nilfs_btree_shrink(struct nilfs_bmap *btree, 1406 struct nilfs_btree_path *path, 1407 int level, __u64 *keyp, __u64 *ptrp) 1408 { 1409 struct nilfs_btree_node *root, *child; 1410 int n, ncblk; 1411 1412 nilfs_btree_do_delete(btree, path, level, keyp, ptrp); 1413 1414 root = nilfs_btree_get_root(btree); 1415 child = nilfs_btree_get_nonroot_node(path, level); 1416 ncblk = nilfs_btree_nchildren_per_block(btree); 1417 1418 nilfs_btree_node_delete(root, 0, NULL, NULL, 1419 NILFS_BTREE_ROOT_NCHILDREN_MAX); 1420 nilfs_btree_node_set_level(root, level); 1421 n = nilfs_btree_node_get_nchildren(child); 1422 nilfs_btree_node_move_left(root, child, n, 1423 NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk); 1424 1425 nilfs_btnode_delete(path[level].bp_bh); 1426 path[level].bp_bh = NULL; 1427 } 1428 1429 static void nilfs_btree_nop(struct nilfs_bmap *btree, 1430 struct nilfs_btree_path *path, 1431 int level, __u64 *keyp, __u64 *ptrp) 1432 { 1433 } 1434 1435 static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree, 1436 struct nilfs_btree_path *path, 1437 int *levelp, 1438 struct nilfs_bmap_stats *stats, 1439 struct inode *dat) 1440 { 1441 struct buffer_head *bh; 1442 struct nilfs_btree_node *node, *parent, *sib; 1443 __u64 sibptr; 1444 int pindex, dindex, level, ncmin, ncmax, ncblk, ret; 1445 1446 ret = 0; 1447 stats->bs_nblocks = 0; 1448 ncmin = NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree)); 1449 ncblk = nilfs_btree_nchildren_per_block(btree); 1450 1451 for (level = NILFS_BTREE_LEVEL_NODE_MIN, dindex = path[level].bp_index; 1452 level < nilfs_btree_height(btree) - 1; 1453 level++) { 1454 node = nilfs_btree_get_nonroot_node(path, level); 1455 path[level].bp_oldreq.bpr_ptr = 1456 nilfs_btree_node_get_ptr(node, dindex, ncblk); 1457 ret = nilfs_bmap_prepare_end_ptr(btree, 1458 &path[level].bp_oldreq, dat); 1459 if (ret < 0) 1460 goto err_out_child_node; 1461 1462 if (nilfs_btree_node_get_nchildren(node) > ncmin) { 1463 path[level].bp_op = nilfs_btree_do_delete; 1464 stats->bs_nblocks++; 1465 goto out; 1466 } 1467 1468 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); 1469 pindex = path[level + 1].bp_index; 1470 dindex = pindex; 1471 1472 if (pindex > 0) { 1473 /* left sibling */ 1474 sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1, 1475 ncmax); 1476 ret = nilfs_btree_get_block(btree, sibptr, &bh); 1477 if (ret < 0) 1478 goto err_out_curr_node; 1479 sib = (struct nilfs_btree_node *)bh->b_data; 1480 if (nilfs_btree_node_get_nchildren(sib) > ncmin) { 1481 path[level].bp_sib_bh = bh; 1482 path[level].bp_op = nilfs_btree_borrow_left; 1483 stats->bs_nblocks++; 1484 goto out; 1485 } else { 1486 path[level].bp_sib_bh = bh; 1487 path[level].bp_op = nilfs_btree_concat_left; 1488 stats->bs_nblocks++; 1489 /* continue; */ 1490 } 1491 } else if (pindex < 1492 nilfs_btree_node_get_nchildren(parent) - 1) { 1493 /* right sibling */ 1494 sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1, 1495 ncmax); 1496 ret = nilfs_btree_get_block(btree, sibptr, &bh); 1497 if (ret < 0) 1498 goto err_out_curr_node; 1499 sib = (struct nilfs_btree_node *)bh->b_data; 1500 if (nilfs_btree_node_get_nchildren(sib) > ncmin) { 1501 path[level].bp_sib_bh = bh; 1502 path[level].bp_op = nilfs_btree_borrow_right; 1503 stats->bs_nblocks++; 1504 goto out; 1505 } else { 1506 path[level].bp_sib_bh = bh; 1507 path[level].bp_op = nilfs_btree_concat_right; 1508 stats->bs_nblocks++; 1509 /* 1510 * When merging right sibling node 1511 * into the current node, pointer to 1512 * the right sibling node must be 1513 * terminated instead. The adjustment 1514 * below is required for that. 1515 */ 1516 dindex = pindex + 1; 1517 /* continue; */ 1518 } 1519 } else { 1520 /* no siblings */ 1521 /* the only child of the root node */ 1522 WARN_ON(level != nilfs_btree_height(btree) - 2); 1523 if (nilfs_btree_node_get_nchildren(node) - 1 <= 1524 NILFS_BTREE_ROOT_NCHILDREN_MAX) { 1525 path[level].bp_op = nilfs_btree_shrink; 1526 stats->bs_nblocks += 2; 1527 level++; 1528 path[level].bp_op = nilfs_btree_nop; 1529 goto shrink_root_child; 1530 } else { 1531 path[level].bp_op = nilfs_btree_do_delete; 1532 stats->bs_nblocks++; 1533 goto out; 1534 } 1535 } 1536 } 1537 1538 /* child of the root node is deleted */ 1539 path[level].bp_op = nilfs_btree_do_delete; 1540 stats->bs_nblocks++; 1541 1542 shrink_root_child: 1543 node = nilfs_btree_get_root(btree); 1544 path[level].bp_oldreq.bpr_ptr = 1545 nilfs_btree_node_get_ptr(node, dindex, 1546 NILFS_BTREE_ROOT_NCHILDREN_MAX); 1547 1548 ret = nilfs_bmap_prepare_end_ptr(btree, &path[level].bp_oldreq, dat); 1549 if (ret < 0) 1550 goto err_out_child_node; 1551 1552 /* success */ 1553 out: 1554 *levelp = level; 1555 return ret; 1556 1557 /* error */ 1558 err_out_curr_node: 1559 nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat); 1560 err_out_child_node: 1561 for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) { 1562 brelse(path[level].bp_sib_bh); 1563 nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat); 1564 } 1565 *levelp = level; 1566 stats->bs_nblocks = 0; 1567 return ret; 1568 } 1569 1570 static void nilfs_btree_commit_delete(struct nilfs_bmap *btree, 1571 struct nilfs_btree_path *path, 1572 int maxlevel, struct inode *dat) 1573 { 1574 int level; 1575 1576 for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) { 1577 nilfs_bmap_commit_end_ptr(btree, &path[level].bp_oldreq, dat); 1578 path[level].bp_op(btree, path, level, NULL, NULL); 1579 } 1580 1581 if (!nilfs_bmap_dirty(btree)) 1582 nilfs_bmap_set_dirty(btree); 1583 } 1584 1585 static int nilfs_btree_delete(struct nilfs_bmap *btree, __u64 key) 1586 1587 { 1588 struct nilfs_btree_path *path; 1589 struct nilfs_bmap_stats stats; 1590 struct inode *dat; 1591 int level, ret; 1592 1593 path = nilfs_btree_alloc_path(); 1594 if (path == NULL) 1595 return -ENOMEM; 1596 1597 ret = nilfs_btree_do_lookup(btree, path, key, NULL, 1598 NILFS_BTREE_LEVEL_NODE_MIN, 0); 1599 if (ret < 0) 1600 goto out; 1601 1602 1603 dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL; 1604 1605 ret = nilfs_btree_prepare_delete(btree, path, &level, &stats, dat); 1606 if (ret < 0) 1607 goto out; 1608 nilfs_btree_commit_delete(btree, path, level, dat); 1609 nilfs_inode_sub_blocks(btree->b_inode, stats.bs_nblocks); 1610 1611 out: 1612 nilfs_btree_free_path(path); 1613 return ret; 1614 } 1615 1616 static int nilfs_btree_seek_key(const struct nilfs_bmap *btree, __u64 start, 1617 __u64 *keyp) 1618 { 1619 struct nilfs_btree_path *path; 1620 const int minlevel = NILFS_BTREE_LEVEL_NODE_MIN; 1621 int ret; 1622 1623 path = nilfs_btree_alloc_path(); 1624 if (!path) 1625 return -ENOMEM; 1626 1627 ret = nilfs_btree_do_lookup(btree, path, start, NULL, minlevel, 0); 1628 if (!ret) 1629 *keyp = start; 1630 else if (ret == -ENOENT) 1631 ret = nilfs_btree_get_next_key(btree, path, minlevel, keyp); 1632 1633 nilfs_btree_free_path(path); 1634 return ret; 1635 } 1636 1637 static int nilfs_btree_last_key(const struct nilfs_bmap *btree, __u64 *keyp) 1638 { 1639 struct nilfs_btree_path *path; 1640 int ret; 1641 1642 path = nilfs_btree_alloc_path(); 1643 if (path == NULL) 1644 return -ENOMEM; 1645 1646 ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL); 1647 1648 nilfs_btree_free_path(path); 1649 1650 return ret; 1651 } 1652 1653 static int nilfs_btree_check_delete(struct nilfs_bmap *btree, __u64 key) 1654 { 1655 struct buffer_head *bh; 1656 struct nilfs_btree_node *root, *node; 1657 __u64 maxkey, nextmaxkey; 1658 __u64 ptr; 1659 int nchildren, ret; 1660 1661 root = nilfs_btree_get_root(btree); 1662 nchildren = nilfs_btree_node_get_nchildren(root); 1663 if (unlikely(nchildren == 0)) 1664 return 0; 1665 1666 switch (nilfs_btree_height(btree)) { 1667 case 2: 1668 bh = NULL; 1669 node = root; 1670 break; 1671 case 3: 1672 if (nchildren > 1) 1673 return 0; 1674 ptr = nilfs_btree_node_get_ptr(root, nchildren - 1, 1675 NILFS_BTREE_ROOT_NCHILDREN_MAX); 1676 ret = nilfs_btree_get_block(btree, ptr, &bh); 1677 if (ret < 0) 1678 return ret; 1679 node = (struct nilfs_btree_node *)bh->b_data; 1680 nchildren = nilfs_btree_node_get_nchildren(node); 1681 break; 1682 default: 1683 return 0; 1684 } 1685 1686 maxkey = nilfs_btree_node_get_key(node, nchildren - 1); 1687 nextmaxkey = (nchildren > 1) ? 1688 nilfs_btree_node_get_key(node, nchildren - 2) : 0; 1689 brelse(bh); 1690 1691 return (maxkey == key) && (nextmaxkey < NILFS_BMAP_LARGE_LOW); 1692 } 1693 1694 static int nilfs_btree_gather_data(struct nilfs_bmap *btree, 1695 __u64 *keys, __u64 *ptrs, int nitems) 1696 { 1697 struct buffer_head *bh; 1698 struct nilfs_btree_node *node, *root; 1699 __le64 *dkeys; 1700 __le64 *dptrs; 1701 __u64 ptr; 1702 int nchildren, ncmax, i, ret; 1703 1704 root = nilfs_btree_get_root(btree); 1705 switch (nilfs_btree_height(btree)) { 1706 case 2: 1707 bh = NULL; 1708 node = root; 1709 ncmax = NILFS_BTREE_ROOT_NCHILDREN_MAX; 1710 break; 1711 case 3: 1712 nchildren = nilfs_btree_node_get_nchildren(root); 1713 WARN_ON(nchildren > 1); 1714 ptr = nilfs_btree_node_get_ptr(root, nchildren - 1, 1715 NILFS_BTREE_ROOT_NCHILDREN_MAX); 1716 ret = nilfs_btree_get_block(btree, ptr, &bh); 1717 if (ret < 0) 1718 return ret; 1719 node = (struct nilfs_btree_node *)bh->b_data; 1720 ncmax = nilfs_btree_nchildren_per_block(btree); 1721 break; 1722 default: 1723 node = NULL; 1724 return -EINVAL; 1725 } 1726 1727 nchildren = nilfs_btree_node_get_nchildren(node); 1728 if (nchildren < nitems) 1729 nitems = nchildren; 1730 dkeys = nilfs_btree_node_dkeys(node); 1731 dptrs = nilfs_btree_node_dptrs(node, ncmax); 1732 for (i = 0; i < nitems; i++) { 1733 keys[i] = le64_to_cpu(dkeys[i]); 1734 ptrs[i] = le64_to_cpu(dptrs[i]); 1735 } 1736 1737 brelse(bh); 1738 1739 return nitems; 1740 } 1741 1742 static int 1743 nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *btree, __u64 key, 1744 union nilfs_bmap_ptr_req *dreq, 1745 union nilfs_bmap_ptr_req *nreq, 1746 struct buffer_head **bhp, 1747 struct nilfs_bmap_stats *stats) 1748 { 1749 struct buffer_head *bh; 1750 struct inode *dat = NULL; 1751 int ret; 1752 1753 stats->bs_nblocks = 0; 1754 1755 /* for data */ 1756 /* cannot find near ptr */ 1757 if (NILFS_BMAP_USE_VBN(btree)) { 1758 dreq->bpr_ptr = nilfs_btree_find_target_v(btree, NULL, key); 1759 dat = nilfs_bmap_get_dat(btree); 1760 } 1761 1762 ret = nilfs_attach_btree_node_cache(&NILFS_BMAP_I(btree)->vfs_inode); 1763 if (ret < 0) 1764 return ret; 1765 1766 ret = nilfs_bmap_prepare_alloc_ptr(btree, dreq, dat); 1767 if (ret < 0) 1768 return ret; 1769 1770 *bhp = NULL; 1771 stats->bs_nblocks++; 1772 if (nreq != NULL) { 1773 nreq->bpr_ptr = dreq->bpr_ptr + 1; 1774 ret = nilfs_bmap_prepare_alloc_ptr(btree, nreq, dat); 1775 if (ret < 0) 1776 goto err_out_dreq; 1777 1778 ret = nilfs_btree_get_new_block(btree, nreq->bpr_ptr, &bh); 1779 if (ret < 0) 1780 goto err_out_nreq; 1781 1782 *bhp = bh; 1783 stats->bs_nblocks++; 1784 } 1785 1786 /* success */ 1787 return 0; 1788 1789 /* error */ 1790 err_out_nreq: 1791 nilfs_bmap_abort_alloc_ptr(btree, nreq, dat); 1792 err_out_dreq: 1793 nilfs_bmap_abort_alloc_ptr(btree, dreq, dat); 1794 stats->bs_nblocks = 0; 1795 return ret; 1796 1797 } 1798 1799 static void 1800 nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *btree, 1801 __u64 key, __u64 ptr, 1802 const __u64 *keys, const __u64 *ptrs, 1803 int n, 1804 union nilfs_bmap_ptr_req *dreq, 1805 union nilfs_bmap_ptr_req *nreq, 1806 struct buffer_head *bh) 1807 { 1808 struct nilfs_btree_node *node; 1809 struct inode *dat; 1810 __u64 tmpptr; 1811 int ncblk; 1812 1813 /* free resources */ 1814 if (btree->b_ops->bop_clear != NULL) 1815 btree->b_ops->bop_clear(btree); 1816 1817 /* ptr must be a pointer to a buffer head. */ 1818 set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr)); 1819 1820 /* convert and insert */ 1821 dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL; 1822 __nilfs_btree_init(btree); 1823 if (nreq != NULL) { 1824 nilfs_bmap_commit_alloc_ptr(btree, dreq, dat); 1825 nilfs_bmap_commit_alloc_ptr(btree, nreq, dat); 1826 1827 /* create child node at level 1 */ 1828 node = (struct nilfs_btree_node *)bh->b_data; 1829 ncblk = nilfs_btree_nchildren_per_block(btree); 1830 nilfs_btree_node_init(node, 0, 1, n, ncblk, keys, ptrs); 1831 nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr, ncblk); 1832 if (!buffer_dirty(bh)) 1833 mark_buffer_dirty(bh); 1834 if (!nilfs_bmap_dirty(btree)) 1835 nilfs_bmap_set_dirty(btree); 1836 1837 brelse(bh); 1838 1839 /* create root node at level 2 */ 1840 node = nilfs_btree_get_root(btree); 1841 tmpptr = nreq->bpr_ptr; 1842 nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 2, 1, 1843 NILFS_BTREE_ROOT_NCHILDREN_MAX, 1844 &keys[0], &tmpptr); 1845 } else { 1846 nilfs_bmap_commit_alloc_ptr(btree, dreq, dat); 1847 1848 /* create root node at level 1 */ 1849 node = nilfs_btree_get_root(btree); 1850 nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 1, n, 1851 NILFS_BTREE_ROOT_NCHILDREN_MAX, 1852 keys, ptrs); 1853 nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr, 1854 NILFS_BTREE_ROOT_NCHILDREN_MAX); 1855 if (!nilfs_bmap_dirty(btree)) 1856 nilfs_bmap_set_dirty(btree); 1857 } 1858 1859 if (NILFS_BMAP_USE_VBN(btree)) 1860 nilfs_bmap_set_target_v(btree, key, dreq->bpr_ptr); 1861 } 1862 1863 /** 1864 * nilfs_btree_convert_and_insert - 1865 * @bmap: 1866 * @key: 1867 * @ptr: 1868 * @keys: 1869 * @ptrs: 1870 * @n: 1871 */ 1872 int nilfs_btree_convert_and_insert(struct nilfs_bmap *btree, 1873 __u64 key, __u64 ptr, 1874 const __u64 *keys, const __u64 *ptrs, int n) 1875 { 1876 struct buffer_head *bh = NULL; 1877 union nilfs_bmap_ptr_req dreq, nreq, *di, *ni; 1878 struct nilfs_bmap_stats stats; 1879 int ret; 1880 1881 if (n + 1 <= NILFS_BTREE_ROOT_NCHILDREN_MAX) { 1882 di = &dreq; 1883 ni = NULL; 1884 } else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX( 1885 nilfs_btree_node_size(btree))) { 1886 di = &dreq; 1887 ni = &nreq; 1888 } else { 1889 di = NULL; 1890 ni = NULL; 1891 BUG(); 1892 } 1893 1894 ret = nilfs_btree_prepare_convert_and_insert(btree, key, di, ni, &bh, 1895 &stats); 1896 if (ret < 0) 1897 return ret; 1898 nilfs_btree_commit_convert_and_insert(btree, key, ptr, keys, ptrs, n, 1899 di, ni, bh); 1900 nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks); 1901 return 0; 1902 } 1903 1904 static int nilfs_btree_propagate_p(struct nilfs_bmap *btree, 1905 struct nilfs_btree_path *path, 1906 int level, 1907 struct buffer_head *bh) 1908 { 1909 while ((++level < nilfs_btree_height(btree) - 1) && 1910 !buffer_dirty(path[level].bp_bh)) 1911 mark_buffer_dirty(path[level].bp_bh); 1912 1913 return 0; 1914 } 1915 1916 static int nilfs_btree_prepare_update_v(struct nilfs_bmap *btree, 1917 struct nilfs_btree_path *path, 1918 int level, struct inode *dat) 1919 { 1920 struct nilfs_btree_node *parent; 1921 int ncmax, ret; 1922 1923 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); 1924 path[level].bp_oldreq.bpr_ptr = 1925 nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index, 1926 ncmax); 1927 path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1; 1928 ret = nilfs_dat_prepare_update(dat, &path[level].bp_oldreq.bpr_req, 1929 &path[level].bp_newreq.bpr_req); 1930 if (ret < 0) 1931 return ret; 1932 1933 if (buffer_nilfs_node(path[level].bp_bh)) { 1934 path[level].bp_ctxt.oldkey = path[level].bp_oldreq.bpr_ptr; 1935 path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr; 1936 path[level].bp_ctxt.bh = path[level].bp_bh; 1937 ret = nilfs_btnode_prepare_change_key( 1938 NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping, 1939 &path[level].bp_ctxt); 1940 if (ret < 0) { 1941 nilfs_dat_abort_update(dat, 1942 &path[level].bp_oldreq.bpr_req, 1943 &path[level].bp_newreq.bpr_req); 1944 return ret; 1945 } 1946 } 1947 1948 return 0; 1949 } 1950 1951 static void nilfs_btree_commit_update_v(struct nilfs_bmap *btree, 1952 struct nilfs_btree_path *path, 1953 int level, struct inode *dat) 1954 { 1955 struct nilfs_btree_node *parent; 1956 int ncmax; 1957 1958 nilfs_dat_commit_update(dat, &path[level].bp_oldreq.bpr_req, 1959 &path[level].bp_newreq.bpr_req, 1960 btree->b_ptr_type == NILFS_BMAP_PTR_VS); 1961 1962 if (buffer_nilfs_node(path[level].bp_bh)) { 1963 nilfs_btnode_commit_change_key( 1964 NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping, 1965 &path[level].bp_ctxt); 1966 path[level].bp_bh = path[level].bp_ctxt.bh; 1967 } 1968 set_buffer_nilfs_volatile(path[level].bp_bh); 1969 1970 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); 1971 nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index, 1972 path[level].bp_newreq.bpr_ptr, ncmax); 1973 } 1974 1975 static void nilfs_btree_abort_update_v(struct nilfs_bmap *btree, 1976 struct nilfs_btree_path *path, 1977 int level, struct inode *dat) 1978 { 1979 nilfs_dat_abort_update(dat, &path[level].bp_oldreq.bpr_req, 1980 &path[level].bp_newreq.bpr_req); 1981 if (buffer_nilfs_node(path[level].bp_bh)) 1982 nilfs_btnode_abort_change_key( 1983 NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping, 1984 &path[level].bp_ctxt); 1985 } 1986 1987 static int nilfs_btree_prepare_propagate_v(struct nilfs_bmap *btree, 1988 struct nilfs_btree_path *path, 1989 int minlevel, int *maxlevelp, 1990 struct inode *dat) 1991 { 1992 int level, ret; 1993 1994 level = minlevel; 1995 if (!buffer_nilfs_volatile(path[level].bp_bh)) { 1996 ret = nilfs_btree_prepare_update_v(btree, path, level, dat); 1997 if (ret < 0) 1998 return ret; 1999 } 2000 while ((++level < nilfs_btree_height(btree) - 1) && 2001 !buffer_dirty(path[level].bp_bh)) { 2002 2003 WARN_ON(buffer_nilfs_volatile(path[level].bp_bh)); 2004 ret = nilfs_btree_prepare_update_v(btree, path, level, dat); 2005 if (ret < 0) 2006 goto out; 2007 } 2008 2009 /* success */ 2010 *maxlevelp = level - 1; 2011 return 0; 2012 2013 /* error */ 2014 out: 2015 while (--level > minlevel) 2016 nilfs_btree_abort_update_v(btree, path, level, dat); 2017 if (!buffer_nilfs_volatile(path[level].bp_bh)) 2018 nilfs_btree_abort_update_v(btree, path, level, dat); 2019 return ret; 2020 } 2021 2022 static void nilfs_btree_commit_propagate_v(struct nilfs_bmap *btree, 2023 struct nilfs_btree_path *path, 2024 int minlevel, int maxlevel, 2025 struct buffer_head *bh, 2026 struct inode *dat) 2027 { 2028 int level; 2029 2030 if (!buffer_nilfs_volatile(path[minlevel].bp_bh)) 2031 nilfs_btree_commit_update_v(btree, path, minlevel, dat); 2032 2033 for (level = minlevel + 1; level <= maxlevel; level++) 2034 nilfs_btree_commit_update_v(btree, path, level, dat); 2035 } 2036 2037 static int nilfs_btree_propagate_v(struct nilfs_bmap *btree, 2038 struct nilfs_btree_path *path, 2039 int level, struct buffer_head *bh) 2040 { 2041 int maxlevel = 0, ret; 2042 struct nilfs_btree_node *parent; 2043 struct inode *dat = nilfs_bmap_get_dat(btree); 2044 __u64 ptr; 2045 int ncmax; 2046 2047 get_bh(bh); 2048 path[level].bp_bh = bh; 2049 ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel, 2050 dat); 2051 if (ret < 0) 2052 goto out; 2053 2054 if (buffer_nilfs_volatile(path[level].bp_bh)) { 2055 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); 2056 ptr = nilfs_btree_node_get_ptr(parent, 2057 path[level + 1].bp_index, 2058 ncmax); 2059 ret = nilfs_dat_mark_dirty(dat, ptr); 2060 if (ret < 0) 2061 goto out; 2062 } 2063 2064 nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh, dat); 2065 2066 out: 2067 brelse(path[level].bp_bh); 2068 path[level].bp_bh = NULL; 2069 return ret; 2070 } 2071 2072 static int nilfs_btree_propagate(struct nilfs_bmap *btree, 2073 struct buffer_head *bh) 2074 { 2075 struct nilfs_btree_path *path; 2076 struct nilfs_btree_node *node; 2077 __u64 key; 2078 int level, ret; 2079 2080 WARN_ON(!buffer_dirty(bh)); 2081 2082 path = nilfs_btree_alloc_path(); 2083 if (path == NULL) 2084 return -ENOMEM; 2085 2086 if (buffer_nilfs_node(bh)) { 2087 node = (struct nilfs_btree_node *)bh->b_data; 2088 key = nilfs_btree_node_get_key(node, 0); 2089 level = nilfs_btree_node_get_level(node); 2090 } else { 2091 key = nilfs_bmap_data_get_key(btree, bh); 2092 level = NILFS_BTREE_LEVEL_DATA; 2093 } 2094 2095 ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0); 2096 if (ret < 0) { 2097 if (unlikely(ret == -ENOENT)) 2098 nilfs_crit(btree->b_inode->i_sb, 2099 "writing node/leaf block does not appear in b-tree (ino=%lu) at key=%llu, level=%d", 2100 btree->b_inode->i_ino, 2101 (unsigned long long)key, level); 2102 goto out; 2103 } 2104 2105 ret = NILFS_BMAP_USE_VBN(btree) ? 2106 nilfs_btree_propagate_v(btree, path, level, bh) : 2107 nilfs_btree_propagate_p(btree, path, level, bh); 2108 2109 out: 2110 nilfs_btree_free_path(path); 2111 2112 return ret; 2113 } 2114 2115 static int nilfs_btree_propagate_gc(struct nilfs_bmap *btree, 2116 struct buffer_head *bh) 2117 { 2118 return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(btree), bh->b_blocknr); 2119 } 2120 2121 static void nilfs_btree_add_dirty_buffer(struct nilfs_bmap *btree, 2122 struct list_head *lists, 2123 struct buffer_head *bh) 2124 { 2125 struct list_head *head; 2126 struct buffer_head *cbh; 2127 struct nilfs_btree_node *node, *cnode; 2128 __u64 key, ckey; 2129 int level; 2130 2131 get_bh(bh); 2132 node = (struct nilfs_btree_node *)bh->b_data; 2133 key = nilfs_btree_node_get_key(node, 0); 2134 level = nilfs_btree_node_get_level(node); 2135 if (level < NILFS_BTREE_LEVEL_NODE_MIN || 2136 level >= NILFS_BTREE_LEVEL_MAX) { 2137 dump_stack(); 2138 nilfs_warn(btree->b_inode->i_sb, 2139 "invalid btree level: %d (key=%llu, ino=%lu, blocknr=%llu)", 2140 level, (unsigned long long)key, 2141 btree->b_inode->i_ino, 2142 (unsigned long long)bh->b_blocknr); 2143 return; 2144 } 2145 2146 list_for_each(head, &lists[level]) { 2147 cbh = list_entry(head, struct buffer_head, b_assoc_buffers); 2148 cnode = (struct nilfs_btree_node *)cbh->b_data; 2149 ckey = nilfs_btree_node_get_key(cnode, 0); 2150 if (key < ckey) 2151 break; 2152 } 2153 list_add_tail(&bh->b_assoc_buffers, head); 2154 } 2155 2156 static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *btree, 2157 struct list_head *listp) 2158 { 2159 struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode; 2160 struct address_space *btcache = btnc_inode->i_mapping; 2161 struct list_head lists[NILFS_BTREE_LEVEL_MAX]; 2162 struct folio_batch fbatch; 2163 struct buffer_head *bh, *head; 2164 pgoff_t index = 0; 2165 int level, i; 2166 2167 for (level = NILFS_BTREE_LEVEL_NODE_MIN; 2168 level < NILFS_BTREE_LEVEL_MAX; 2169 level++) 2170 INIT_LIST_HEAD(&lists[level]); 2171 2172 folio_batch_init(&fbatch); 2173 2174 while (filemap_get_folios_tag(btcache, &index, (pgoff_t)-1, 2175 PAGECACHE_TAG_DIRTY, &fbatch)) { 2176 for (i = 0; i < folio_batch_count(&fbatch); i++) { 2177 bh = head = folio_buffers(fbatch.folios[i]); 2178 do { 2179 if (buffer_dirty(bh)) 2180 nilfs_btree_add_dirty_buffer(btree, 2181 lists, bh); 2182 } while ((bh = bh->b_this_page) != head); 2183 } 2184 folio_batch_release(&fbatch); 2185 cond_resched(); 2186 } 2187 2188 for (level = NILFS_BTREE_LEVEL_NODE_MIN; 2189 level < NILFS_BTREE_LEVEL_MAX; 2190 level++) 2191 list_splice_tail(&lists[level], listp); 2192 } 2193 2194 static int nilfs_btree_assign_p(struct nilfs_bmap *btree, 2195 struct nilfs_btree_path *path, 2196 int level, 2197 struct buffer_head **bh, 2198 sector_t blocknr, 2199 union nilfs_binfo *binfo) 2200 { 2201 struct nilfs_btree_node *parent; 2202 __u64 key; 2203 __u64 ptr; 2204 int ncmax, ret; 2205 2206 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); 2207 ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index, 2208 ncmax); 2209 if (buffer_nilfs_node(*bh)) { 2210 path[level].bp_ctxt.oldkey = ptr; 2211 path[level].bp_ctxt.newkey = blocknr; 2212 path[level].bp_ctxt.bh = *bh; 2213 ret = nilfs_btnode_prepare_change_key( 2214 NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping, 2215 &path[level].bp_ctxt); 2216 if (ret < 0) 2217 return ret; 2218 nilfs_btnode_commit_change_key( 2219 NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping, 2220 &path[level].bp_ctxt); 2221 *bh = path[level].bp_ctxt.bh; 2222 } 2223 2224 nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index, blocknr, 2225 ncmax); 2226 2227 key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index); 2228 /* on-disk format */ 2229 binfo->bi_dat.bi_blkoff = cpu_to_le64(key); 2230 binfo->bi_dat.bi_level = level; 2231 memset(binfo->bi_dat.bi_pad, 0, sizeof(binfo->bi_dat.bi_pad)); 2232 2233 return 0; 2234 } 2235 2236 static int nilfs_btree_assign_v(struct nilfs_bmap *btree, 2237 struct nilfs_btree_path *path, 2238 int level, 2239 struct buffer_head **bh, 2240 sector_t blocknr, 2241 union nilfs_binfo *binfo) 2242 { 2243 struct nilfs_btree_node *parent; 2244 struct inode *dat = nilfs_bmap_get_dat(btree); 2245 __u64 key; 2246 __u64 ptr; 2247 union nilfs_bmap_ptr_req req; 2248 int ncmax, ret; 2249 2250 parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); 2251 ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index, 2252 ncmax); 2253 req.bpr_ptr = ptr; 2254 ret = nilfs_dat_prepare_start(dat, &req.bpr_req); 2255 if (ret < 0) 2256 return ret; 2257 nilfs_dat_commit_start(dat, &req.bpr_req, blocknr); 2258 2259 key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index); 2260 /* on-disk format */ 2261 binfo->bi_v.bi_vblocknr = cpu_to_le64(ptr); 2262 binfo->bi_v.bi_blkoff = cpu_to_le64(key); 2263 2264 return 0; 2265 } 2266 2267 static int nilfs_btree_assign(struct nilfs_bmap *btree, 2268 struct buffer_head **bh, 2269 sector_t blocknr, 2270 union nilfs_binfo *binfo) 2271 { 2272 struct nilfs_btree_path *path; 2273 struct nilfs_btree_node *node; 2274 __u64 key; 2275 int level, ret; 2276 2277 path = nilfs_btree_alloc_path(); 2278 if (path == NULL) 2279 return -ENOMEM; 2280 2281 if (buffer_nilfs_node(*bh)) { 2282 node = (struct nilfs_btree_node *)(*bh)->b_data; 2283 key = nilfs_btree_node_get_key(node, 0); 2284 level = nilfs_btree_node_get_level(node); 2285 } else { 2286 key = nilfs_bmap_data_get_key(btree, *bh); 2287 level = NILFS_BTREE_LEVEL_DATA; 2288 } 2289 2290 ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0); 2291 if (ret < 0) { 2292 WARN_ON(ret == -ENOENT); 2293 goto out; 2294 } 2295 2296 ret = NILFS_BMAP_USE_VBN(btree) ? 2297 nilfs_btree_assign_v(btree, path, level, bh, blocknr, binfo) : 2298 nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo); 2299 2300 out: 2301 nilfs_btree_free_path(path); 2302 2303 return ret; 2304 } 2305 2306 static int nilfs_btree_assign_gc(struct nilfs_bmap *btree, 2307 struct buffer_head **bh, 2308 sector_t blocknr, 2309 union nilfs_binfo *binfo) 2310 { 2311 struct nilfs_btree_node *node; 2312 __u64 key; 2313 int ret; 2314 2315 ret = nilfs_dat_move(nilfs_bmap_get_dat(btree), (*bh)->b_blocknr, 2316 blocknr); 2317 if (ret < 0) 2318 return ret; 2319 2320 if (buffer_nilfs_node(*bh)) { 2321 node = (struct nilfs_btree_node *)(*bh)->b_data; 2322 key = nilfs_btree_node_get_key(node, 0); 2323 } else 2324 key = nilfs_bmap_data_get_key(btree, *bh); 2325 2326 /* on-disk format */ 2327 binfo->bi_v.bi_vblocknr = cpu_to_le64((*bh)->b_blocknr); 2328 binfo->bi_v.bi_blkoff = cpu_to_le64(key); 2329 2330 return 0; 2331 } 2332 2333 static int nilfs_btree_mark(struct nilfs_bmap *btree, __u64 key, int level) 2334 { 2335 struct buffer_head *bh; 2336 struct nilfs_btree_path *path; 2337 __u64 ptr; 2338 int ret; 2339 2340 path = nilfs_btree_alloc_path(); 2341 if (path == NULL) 2342 return -ENOMEM; 2343 2344 ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1, 0); 2345 if (ret < 0) { 2346 WARN_ON(ret == -ENOENT); 2347 goto out; 2348 } 2349 ret = nilfs_btree_get_block(btree, ptr, &bh); 2350 if (ret < 0) { 2351 WARN_ON(ret == -ENOENT); 2352 goto out; 2353 } 2354 2355 if (!buffer_dirty(bh)) 2356 mark_buffer_dirty(bh); 2357 brelse(bh); 2358 if (!nilfs_bmap_dirty(btree)) 2359 nilfs_bmap_set_dirty(btree); 2360 2361 out: 2362 nilfs_btree_free_path(path); 2363 return ret; 2364 } 2365 2366 static const struct nilfs_bmap_operations nilfs_btree_ops = { 2367 .bop_lookup = nilfs_btree_lookup, 2368 .bop_lookup_contig = nilfs_btree_lookup_contig, 2369 .bop_insert = nilfs_btree_insert, 2370 .bop_delete = nilfs_btree_delete, 2371 .bop_clear = NULL, 2372 2373 .bop_propagate = nilfs_btree_propagate, 2374 2375 .bop_lookup_dirty_buffers = nilfs_btree_lookup_dirty_buffers, 2376 2377 .bop_assign = nilfs_btree_assign, 2378 .bop_mark = nilfs_btree_mark, 2379 2380 .bop_seek_key = nilfs_btree_seek_key, 2381 .bop_last_key = nilfs_btree_last_key, 2382 2383 .bop_check_insert = NULL, 2384 .bop_check_delete = nilfs_btree_check_delete, 2385 .bop_gather_data = nilfs_btree_gather_data, 2386 }; 2387 2388 static const struct nilfs_bmap_operations nilfs_btree_ops_gc = { 2389 .bop_lookup = NULL, 2390 .bop_lookup_contig = NULL, 2391 .bop_insert = NULL, 2392 .bop_delete = NULL, 2393 .bop_clear = NULL, 2394 2395 .bop_propagate = nilfs_btree_propagate_gc, 2396 2397 .bop_lookup_dirty_buffers = nilfs_btree_lookup_dirty_buffers, 2398 2399 .bop_assign = nilfs_btree_assign_gc, 2400 .bop_mark = NULL, 2401 2402 .bop_seek_key = NULL, 2403 .bop_last_key = NULL, 2404 2405 .bop_check_insert = NULL, 2406 .bop_check_delete = NULL, 2407 .bop_gather_data = NULL, 2408 }; 2409 2410 static void __nilfs_btree_init(struct nilfs_bmap *bmap) 2411 { 2412 bmap->b_ops = &nilfs_btree_ops; 2413 bmap->b_nchildren_per_block = 2414 NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap)); 2415 } 2416 2417 int nilfs_btree_init(struct nilfs_bmap *bmap) 2418 { 2419 int ret = 0; 2420 2421 __nilfs_btree_init(bmap); 2422 2423 if (nilfs_btree_root_broken(nilfs_btree_get_root(bmap), bmap->b_inode)) 2424 ret = -EIO; 2425 else 2426 ret = nilfs_attach_btree_node_cache( 2427 &NILFS_BMAP_I(bmap)->vfs_inode); 2428 2429 return ret; 2430 } 2431 2432 void nilfs_btree_init_gc(struct nilfs_bmap *bmap) 2433 { 2434 bmap->b_ops = &nilfs_btree_ops_gc; 2435 bmap->b_nchildren_per_block = 2436 NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap)); 2437 } 2438