// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_btree.h"
#include "xfs_errortag.h"
#include "xfs_error.h"
#include "xfs_trace.h"
#include "xfs_alloc.h"
#include "xfs_log.h"
#include "xfs_btree_staging.h"
#include "xfs_ag.h"
#include "xfs_alloc_btree.h"
#include "xfs_ialloc_btree.h"
#include "xfs_bmap_btree.h"
#include "xfs_rmap_btree.h"
#include "xfs_refcount_btree.h"

/*
 * Btree magic numbers.
 */
static const uint32_t xfs_magics[2][XFS_BTNUM_MAX] = {
	{ XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, 0, XFS_BMAP_MAGIC, XFS_IBT_MAGIC,
	  XFS_FIBT_MAGIC, 0 },
	{ XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC, XFS_RMAP_CRC_MAGIC,
	  XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC, XFS_FIBT_CRC_MAGIC,
	  XFS_REFC_CRC_MAGIC }
};

uint32_t
xfs_btree_magic(
	int			crc,
	xfs_btnum_t		btnum)
{
	uint32_t		magic = xfs_magics[crc][btnum];

	/* Ensure we asked for crc for crc-only magics. */
	ASSERT(magic != 0);
	return magic;
}

/*
 * These sibling pointer checks are optimised for null sibling pointers. This
 * happens a lot, and we don't need to byte swap at runtime if the sibling
 * pointer is NULL.
 *
 * These are explicitly marked as inline because the cost of calling them as
 * functions instead of inlining them is about 36 bytes extra code per call site
 * on x86-64. Yes, gcc-11 fails to inline them, and explicit inlining of these
 * two sibling check functions reduces the compiled code size by over 300
 * bytes.
 */
static inline xfs_failaddr_t
xfs_btree_check_lblock_siblings(
	struct xfs_mount	*mp,
	struct xfs_btree_cur	*cur,
	int			level,
	xfs_fsblock_t		fsb,
	__be64			dsibling)
{
	xfs_fsblock_t		sibling;

	if (dsibling == cpu_to_be64(NULLFSBLOCK))
		return NULL;

	sibling = be64_to_cpu(dsibling);
	if (sibling == fsb)
		return __this_address;
	if (level >= 0) {
		if (!xfs_btree_check_lptr(cur, sibling, level + 1))
			return __this_address;
	} else {
		if (!xfs_verify_fsbno(mp, sibling))
			return __this_address;
	}

	return NULL;
}

static inline xfs_failaddr_t
xfs_btree_check_sblock_siblings(
	struct xfs_perag	*pag,
	struct xfs_btree_cur	*cur,
	int			level,
	xfs_agblock_t		agbno,
	__be32			dsibling)
{
	xfs_agblock_t		sibling;

	if (dsibling == cpu_to_be32(NULLAGBLOCK))
		return NULL;

	sibling = be32_to_cpu(dsibling);
	if (sibling == agbno)
		return __this_address;
	if (level >= 0) {
		if (!xfs_btree_check_sptr(cur, sibling, level + 1))
			return __this_address;
	} else {
		if (!xfs_verify_agbno(pag, sibling))
			return __this_address;
	}
	return NULL;
}
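
/*
 * Illustration (comment only, not part of the original code): because the
 * null sibling test above compares against a compile-time constant, the
 * common case pays no byteswap cost at all:
 *
 *	if (dsibling == cpu_to_be64(NULLFSBLOCK))	// constant-folded
 *		return NULL;				// no be64_to_cpu()
 *
 * Only non-null siblings fall through to the endian conversion and the
 * self-reference and range checks.
 */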

/*
 * Check a long btree block header. Return the address of the failing check,
 * or NULL if everything is ok.
 */
xfs_failaddr_t
__xfs_btree_check_lblock(
	struct xfs_btree_cur	*cur,
	struct xfs_btree_block	*block,
	int			level,
	struct xfs_buf		*bp)
{
	struct xfs_mount	*mp = cur->bc_mp;
	xfs_btnum_t		btnum = cur->bc_btnum;
	int			crc = xfs_has_crc(mp);
	xfs_failaddr_t		fa;
	xfs_fsblock_t		fsb = NULLFSBLOCK;

	if (crc) {
		if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid))
			return __this_address;
		if (block->bb_u.l.bb_blkno !=
		    cpu_to_be64(bp ? xfs_buf_daddr(bp) : XFS_BUF_DADDR_NULL))
			return __this_address;
		if (block->bb_u.l.bb_pad != cpu_to_be32(0))
			return __this_address;
	}

	if (be32_to_cpu(block->bb_magic) != xfs_btree_magic(crc, btnum))
		return __this_address;
	if (be16_to_cpu(block->bb_level) != level)
		return __this_address;
	if (be16_to_cpu(block->bb_numrecs) >
	    cur->bc_ops->get_maxrecs(cur, level))
		return __this_address;

	if (bp)
		fsb = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp));

	fa = xfs_btree_check_lblock_siblings(mp, cur, level, fsb,
			block->bb_u.l.bb_leftsib);
	if (!fa)
		fa = xfs_btree_check_lblock_siblings(mp, cur, level, fsb,
				block->bb_u.l.bb_rightsib);
	return fa;
}

/* Check a long btree block header. */
static int
xfs_btree_check_lblock(
	struct xfs_btree_cur	*cur,
	struct xfs_btree_block	*block,
	int			level,
	struct xfs_buf		*bp)
{
	struct xfs_mount	*mp = cur->bc_mp;
	xfs_failaddr_t		fa;

	fa = __xfs_btree_check_lblock(cur, block, level, bp);
	if (XFS_IS_CORRUPT(mp, fa != NULL) ||
	    XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BTREE_CHECK_LBLOCK)) {
		if (bp)
			trace_xfs_btree_corrupt(bp, _RET_IP_);
		return -EFSCORRUPTED;
	}
	return 0;
}

/*
 * Check a short btree block header. Return the address of the failing check,
 * or NULL if everything is ok.
 */
xfs_failaddr_t
__xfs_btree_check_sblock(
	struct xfs_btree_cur	*cur,
	struct xfs_btree_block	*block,
	int			level,
	struct xfs_buf		*bp)
{
	struct xfs_mount	*mp = cur->bc_mp;
	struct xfs_perag	*pag = cur->bc_ag.pag;
	xfs_btnum_t		btnum = cur->bc_btnum;
	int			crc = xfs_has_crc(mp);
	xfs_failaddr_t		fa;
	xfs_agblock_t		agbno = NULLAGBLOCK;

	if (crc) {
		if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
			return __this_address;
		if (block->bb_u.s.bb_blkno !=
		    cpu_to_be64(bp ? xfs_buf_daddr(bp) : XFS_BUF_DADDR_NULL))
			return __this_address;
	}

	if (be32_to_cpu(block->bb_magic) != xfs_btree_magic(crc, btnum))
		return __this_address;
	if (be16_to_cpu(block->bb_level) != level)
		return __this_address;
	if (be16_to_cpu(block->bb_numrecs) >
	    cur->bc_ops->get_maxrecs(cur, level))
		return __this_address;

	if (bp)
		agbno = xfs_daddr_to_agbno(mp, xfs_buf_daddr(bp));

	fa = xfs_btree_check_sblock_siblings(pag, cur, level, agbno,
			block->bb_u.s.bb_leftsib);
	if (!fa)
		fa = xfs_btree_check_sblock_siblings(pag, cur, level, agbno,
				block->bb_u.s.bb_rightsib);
	return fa;
}

/* Check a short btree block header. */
STATIC int
xfs_btree_check_sblock(
	struct xfs_btree_cur	*cur,
	struct xfs_btree_block	*block,
	int			level,
	struct xfs_buf		*bp)
{
	struct xfs_mount	*mp = cur->bc_mp;
	xfs_failaddr_t		fa;

	fa = __xfs_btree_check_sblock(cur, block, level, bp);
	if (XFS_IS_CORRUPT(mp, fa != NULL) ||
	    XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BTREE_CHECK_SBLOCK)) {
		if (bp)
			trace_xfs_btree_corrupt(bp, _RET_IP_);
		return -EFSCORRUPTED;
	}
	return 0;
}

/*
 * Debug routine: check that block header is ok.
 */
int
xfs_btree_check_block(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	struct xfs_btree_block	*block,	/* generic btree block pointer */
	int			level,	/* level of the btree block */
	struct xfs_buf		*bp)	/* buffer containing block, if any */
{
	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
		return xfs_btree_check_lblock(cur, block, level, bp);
	else
		return xfs_btree_check_sblock(cur, block, level, bp);
}

/* Check that this long pointer is valid and points within the fs. */
bool
xfs_btree_check_lptr(
	struct xfs_btree_cur	*cur,
	xfs_fsblock_t		fsbno,
	int			level)
{
	if (level <= 0)
		return false;
	return xfs_verify_fsbno(cur->bc_mp, fsbno);
}

/* Check that this short pointer is valid and points within the AG. */
bool
xfs_btree_check_sptr(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		agbno,
	int			level)
{
	if (level <= 0)
		return false;
	return xfs_verify_agbno(cur->bc_ag.pag, agbno);
}

/*
 * Check that a given (indexed) btree pointer at a certain level of a
 * btree is valid and doesn't point past where it should.
 */
static int
xfs_btree_check_ptr(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_ptr	*ptr,
	int				index,
	int				level)
{
	if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
		if (xfs_btree_check_lptr(cur, be64_to_cpu((&ptr->l)[index]),
				level))
			return 0;
		xfs_err(cur->bc_mp,
"Inode %llu fork %d: Corrupt btree %d pointer at level %d index %d.",
				cur->bc_ino.ip->i_ino,
				cur->bc_ino.whichfork, cur->bc_btnum,
				level, index);
	} else {
		if (xfs_btree_check_sptr(cur, be32_to_cpu((&ptr->s)[index]),
				level))
			return 0;
		xfs_err(cur->bc_mp,
"AG %u: Corrupt btree %d pointer at level %d index %d.",
				cur->bc_ag.pag->pag_agno, cur->bc_btnum,
				level, index);
	}

	return -EFSCORRUPTED;
}

#ifdef DEBUG
# define xfs_btree_debug_check_ptr	xfs_btree_check_ptr
#else
# define xfs_btree_debug_check_ptr(...)	(0)
#endif

/*
 * Calculate CRC on the whole btree block and stuff it into the
 * long-form btree header.
 *
 * Prior to calculating the CRC, pull the LSN out of the buffer log item and
 * put it into the buffer so recovery knows what the last modification was
 * that made it to disk.
 */
void
xfs_btree_lblock_calc_crc(
	struct xfs_buf		*bp)
{
	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
	struct xfs_buf_log_item	*bip = bp->b_log_item;

	if (!xfs_has_crc(bp->b_mount))
		return;
	if (bip)
		block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
	xfs_buf_update_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF);
}

bool
xfs_btree_lblock_verify_crc(
	struct xfs_buf		*bp)
{
	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
	struct xfs_mount	*mp = bp->b_mount;

	if (xfs_has_crc(mp)) {
		if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.l.bb_lsn)))
			return false;
		return xfs_buf_verify_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF);
	}

	return true;
}

/*
 * Calculate CRC on the whole btree block and stuff it into the
 * short-form btree header.
 *
 * Prior to calculating the CRC, pull the LSN out of the buffer log item and
 * put it into the buffer so recovery knows what the last modification was
 * that made it to disk.
 */
void
xfs_btree_sblock_calc_crc(
	struct xfs_buf		*bp)
{
	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
	struct xfs_buf_log_item	*bip = bp->b_log_item;

	if (!xfs_has_crc(bp->b_mount))
		return;
	if (bip)
		block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
	xfs_buf_update_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
}

bool
xfs_btree_sblock_verify_crc(
	struct xfs_buf		*bp)
{
	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
	struct xfs_mount	*mp = bp->b_mount;

	if (xfs_has_crc(mp)) {
		if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.s.bb_lsn)))
			return false;
		return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
	}

	return true;
}

static int
xfs_btree_free_block(
	struct xfs_btree_cur	*cur,
	struct xfs_buf		*bp)
{
	int			error;

	error = cur->bc_ops->free_block(cur, bp);
	if (!error) {
		xfs_trans_binval(cur->bc_tp, bp);
		XFS_BTREE_STATS_INC(cur, free);
	}
	return error;
}

/*
 * Delete the btree cursor.
 */
void
xfs_btree_del_cursor(
	struct xfs_btree_cur	*cur,		/* btree cursor */
	int			error)		/* del because of error */
{
	int			i;		/* btree level */

	/*
	 * Clear the buffer pointers and release the buffers. If we're doing
	 * this because of an error, inspect all of the entries in the bc_bufs
	 * array for buffers to be unlocked. This is because some of the btree
	 * code works from level n down to 0, and if we get an error along the
	 * way we won't have initialized all the entries down to 0.
	 */
	for (i = 0; i < cur->bc_nlevels; i++) {
		if (cur->bc_levels[i].bp)
			xfs_trans_brelse(cur->bc_tp, cur->bc_levels[i].bp);
		else if (!error)
			break;
	}

	/*
	 * If we are doing a BMBT update, the number of unaccounted blocks
	 * allocated during this cursor life time should be zero. If it's not
	 * zero, then we should be shut down or on our way to shutdown due to
	 * cancelling a dirty transaction on error.
	 */
	ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP || cur->bc_ino.allocated == 0 ||
	       xfs_is_shutdown(cur->bc_mp) || error != 0);
	if (unlikely(cur->bc_flags & XFS_BTREE_STAGING))
		kmem_free(cur->bc_ops);
	if (!(cur->bc_flags & XFS_BTREE_LONG_PTRS) && cur->bc_ag.pag)
		xfs_perag_put(cur->bc_ag.pag);
	kmem_cache_free(cur->bc_cache, cur);
}
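
/*
 * Usage sketch (illustrative assumption, not code from this file): buffer
 * ops for a short-form btree would typically wire the two CRC helpers
 * above into their read and write verifiers:
 *
 *	// .verify_write path: stamp the LSN and recompute the checksum
 *	xfs_btree_sblock_calc_crc(bp);
 *
 *	// .verify_read path: check the checksum before trusting contents
 *	if (!xfs_btree_sblock_verify_crc(bp))
 *		// ... fail the read with a checksum error ...
 */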

/*
 * Duplicate the btree cursor.
 * Allocate a new one, copy the record, re-get the buffers.
 */
int					/* error */
xfs_btree_dup_cursor(
	struct xfs_btree_cur	*cur,	/* input cursor */
	struct xfs_btree_cur	**ncur)	/* output cursor */
{
	struct xfs_buf		*bp;	/* btree block's buffer pointer */
	int			error;	/* error return value */
	int			i;	/* level number of btree block */
	xfs_mount_t		*mp;	/* mount structure for filesystem */
	struct xfs_btree_cur	*new;	/* new cursor value */
	xfs_trans_t		*tp;	/* transaction pointer, can be NULL */

	tp = cur->bc_tp;
	mp = cur->bc_mp;

	/*
	 * Allocate a new cursor like the old one.
	 */
	new = cur->bc_ops->dup_cursor(cur);

	/*
	 * Copy the record currently in the cursor.
	 */
	new->bc_rec = cur->bc_rec;

	/*
	 * For each level current, re-get the buffer and copy the ptr value.
	 */
	for (i = 0; i < new->bc_nlevels; i++) {
		new->bc_levels[i].ptr = cur->bc_levels[i].ptr;
		new->bc_levels[i].ra = cur->bc_levels[i].ra;
		bp = cur->bc_levels[i].bp;
		if (bp) {
			error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
						   xfs_buf_daddr(bp), mp->m_bsize,
						   0, &bp,
						   cur->bc_ops->buf_ops);
			if (error) {
				xfs_btree_del_cursor(new, error);
				*ncur = NULL;
				return error;
			}
		}
		new->bc_levels[i].bp = bp;
	}
	*ncur = new;
	return 0;
}
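
/*
 * Usage sketch (hypothetical caller): duplicating a cursor lets a caller
 * probe ahead without losing the current position, then discard the probe:
 *
 *	struct xfs_btree_cur	*ncur;
 *	int			error;
 *
 *	error = xfs_btree_dup_cursor(cur, &ncur);
 *	if (error)
 *		return error;
 *	// ... search/modify with ncur, leaving cur untouched ...
 *	xfs_btree_del_cursor(ncur, error);
 */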

/*
 * XFS btree block layout and addressing:
 *
 * There are two types of blocks in the btree: leaf and non-leaf blocks.
 *
 * A leaf block starts with a header followed by records containing the
 * values. A non-leaf block also starts with the same header, and then
 * first contains lookup keys followed by an equal number of pointers
 * to the btree blocks at the previous level.
 *
 *		+--------+-------+-------+-------+-------+-------+-------+
 * Leaf:	| header | rec 1 | rec 2 | rec 3 | rec 4 | rec 5 | rec N |
 *		+--------+-------+-------+-------+-------+-------+-------+
 *
 *		+--------+-------+-------+-------+-------+-------+-------+
 * Non-Leaf:	| header | key 1 | key 2 | key N | ptr 1 | ptr 2 | ptr N |
 *		+--------+-------+-------+-------+-------+-------+-------+
 *
 * The header is called struct xfs_btree_block for reasons better left unknown
 * and comes in different versions for short (32bit) and long (64bit) block
 * pointers. The record and key structures are defined by the btree instances
 * and opaque to the btree core. The block pointers are simple disk endian
 * integers, available in a short (32bit) and long (64bit) variant.
 *
 * The helpers below calculate the offset of a given record, key or pointer
 * into a btree block (xfs_btree_*_offset) or return a pointer to the given
 * record, key or pointer (xfs_btree_*_addr). Note that all addressing
 * inside the btree block is done using indices starting at one, not zero!
 *
 * If XFS_BTREE_OVERLAPPING is set, then this btree supports keys containing
 * overlapping intervals. In such a tree, records are still sorted lowest to
 * highest and indexed by the smallest key value that refers to the record.
 * However, nodes are different: each pointer has two associated keys -- one
 * indexing the lowest key available in the block(s) below (the same behavior
 * as the key in a regular btree) and another indexing the highest key
 * available in the block(s) below. Because records are /not/ sorted by the
 * highest key, all leaf block updates require us to compute the highest key
 * that matches any record in the leaf and to recursively update the high keys
 * in the nodes going further up in the tree, if necessary. Nodes look like
 * this:
 *
 *		+--------+-----+-----+-----+-----+-----+-------+-------+-----+
 * Non-Leaf:	| header | lo1 | hi1 | lo2 | hi2 | ... | ptr 1 | ptr 2 | ... |
 *		+--------+-----+-----+-----+-----+-----+-------+-------+-----+
 *
 * To perform an interval query on an overlapped tree, perform the usual
 * depth-first search and use the low and high keys to decide if we can skip
 * that particular node. If a leaf node is reached, return the records that
 * intersect the interval. Note that an interval query may return numerous
 * entries. For a non-overlapped tree, simply search for the record associated
 * with the lowest key and iterate forward until a non-matching record is
 * found. Section 14.3 ("Interval Trees") of _Introduction to Algorithms_ by
 * Cormen, Leiserson, Rivest, and Stein (2nd or 3rd ed. only) discusses this
 * in more detail.
 *
 * Why do we care about overlapping intervals? Let's say you have a bunch of
 * reverse mapping records on a reflink filesystem:
 *
 * 1: +- file A startblock B offset C length D -----------+
 * 2:      +- file E startblock F offset G length H --------------+
 * 3:      +- file I startblock F offset J length K --+
 * 4:                                                        +- file L... --+
 *
 * Now say we want to map block (B+D) into file A at offset (C+D). Ideally,
 * we'd simply increment the length of record 1. But how do we find the record
 * that ends at (B+D-1) (i.e. record 1)? A LE lookup of (B+D-1) would return
 * record 3 because the keys are ordered first by startblock. An interval
 * query would return records 1 and 2 because they both overlap (B+D-1), and
 * from that we can pick out record 1 as the appropriate left neighbor.
 *
 * In the non-overlapped case you can do a LE lookup and decrement the cursor
 * because a record's interval must end before the next record.
 */

/*
 * Return size of the btree block header for this btree instance.
 */
static inline size_t xfs_btree_block_len(struct xfs_btree_cur *cur)
{
	if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
		if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS)
			return XFS_BTREE_LBLOCK_CRC_LEN;
		return XFS_BTREE_LBLOCK_LEN;
	}
	if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS)
		return XFS_BTREE_SBLOCK_CRC_LEN;
	return XFS_BTREE_SBLOCK_LEN;
}

/*
 * Return size of btree block pointers for this btree instance.
 */
static inline size_t xfs_btree_ptr_len(struct xfs_btree_cur *cur)
{
	return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
		sizeof(__be64) : sizeof(__be32);
}

/*
 * Calculate offset of the n-th record in a btree block.
 */
STATIC size_t
xfs_btree_rec_offset(
	struct xfs_btree_cur	*cur,
	int			n)
{
	return xfs_btree_block_len(cur) +
		(n - 1) * cur->bc_ops->rec_len;
}

/*
 * Calculate offset of the n-th key in a btree block.
 */
STATIC size_t
xfs_btree_key_offset(
	struct xfs_btree_cur	*cur,
	int			n)
{
	return xfs_btree_block_len(cur) +
		(n - 1) * cur->bc_ops->key_len;
}
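
/*
 * Worked example (illustrative, hypothetical geometry): for a short-form
 * CRC btree whose ops declare key_len == 8 and rec_len == 16, the second
 * entry of each kind lives at
 *
 *	xfs_btree_rec_offset(cur, 2)
 *		== XFS_BTREE_SBLOCK_CRC_LEN + (2 - 1) * 16
 *	xfs_btree_key_offset(cur, 2)
 *		== XFS_BTREE_SBLOCK_CRC_LEN + (2 - 1) * 8
 *
 * Note the 1-based indexing described above: n == 1 is the first entry,
 * starting immediately after the block header.
 */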

/*
 * Calculate offset of the n-th high key in a btree block.
 */
STATIC size_t
xfs_btree_high_key_offset(
	struct xfs_btree_cur	*cur,
	int			n)
{
	return xfs_btree_block_len(cur) +
		(n - 1) * cur->bc_ops->key_len + (cur->bc_ops->key_len / 2);
}

/*
 * Calculate offset of the n-th block pointer in a btree block.
 */
STATIC size_t
xfs_btree_ptr_offset(
	struct xfs_btree_cur	*cur,
	int			n,
	int			level)
{
	return xfs_btree_block_len(cur) +
		cur->bc_ops->get_maxrecs(cur, level) * cur->bc_ops->key_len +
		(n - 1) * xfs_btree_ptr_len(cur);
}

/*
 * Return a pointer to the n-th record in the btree block.
 */
union xfs_btree_rec *
xfs_btree_rec_addr(
	struct xfs_btree_cur	*cur,
	int			n,
	struct xfs_btree_block	*block)
{
	return (union xfs_btree_rec *)
		((char *)block + xfs_btree_rec_offset(cur, n));
}

/*
 * Return a pointer to the n-th key in the btree block.
 */
union xfs_btree_key *
xfs_btree_key_addr(
	struct xfs_btree_cur	*cur,
	int			n,
	struct xfs_btree_block	*block)
{
	return (union xfs_btree_key *)
		((char *)block + xfs_btree_key_offset(cur, n));
}

/*
 * Return a pointer to the n-th high key in the btree block.
 */
union xfs_btree_key *
xfs_btree_high_key_addr(
	struct xfs_btree_cur	*cur,
	int			n,
	struct xfs_btree_block	*block)
{
	return (union xfs_btree_key *)
		((char *)block + xfs_btree_high_key_offset(cur, n));
}

/*
 * Return a pointer to the n-th block pointer in the btree block.
 */
union xfs_btree_ptr *
xfs_btree_ptr_addr(
	struct xfs_btree_cur	*cur,
	int			n,
	struct xfs_btree_block	*block)
{
	int			level = xfs_btree_get_level(block);

	ASSERT(block->bb_level != 0);

	return (union xfs_btree_ptr *)
		((char *)block + xfs_btree_ptr_offset(cur, n, level));
}

struct xfs_ifork *
xfs_btree_ifork_ptr(
	struct xfs_btree_cur	*cur)
{
	ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);

	if (cur->bc_flags & XFS_BTREE_STAGING)
		return cur->bc_ino.ifake->if_fork;
	return xfs_ifork_ptr(cur->bc_ino.ip, cur->bc_ino.whichfork);
}

/*
 * Get the root block which is stored in the inode.
 *
 * For now this btree implementation assumes the btree root is always
 * stored in the if_broot field of an inode fork.
 */
STATIC struct xfs_btree_block *
xfs_btree_get_iroot(
	struct xfs_btree_cur	*cur)
{
	struct xfs_ifork	*ifp = xfs_btree_ifork_ptr(cur);

	return (struct xfs_btree_block *)ifp->if_broot;
}

/*
 * Retrieve the block pointer from the cursor at the given level.
 * This may be an inode btree root or from a buffer.
 */
struct xfs_btree_block *		/* generic btree block pointer */
xfs_btree_get_block(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	int			level,	/* level in btree */
	struct xfs_buf		**bpp)	/* buffer containing the block */
{
	if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
	    (level == cur->bc_nlevels - 1)) {
		*bpp = NULL;
		return xfs_btree_get_iroot(cur);
	}

	*bpp = cur->bc_levels[level].bp;
	return XFS_BUF_TO_BLOCK(*bpp);
}
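
/*
 * Illustration (sketch, no validation): fetching the record the cursor
 * currently points at on the leaf level combines the helpers above:
 *
 *	struct xfs_buf		*bp;
 *	struct xfs_btree_block	*block;
 *	union xfs_btree_rec	*rec;
 *
 *	block = xfs_btree_get_block(cur, 0, &bp);
 *	rec = xfs_btree_rec_addr(cur, cur->bc_levels[0].ptr, block);
 *
 * A real caller would run xfs_btree_check_block() on the block before
 * dereferencing anything inside it.
 */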

/*
 * Change the cursor to point to the first record at the given level.
 * Other levels are unaffected.
 */
STATIC int				/* success=1, failure=0 */
xfs_btree_firstrec(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	int			level)	/* level to change */
{
	struct xfs_btree_block	*block;	/* generic btree block pointer */
	struct xfs_buf		*bp;	/* buffer containing block */

	/*
	 * Get the block pointer for this level.
	 */
	block = xfs_btree_get_block(cur, level, &bp);
	if (xfs_btree_check_block(cur, block, level, bp))
		return 0;
	/*
	 * It's empty, there is no such record.
	 */
	if (!block->bb_numrecs)
		return 0;
	/*
	 * Set the ptr value to 1, that's the first record/key.
	 */
	cur->bc_levels[level].ptr = 1;
	return 1;
}

/*
 * Change the cursor to point to the last record in the current block
 * at the given level. Other levels are unaffected.
 */
STATIC int				/* success=1, failure=0 */
xfs_btree_lastrec(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	int			level)	/* level to change */
{
	struct xfs_btree_block	*block;	/* generic btree block pointer */
	struct xfs_buf		*bp;	/* buffer containing block */

	/*
	 * Get the block pointer for this level.
	 */
	block = xfs_btree_get_block(cur, level, &bp);
	if (xfs_btree_check_block(cur, block, level, bp))
		return 0;
	/*
	 * It's empty, there is no such record.
	 */
	if (!block->bb_numrecs)
		return 0;
	/*
	 * Set the ptr value to numrecs, that's the last record/key.
	 */
	cur->bc_levels[level].ptr = be16_to_cpu(block->bb_numrecs);
	return 1;
}

/*
 * Compute first and last byte offsets for the fields given.
 * Interprets the offsets table, which contains struct field offsets.
 */
void
xfs_btree_offsets(
	uint32_t	fields,		/* bitmask of fields */
	const short	*offsets,	/* table of field offsets */
	int		nbits,		/* number of bits to inspect */
	int		*first,		/* output: first byte offset */
	int		*last)		/* output: last byte offset */
{
	int		i;		/* current bit number */
	uint32_t	imask;		/* mask for current bit number */

	ASSERT(fields != 0);
	/*
	 * Find the lowest bit, so the first byte offset.
	 */
	for (i = 0, imask = 1u; ; i++, imask <<= 1) {
		if (imask & fields) {
			*first = offsets[i];
			break;
		}
	}
	/*
	 * Find the highest bit, so the last byte offset.
	 */
	for (i = nbits - 1, imask = 1u << i; ; i--, imask >>= 1) {
		if (imask & fields) {
			*last = offsets[i + 1] - 1;
			break;
		}
	}
}
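
/*
 * Worked example (illustrative): logging bb_magic and bb_numrecs of a
 * short-form block means fields == XFS_BB_MAGIC | XFS_BB_NUMRECS, and the
 * two scans above yield
 *
 *	*first == offsetof(struct xfs_btree_block, bb_magic);
 *	*last  == offsetof(struct xfs_btree_block, bb_u.s.bb_leftsib) - 1;
 *
 * i.e. the last byte of a field is one byte before the start of the next
 * entry in the table, which is why the offset tables passed in (see
 * xfs_btree_log_block() below) end with the total header length as a
 * sentinel.
 */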

/*
 * Get a buffer for the block, return it read in.
 * Long-form addressing.
 */
int
xfs_btree_read_bufl(
	struct xfs_mount	*mp,		/* file system mount point */
	struct xfs_trans	*tp,		/* transaction pointer */
	xfs_fsblock_t		fsbno,		/* file system block number */
	struct xfs_buf		**bpp,		/* buffer for fsbno */
	int			refval,		/* ref count value for buffer */
	const struct xfs_buf_ops *ops)
{
	struct xfs_buf		*bp;		/* return value */
	xfs_daddr_t		d;		/* real disk block address */
	int			error;

	if (!xfs_verify_fsbno(mp, fsbno))
		return -EFSCORRUPTED;
	d = XFS_FSB_TO_DADDR(mp, fsbno);
	error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
				   mp->m_bsize, 0, &bp, ops);
	if (error)
		return error;
	if (bp)
		xfs_buf_set_ref(bp, refval);
	*bpp = bp;
	return 0;
}

/*
 * Read-ahead the block, don't wait for it, don't return a buffer.
 * Long-form addressing.
 */
/* ARGSUSED */
void
xfs_btree_reada_bufl(
	struct xfs_mount	*mp,		/* file system mount point */
	xfs_fsblock_t		fsbno,		/* file system block number */
	xfs_extlen_t		count,		/* count of filesystem blocks */
	const struct xfs_buf_ops *ops)
{
	xfs_daddr_t		d;

	ASSERT(fsbno != NULLFSBLOCK);
	d = XFS_FSB_TO_DADDR(mp, fsbno);
	xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops);
}

/*
 * Read-ahead the block, don't wait for it, don't return a buffer.
 * Short-form addressing.
 */
/* ARGSUSED */
void
xfs_btree_reada_bufs(
	struct xfs_mount	*mp,		/* file system mount point */
	xfs_agnumber_t		agno,		/* allocation group number */
	xfs_agblock_t		agbno,		/* allocation group block number */
	xfs_extlen_t		count,		/* count of filesystem blocks */
	const struct xfs_buf_ops *ops)
{
	xfs_daddr_t		d;

	ASSERT(agno != NULLAGNUMBER);
	ASSERT(agbno != NULLAGBLOCK);
	d = XFS_AGB_TO_DADDR(mp, agno, agbno);
	xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops);
}

STATIC int
xfs_btree_readahead_lblock(
	struct xfs_btree_cur	*cur,
	int			lr,
	struct xfs_btree_block	*block)
{
	int			rval = 0;
	xfs_fsblock_t		left = be64_to_cpu(block->bb_u.l.bb_leftsib);
	xfs_fsblock_t		right = be64_to_cpu(block->bb_u.l.bb_rightsib);

	if ((lr & XFS_BTCUR_LEFTRA) && left != NULLFSBLOCK) {
		xfs_btree_reada_bufl(cur->bc_mp, left, 1,
				     cur->bc_ops->buf_ops);
		rval++;
	}

	if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLFSBLOCK) {
		xfs_btree_reada_bufl(cur->bc_mp, right, 1,
				     cur->bc_ops->buf_ops);
		rval++;
	}

	return rval;
}

STATIC int
xfs_btree_readahead_sblock(
	struct xfs_btree_cur	*cur,
	int			lr,
	struct xfs_btree_block	*block)
{
	int			rval = 0;
	xfs_agblock_t		left = be32_to_cpu(block->bb_u.s.bb_leftsib);
	xfs_agblock_t		right = be32_to_cpu(block->bb_u.s.bb_rightsib);

	if ((lr & XFS_BTCUR_LEFTRA) && left != NULLAGBLOCK) {
		xfs_btree_reada_bufs(cur->bc_mp, cur->bc_ag.pag->pag_agno,
				     left, 1, cur->bc_ops->buf_ops);
		rval++;
	}

	if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLAGBLOCK) {
		xfs_btree_reada_bufs(cur->bc_mp, cur->bc_ag.pag->pag_agno,
				     right, 1, cur->bc_ops->buf_ops);
		rval++;
	}

	return rval;
}

/*
 * Read-ahead btree blocks, at the given level.
 * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA.
 */
STATIC int
xfs_btree_readahead(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	int			lev,	/* level in btree */
	int			lr)	/* left/right bits */
{
	struct xfs_btree_block	*block;

	/*
	 * No readahead needed if we are at the root level and the
	 * btree root is stored in the inode.
	 */
	if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
	    (lev == cur->bc_nlevels - 1))
		return 0;

	if ((cur->bc_levels[lev].ra | lr) == cur->bc_levels[lev].ra)
		return 0;

	cur->bc_levels[lev].ra |= lr;
	block = XFS_BUF_TO_BLOCK(cur->bc_levels[lev].bp);

	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
		return xfs_btree_readahead_lblock(cur, lr, block);
	return xfs_btree_readahead_sblock(cur, lr, block);
}

STATIC int
xfs_btree_ptr_to_daddr(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_ptr	*ptr,
	xfs_daddr_t			*daddr)
{
	xfs_fsblock_t		fsbno;
	xfs_agblock_t		agbno;
	int			error;

	error = xfs_btree_check_ptr(cur, ptr, 0, 1);
	if (error)
		return error;

	if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
		fsbno = be64_to_cpu(ptr->l);
		*daddr = XFS_FSB_TO_DADDR(cur->bc_mp, fsbno);
	} else {
		agbno = be32_to_cpu(ptr->s);
		*daddr = XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_ag.pag->pag_agno,
					  agbno);
	}

	return 0;
}

/*
 * Readahead @count btree blocks at the given @ptr location.
 *
 * We don't need to care about long or short form btrees here as we have a
 * method of converting the ptr directly to a daddr available to us.
 */
STATIC void
xfs_btree_readahead_ptr(
	struct xfs_btree_cur	*cur,
	union xfs_btree_ptr	*ptr,
	xfs_extlen_t		count)
{
	xfs_daddr_t		daddr;

	if (xfs_btree_ptr_to_daddr(cur, ptr, &daddr))
		return;
	xfs_buf_readahead(cur->bc_mp->m_ddev_targp, daddr,
			  cur->bc_mp->m_bsize * count, cur->bc_ops->buf_ops);
}
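
/*
 * Illustration (not in the original source): the translation above reduces
 * both pointer flavours to a raw disk address. A short-form pointer is an
 * AG-relative block number, so
 *
 *	daddr = XFS_AGB_TO_DADDR(mp, pag->pag_agno, be32_to_cpu(ptr->s));
 *
 * while a long-form pointer already encodes the AG number in its upper
 * bits and XFS_FSB_TO_DADDR() unpacks both halves.
 */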

/*
 * Set the buffer for level "lev" in the cursor to bp, releasing
 * any previous buffer.
 */
STATIC void
xfs_btree_setbuf(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	int			lev,	/* level in btree */
	struct xfs_buf		*bp)	/* new buffer to set */
{
	struct xfs_btree_block	*b;	/* btree block */

	if (cur->bc_levels[lev].bp)
		xfs_trans_brelse(cur->bc_tp, cur->bc_levels[lev].bp);
	cur->bc_levels[lev].bp = bp;
	cur->bc_levels[lev].ra = 0;

	b = XFS_BUF_TO_BLOCK(bp);
	if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
		if (b->bb_u.l.bb_leftsib == cpu_to_be64(NULLFSBLOCK))
			cur->bc_levels[lev].ra |= XFS_BTCUR_LEFTRA;
		if (b->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK))
			cur->bc_levels[lev].ra |= XFS_BTCUR_RIGHTRA;
	} else {
		if (b->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK))
			cur->bc_levels[lev].ra |= XFS_BTCUR_LEFTRA;
		if (b->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK))
			cur->bc_levels[lev].ra |= XFS_BTCUR_RIGHTRA;
	}
}

bool
xfs_btree_ptr_is_null(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_ptr	*ptr)
{
	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
		return ptr->l == cpu_to_be64(NULLFSBLOCK);
	else
		return ptr->s == cpu_to_be32(NULLAGBLOCK);
}

void
xfs_btree_set_ptr_null(
	struct xfs_btree_cur	*cur,
	union xfs_btree_ptr	*ptr)
{
	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
		ptr->l = cpu_to_be64(NULLFSBLOCK);
	else
		ptr->s = cpu_to_be32(NULLAGBLOCK);
}

/*
 * Get/set/init sibling pointers
 */
void
xfs_btree_get_sibling(
	struct xfs_btree_cur	*cur,
	struct xfs_btree_block	*block,
	union xfs_btree_ptr	*ptr,
	int			lr)
{
	ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);

	if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
		if (lr == XFS_BB_RIGHTSIB)
			ptr->l = block->bb_u.l.bb_rightsib;
		else
			ptr->l = block->bb_u.l.bb_leftsib;
	} else {
		if (lr == XFS_BB_RIGHTSIB)
			ptr->s = block->bb_u.s.bb_rightsib;
		else
			ptr->s = block->bb_u.s.bb_leftsib;
	}
}

void
xfs_btree_set_sibling(
	struct xfs_btree_cur		*cur,
	struct xfs_btree_block		*block,
	const union xfs_btree_ptr	*ptr,
	int				lr)
{
	ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);

	if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
		if (lr == XFS_BB_RIGHTSIB)
			block->bb_u.l.bb_rightsib = ptr->l;
		else
			block->bb_u.l.bb_leftsib = ptr->l;
	} else {
		if (lr == XFS_BB_RIGHTSIB)
			block->bb_u.s.bb_rightsib = ptr->s;
		else
			block->bb_u.s.bb_leftsib = ptr->s;
	}
}
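
/*
 * Illustration (sketch, not from this file): the get/set pair above works
 * on raw disk-endian values, so copying a sibling link from one block to
 * another needs no byteswap at all:
 *
 *	union xfs_btree_ptr	ptr;
 *
 *	xfs_btree_get_sibling(cur, left, &ptr, XFS_BB_RIGHTSIB);
 *	xfs_btree_set_sibling(cur, new, &ptr, XFS_BB_RIGHTSIB);
 *
 * The real split/join paths also fix up the reverse links and log the
 * affected headers.
 */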

void
xfs_btree_init_block_int(
	struct xfs_mount	*mp,
	struct xfs_btree_block	*buf,
	xfs_daddr_t		blkno,
	xfs_btnum_t		btnum,
	__u16			level,
	__u16			numrecs,
	__u64			owner,
	unsigned int		flags)
{
	int			crc = xfs_has_crc(mp);
	__u32			magic = xfs_btree_magic(crc, btnum);

	buf->bb_magic = cpu_to_be32(magic);
	buf->bb_level = cpu_to_be16(level);
	buf->bb_numrecs = cpu_to_be16(numrecs);

	if (flags & XFS_BTREE_LONG_PTRS) {
		buf->bb_u.l.bb_leftsib = cpu_to_be64(NULLFSBLOCK);
		buf->bb_u.l.bb_rightsib = cpu_to_be64(NULLFSBLOCK);
		if (crc) {
			buf->bb_u.l.bb_blkno = cpu_to_be64(blkno);
			buf->bb_u.l.bb_owner = cpu_to_be64(owner);
			uuid_copy(&buf->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid);
			buf->bb_u.l.bb_pad = 0;
			buf->bb_u.l.bb_lsn = 0;
		}
	} else {
		/* owner is a 32 bit value on short blocks */
		__u32 __owner = (__u32)owner;

		buf->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK);
		buf->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK);
		if (crc) {
			buf->bb_u.s.bb_blkno = cpu_to_be64(blkno);
			buf->bb_u.s.bb_owner = cpu_to_be32(__owner);
			uuid_copy(&buf->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid);
			buf->bb_u.s.bb_lsn = 0;
		}
	}
}

void
xfs_btree_init_block(
	struct xfs_mount	*mp,
	struct xfs_buf		*bp,
	xfs_btnum_t		btnum,
	__u16			level,
	__u16			numrecs,
	__u64			owner)
{
	xfs_btree_init_block_int(mp, XFS_BUF_TO_BLOCK(bp), xfs_buf_daddr(bp),
				 btnum, level, numrecs, owner, 0);
}

void
xfs_btree_init_block_cur(
	struct xfs_btree_cur	*cur,
	struct xfs_buf		*bp,
	int			level,
	int			numrecs)
{
	__u64			owner;

	/*
	 * we can pull the owner from the cursor right now as the different
	 * owners align directly with the pointer size of the btree. This may
	 * change in future, but is safe for current users of the generic btree
	 * code.
	 */
	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
		owner = cur->bc_ino.ip->i_ino;
	else
		owner = cur->bc_ag.pag->pag_agno;

	xfs_btree_init_block_int(cur->bc_mp, XFS_BUF_TO_BLOCK(bp),
				 xfs_buf_daddr(bp), cur->bc_btnum, level,
				 numrecs, owner, cur->bc_flags);
}

/*
 * Return true if ptr is the last record in the btree and
 * we need to track updates to this record. The decision
 * will be further refined in the update_lastrec method.
 */
STATIC int
xfs_btree_is_lastrec(
	struct xfs_btree_cur	*cur,
	struct xfs_btree_block	*block,
	int			level)
{
	union xfs_btree_ptr	ptr;

	if (level > 0)
		return 0;
	if (!(cur->bc_flags & XFS_BTREE_LASTREC_UPDATE))
		return 0;

	xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
	if (!xfs_btree_ptr_is_null(cur, &ptr))
		return 0;
	return 1;
}

STATIC void
xfs_btree_buf_to_ptr(
	struct xfs_btree_cur	*cur,
	struct xfs_buf		*bp,
	union xfs_btree_ptr	*ptr)
{
	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
		ptr->l = cpu_to_be64(XFS_DADDR_TO_FSB(cur->bc_mp,
					xfs_buf_daddr(bp)));
	else {
		ptr->s = cpu_to_be32(xfs_daddr_to_agbno(cur->bc_mp,
					xfs_buf_daddr(bp)));
	}
}

STATIC void
xfs_btree_set_refs(
	struct xfs_btree_cur	*cur,
	struct xfs_buf		*bp)
{
	switch (cur->bc_btnum) {
	case XFS_BTNUM_BNO:
	case XFS_BTNUM_CNT:
		xfs_buf_set_ref(bp, XFS_ALLOC_BTREE_REF);
		break;
	case XFS_BTNUM_INO:
	case XFS_BTNUM_FINO:
		xfs_buf_set_ref(bp, XFS_INO_BTREE_REF);
		break;
	case XFS_BTNUM_BMAP:
		xfs_buf_set_ref(bp, XFS_BMAP_BTREE_REF);
		break;
	case XFS_BTNUM_RMAP:
		xfs_buf_set_ref(bp, XFS_RMAP_BTREE_REF);
		break;
	case XFS_BTNUM_REFC:
		xfs_buf_set_ref(bp, XFS_REFC_BTREE_REF);
		break;
	default:
		ASSERT(0);
	}
}

int
xfs_btree_get_buf_block(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_ptr	*ptr,
	struct xfs_btree_block		**block,
	struct xfs_buf			**bpp)
{
	struct xfs_mount	*mp = cur->bc_mp;
	xfs_daddr_t		d;
	int			error;

	error = xfs_btree_ptr_to_daddr(cur, ptr, &d);
	if (error)
		return error;
	error = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d, mp->m_bsize,
				  0, bpp);
	if (error)
		return error;

	(*bpp)->b_ops = cur->bc_ops->buf_ops;
	*block = XFS_BUF_TO_BLOCK(*bpp);
	return 0;
}

/*
 * Read in the buffer at the given ptr and return the buffer and
 * the block pointer within the buffer.
 */
STATIC int
xfs_btree_read_buf_block(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_ptr	*ptr,
	int				flags,
	struct xfs_btree_block		**block,
	struct xfs_buf			**bpp)
{
	struct xfs_mount	*mp = cur->bc_mp;
	xfs_daddr_t		d;
	int			error;

	/* need to sort out how callers deal with failures first */
	ASSERT(!(flags & XBF_TRYLOCK));

	error = xfs_btree_ptr_to_daddr(cur, ptr, &d);
	if (error)
		return error;
	error = xfs_trans_read_buf(mp, cur->bc_tp, mp->m_ddev_targp, d,
				   mp->m_bsize, flags, bpp,
				   cur->bc_ops->buf_ops);
	if (error)
		return error;

	xfs_btree_set_refs(cur, *bpp);
	*block = XFS_BUF_TO_BLOCK(*bpp);
	return 0;
}

/*
 * Copy keys from one btree block to another.
 */
void
xfs_btree_copy_keys(
	struct xfs_btree_cur		*cur,
	union xfs_btree_key		*dst_key,
	const union xfs_btree_key	*src_key,
	int				numkeys)
{
	ASSERT(numkeys >= 0);
	memcpy(dst_key, src_key, numkeys * cur->bc_ops->key_len);
}

/*
 * Copy records from one btree block to another.
 */
STATIC void
xfs_btree_copy_recs(
	struct xfs_btree_cur	*cur,
	union xfs_btree_rec	*dst_rec,
	union xfs_btree_rec	*src_rec,
	int			numrecs)
{
	ASSERT(numrecs >= 0);
	memcpy(dst_rec, src_rec, numrecs * cur->bc_ops->rec_len);
}

/*
 * Copy block pointers from one btree block to another.
 */
void
xfs_btree_copy_ptrs(
	struct xfs_btree_cur		*cur,
	union xfs_btree_ptr		*dst_ptr,
	const union xfs_btree_ptr	*src_ptr,
	int				numptrs)
{
	ASSERT(numptrs >= 0);
	memcpy(dst_ptr, src_ptr, numptrs * xfs_btree_ptr_len(cur));
}

/*
 * Shift keys one index left/right inside a single btree block.
 */
STATIC void
xfs_btree_shift_keys(
	struct xfs_btree_cur	*cur,
	union xfs_btree_key	*key,
	int			dir,
	int			numkeys)
{
	char			*dst_key;

	ASSERT(numkeys >= 0);
	ASSERT(dir == 1 || dir == -1);

	dst_key = (char *)key + (dir * cur->bc_ops->key_len);
	memmove(dst_key, key, numkeys * cur->bc_ops->key_len);
}

/*
 * Shift records one index left/right inside a single btree block.
 */
STATIC void
xfs_btree_shift_recs(
	struct xfs_btree_cur	*cur,
	union xfs_btree_rec	*rec,
	int			dir,
	int			numrecs)
{
	char			*dst_rec;

	ASSERT(numrecs >= 0);
	ASSERT(dir == 1 || dir == -1);

	dst_rec = (char *)rec + (dir * cur->bc_ops->rec_len);
	memmove(dst_rec, rec, numrecs * cur->bc_ops->rec_len);
}

/*
 * Shift block pointers one index left/right inside a single btree block.
 */
STATIC void
xfs_btree_shift_ptrs(
	struct xfs_btree_cur	*cur,
	union xfs_btree_ptr	*ptr,
	int			dir,
	int			numptrs)
{
	char			*dst_ptr;

	ASSERT(numptrs >= 0);
	ASSERT(dir == 1 || dir == -1);

	dst_ptr = (char *)ptr + (dir * xfs_btree_ptr_len(cur));
	memmove(dst_ptr, ptr, numptrs * xfs_btree_ptr_len(cur));
}
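
/*
 * Illustration (hypothetical numbers): to open a hole for a new record at
 * index 3 in a leaf that currently holds 5 records, shift records 3..5 one
 * slot towards higher indices:
 *
 *	xfs_btree_shift_recs(cur, xfs_btree_rec_addr(cur, 3, block), 1, 3);
 *
 * dir == 1 moves entries right, dir == -1 moves them left; memmove()
 * copes with the overlapping source and destination in both directions.
 */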

/*
 * Log key values from the btree block.
 */
STATIC void
xfs_btree_log_keys(
	struct xfs_btree_cur	*cur,
	struct xfs_buf		*bp,
	int			first,
	int			last)
{

	if (bp) {
		xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
		xfs_trans_log_buf(cur->bc_tp, bp,
				  xfs_btree_key_offset(cur, first),
				  xfs_btree_key_offset(cur, last + 1) - 1);
	} else {
		xfs_trans_log_inode(cur->bc_tp, cur->bc_ino.ip,
				xfs_ilog_fbroot(cur->bc_ino.whichfork));
	}
}

/*
 * Log record values from the btree block.
 */
void
xfs_btree_log_recs(
	struct xfs_btree_cur	*cur,
	struct xfs_buf		*bp,
	int			first,
	int			last)
{

	xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
	xfs_trans_log_buf(cur->bc_tp, bp,
			  xfs_btree_rec_offset(cur, first),
			  xfs_btree_rec_offset(cur, last + 1) - 1);

}

/*
 * Log block pointer fields from a btree block (nonleaf).
 */
STATIC void
xfs_btree_log_ptrs(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	struct xfs_buf		*bp,	/* buffer containing btree block */
	int			first,	/* index of first pointer to log */
	int			last)	/* index of last pointer to log */
{

	if (bp) {
		struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
		int			level = xfs_btree_get_level(block);

		xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
		xfs_trans_log_buf(cur->bc_tp, bp,
				xfs_btree_ptr_offset(cur, first, level),
				xfs_btree_ptr_offset(cur, last + 1, level) - 1);
	} else {
		xfs_trans_log_inode(cur->bc_tp, cur->bc_ino.ip,
			xfs_ilog_fbroot(cur->bc_ino.whichfork));
	}

}

/*
 * Log fields from a btree block header.
 */
void
xfs_btree_log_block(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	struct xfs_buf		*bp,	/* buffer containing btree block */
	uint32_t		fields)	/* mask of fields: XFS_BB_... */
{
	int			first;	/* first byte offset logged */
	int			last;	/* last byte offset logged */
	static const short	soffsets[] = {	/* table of offsets (short) */
		offsetof(struct xfs_btree_block, bb_magic),
		offsetof(struct xfs_btree_block, bb_level),
		offsetof(struct xfs_btree_block, bb_numrecs),
		offsetof(struct xfs_btree_block, bb_u.s.bb_leftsib),
		offsetof(struct xfs_btree_block, bb_u.s.bb_rightsib),
		offsetof(struct xfs_btree_block, bb_u.s.bb_blkno),
		offsetof(struct xfs_btree_block, bb_u.s.bb_lsn),
		offsetof(struct xfs_btree_block, bb_u.s.bb_uuid),
		offsetof(struct xfs_btree_block, bb_u.s.bb_owner),
		offsetof(struct xfs_btree_block, bb_u.s.bb_crc),
		XFS_BTREE_SBLOCK_CRC_LEN
	};
	static const short	loffsets[] = {	/* table of offsets (long) */
		offsetof(struct xfs_btree_block, bb_magic),
		offsetof(struct xfs_btree_block, bb_level),
		offsetof(struct xfs_btree_block, bb_numrecs),
		offsetof(struct xfs_btree_block, bb_u.l.bb_leftsib),
		offsetof(struct xfs_btree_block, bb_u.l.bb_rightsib),
		offsetof(struct xfs_btree_block, bb_u.l.bb_blkno),
		offsetof(struct xfs_btree_block, bb_u.l.bb_lsn),
		offsetof(struct xfs_btree_block, bb_u.l.bb_uuid),
		offsetof(struct xfs_btree_block, bb_u.l.bb_owner),
		offsetof(struct xfs_btree_block, bb_u.l.bb_crc),
		offsetof(struct xfs_btree_block, bb_u.l.bb_pad),
		XFS_BTREE_LBLOCK_CRC_LEN
	};

	if (bp) {
		int nbits;

		if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) {
			/*
			 * We don't log the CRC when updating a btree
			 * block but instead recreate it during log
			 * recovery. As the log buffers have checksums
			 * of their own this is safe and avoids logging a crc
			 * update in a lot of places.
			 */
			if (fields == XFS_BB_ALL_BITS)
				fields = XFS_BB_ALL_BITS_CRC;
			nbits = XFS_BB_NUM_BITS_CRC;
		} else {
			nbits = XFS_BB_NUM_BITS;
		}
		xfs_btree_offsets(fields,
				  (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
					loffsets : soffsets,
				  nbits, &first, &last);
		xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
		xfs_trans_log_buf(cur->bc_tp, bp, first, last);
	} else {
		xfs_trans_log_inode(cur->bc_tp, cur->bc_ino.ip,
			xfs_ilog_fbroot(cur->bc_ino.whichfork));
	}
}

/*
 * Increment cursor by one record at the level.
 * For nonzero levels the leaf-ward information is untouched.
 */
int						/* error */
xfs_btree_increment(
	struct xfs_btree_cur	*cur,
	int			level,
	int			*stat)		/* success/failure */
{
	struct xfs_btree_block	*block;
	union xfs_btree_ptr	ptr;
	struct xfs_buf		*bp;
	int			error;		/* error return value */
	int			lev;

	ASSERT(level < cur->bc_nlevels);

	/* Read-ahead to the right at this level. */
	xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);

	/* Get a pointer to the btree block. */
	block = xfs_btree_get_block(cur, level, &bp);

#ifdef DEBUG
	error = xfs_btree_check_block(cur, block, level, bp);
	if (error)
		goto error0;
#endif

	/* We're done if we remain in the block after the increment. */
	if (++cur->bc_levels[level].ptr <= xfs_btree_get_numrecs(block))
		goto out1;

	/* Fail if we just went off the right edge of the tree. */
	xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
	if (xfs_btree_ptr_is_null(cur, &ptr))
		goto out0;

	XFS_BTREE_STATS_INC(cur, increment);

	/*
	 * March up the tree incrementing pointers.
	 * Stop when we don't go off the right edge of a block.
	 */
	for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
		block = xfs_btree_get_block(cur, lev, &bp);

#ifdef DEBUG
		error = xfs_btree_check_block(cur, block, lev, bp);
		if (error)
			goto error0;
#endif

		if (++cur->bc_levels[lev].ptr <= xfs_btree_get_numrecs(block))
			break;

		/* Read-ahead the right block for the next loop. */
		xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
	}

	/*
	 * If we went off the root then we are either seriously
	 * confused or have the tree root in an inode.
	 */
	if (lev == cur->bc_nlevels) {
		if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
			goto out0;
		ASSERT(0);
		error = -EFSCORRUPTED;
		goto error0;
	}
	ASSERT(lev < cur->bc_nlevels);

	/*
	 * Now walk back down the tree, fixing up the cursor's buffer
	 * pointers and key numbers.
	 */
	for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
		union xfs_btree_ptr	*ptrp;

		ptrp = xfs_btree_ptr_addr(cur, cur->bc_levels[lev].ptr, block);
		--lev;
		error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp);
		if (error)
			goto error0;

		xfs_btree_setbuf(cur, lev, bp);
		cur->bc_levels[lev].ptr = 1;
	}
out1:
	*stat = 1;
	return 0;

out0:
	*stat = 0;
	return 0;

error0:
	return error;
}
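
/*
 * Typical walk (sketch; locking and record processing elided): position
 * the cursor with xfs_btree_lookup() -- defined later in this file -- and
 * step through records with xfs_btree_increment() until it reports that
 * the right edge of the tree was reached:
 *
 *	error = xfs_btree_lookup(cur, XFS_LOOKUP_GE, &stat);
 *	while (!error && stat) {
 *		// ... read and process the current record ...
 *		error = xfs_btree_increment(cur, 0, &stat);
 *	}
 */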

/*
 * Decrement cursor by one record at the level.
 * For nonzero levels the leaf-ward information is untouched.
 */
int						/* error */
xfs_btree_decrement(
	struct xfs_btree_cur	*cur,
	int			level,
	int			*stat)		/* success/failure */
{
	struct xfs_btree_block	*block;
	struct xfs_buf		*bp;
	int			error;		/* error return value */
	int			lev;
	union xfs_btree_ptr	ptr;

	ASSERT(level < cur->bc_nlevels);

	/* Read-ahead to the left at this level. */
	xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA);

	/* We're done if we remain in the block after the decrement. */
	if (--cur->bc_levels[level].ptr > 0)
		goto out1;

	/* Get a pointer to the btree block. */
	block = xfs_btree_get_block(cur, level, &bp);

#ifdef DEBUG
	error = xfs_btree_check_block(cur, block, level, bp);
	if (error)
		goto error0;
#endif

	/* Fail if we just went off the left edge of the tree. */
	xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB);
	if (xfs_btree_ptr_is_null(cur, &ptr))
		goto out0;

	XFS_BTREE_STATS_INC(cur, decrement);

	/*
	 * March up the tree decrementing pointers.
	 * Stop when we don't go off the left edge of a block.
	 */
	for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
		if (--cur->bc_levels[lev].ptr > 0)
			break;
		/* Read-ahead the left block for the next loop. */
		xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA);
	}

	/*
	 * If we went off the root then we are either seriously
	 * confused or have the tree root in an inode.
	 */
	if (lev == cur->bc_nlevels) {
		if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
			goto out0;
		ASSERT(0);
		error = -EFSCORRUPTED;
		goto error0;
	}
	ASSERT(lev < cur->bc_nlevels);

	/*
	 * Now walk back down the tree, fixing up the cursor's buffer
	 * pointers and key numbers.
	 */
	for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
		union xfs_btree_ptr	*ptrp;

		ptrp = xfs_btree_ptr_addr(cur, cur->bc_levels[lev].ptr, block);
		--lev;
		error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp);
		if (error)
			goto error0;
		xfs_btree_setbuf(cur, lev, bp);
		cur->bc_levels[lev].ptr = xfs_btree_get_numrecs(block);
	}
out1:
	*stat = 1;
	return 0;

out0:
	*stat = 0;
	return 0;

error0:
	return error;
}

int
xfs_btree_lookup_get_block(
	struct xfs_btree_cur		*cur,	/* btree cursor */
	int				level,	/* level in the btree */
	const union xfs_btree_ptr	*pp,	/* ptr to btree block */
	struct xfs_btree_block		**blkp)	/* return btree block */
{
	struct xfs_buf		*bp;	/* buffer pointer for btree block */
	xfs_daddr_t		daddr;
	int			error = 0;

	/* special case the root block if in an inode */
	if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
	    (level == cur->bc_nlevels - 1)) {
		*blkp = xfs_btree_get_iroot(cur);
		return 0;
	}

	/*
	 * If the old buffer at this level matches the disk address we are
	 * looking for, re-use it.
	 *
	 * Otherwise throw it away and get a new one.
	 */
	bp = cur->bc_levels[level].bp;
	error = xfs_btree_ptr_to_daddr(cur, pp, &daddr);
	if (error)
		return error;
	if (bp && xfs_buf_daddr(bp) == daddr) {
		*blkp = XFS_BUF_TO_BLOCK(bp);
		return 0;
	}

	error = xfs_btree_read_buf_block(cur, pp, 0, blkp, &bp);
	if (error)
		return error;

	/* Check the inode owner since the verifiers don't. */
	if (xfs_has_crc(cur->bc_mp) &&
	    !(cur->bc_ino.flags & XFS_BTCUR_BMBT_INVALID_OWNER) &&
	    (cur->bc_flags & XFS_BTREE_LONG_PTRS) &&
	    be64_to_cpu((*blkp)->bb_u.l.bb_owner) !=
			cur->bc_ino.ip->i_ino)
		goto out_bad;

	/* Did we get the level we were looking for? */
	if (be16_to_cpu((*blkp)->bb_level) != level)
		goto out_bad;

	/* Check that internal nodes have at least one record. */
	if (level != 0 && be16_to_cpu((*blkp)->bb_numrecs) == 0)
		goto out_bad;

	xfs_btree_setbuf(cur, level, bp);
	return 0;

out_bad:
	*blkp = NULL;
	xfs_buf_mark_corrupt(bp);
	xfs_trans_brelse(cur->bc_tp, bp);
	return -EFSCORRUPTED;
}

/*
 * Get current search key. For level 0 we don't actually have a key
 * structure so we make one up from the record. For all other levels
 * we just return the right key.
 */
STATIC union xfs_btree_key *
xfs_lookup_get_search_key(
	struct xfs_btree_cur	*cur,
	int			level,
	int			keyno,
	struct xfs_btree_block	*block,
	union xfs_btree_key	*kp)
{
	if (level == 0) {
		cur->bc_ops->init_key_from_rec(kp,
				xfs_btree_rec_addr(cur, keyno, block));
		return kp;
	}

	return xfs_btree_key_addr(cur, keyno, block);
}
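
/*
 * Illustration: at the leaf level there is no key array, so the search key
 * is synthesized into the caller's scratch union:
 *
 *	union xfs_btree_key	key;
 *	union xfs_btree_key	*kp;
 *
 *	kp = xfs_lookup_get_search_key(cur, 0, keyno, block, &key);
 *	// kp == &key, filled in from record "keyno"
 *
 * For level > 0 the returned pointer aims directly into the on-disk block
 * and the scratch union is untouched.
 */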

/*
 * Lookup the record. The cursor is made to point to it, based on dir.
 * stat is set to 0 if we can't find any such record, 1 for success.
 */
int					/* error */
xfs_btree_lookup(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	xfs_lookup_t		dir,	/* <=, ==, or >= */
	int			*stat)	/* success/failure */
{
	struct xfs_btree_block	*block;	/* current btree block */
	int64_t			diff;	/* difference for the current key */
	int			error;	/* error return value */
	int			keyno;	/* current key number */
	int			level;	/* level in the btree */
	union xfs_btree_ptr	*pp;	/* ptr to btree block */
	union xfs_btree_ptr	ptr;	/* ptr to btree block */

	XFS_BTREE_STATS_INC(cur, lookup);

	/* No such thing as a zero-level tree. */
	if (XFS_IS_CORRUPT(cur->bc_mp, cur->bc_nlevels == 0))
		return -EFSCORRUPTED;

	block = NULL;
	keyno = 0;

	/* initialise start pointer from cursor */
	cur->bc_ops->init_ptr_from_cur(cur, &ptr);
	pp = &ptr;

	/*
	 * Iterate over each level in the btree, starting at the root.
	 * For each level above the leaves, find the key we need, based
	 * on the lookup record, then follow the corresponding block
	 * pointer down to the next level.
	 */
	for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {
		/* Get the block we need to do the lookup on. */
		error = xfs_btree_lookup_get_block(cur, level, pp, &block);
		if (error)
			goto error0;

		if (diff == 0) {
			/*
			 * If we already had a key match at a higher level, we
			 * know we need to use the first entry in this block.
			 */
			keyno = 1;
		} else {
			/* Otherwise search this block. Do a binary search. */

			int	high;	/* high entry number */
			int	low;	/* low entry number */

			/* Set low and high entry numbers, 1-based. */
			low = 1;
			high = xfs_btree_get_numrecs(block);
			if (!high) {
				/* Block is empty, must be an empty leaf. */
				if (level != 0 || cur->bc_nlevels != 1) {
					XFS_CORRUPTION_ERROR(__func__,
							XFS_ERRLEVEL_LOW,
							cur->bc_mp, block,
							sizeof(*block));
					return -EFSCORRUPTED;
				}

				cur->bc_levels[0].ptr = dir != XFS_LOOKUP_LE;
				*stat = 0;
				return 0;
			}

			/* Binary search the block. */
			while (low <= high) {
				union xfs_btree_key	key;
				union xfs_btree_key	*kp;

				XFS_BTREE_STATS_INC(cur, compare);

				/* keyno is average of low and high. */
				keyno = (low + high) >> 1;

				/* Get current search key */
				kp = xfs_lookup_get_search_key(cur, level,
						keyno, block, &key);

				/*
				 * Compute difference to get next direction:
				 *  - less than, move right
				 *  - greater than, move left
				 *  - equal, we're done
				 */
				diff = cur->bc_ops->key_diff(cur, kp);
				if (diff < 0)
					low = keyno + 1;
				else if (diff > 0)
					high = keyno - 1;
				else
					break;
			}
		}

		/*
		 * If there are more levels, set up for the next level
		 * by getting the block number and filling in the cursor.
		 */
		if (level > 0) {
			/*
			 * If we moved left, need the previous key number,
			 * unless there isn't one.
			 */
			if (diff > 0 && --keyno < 1)
				keyno = 1;
			pp = xfs_btree_ptr_addr(cur, keyno, block);

			error = xfs_btree_debug_check_ptr(cur, pp, 0, level);
			if (error)
				goto error0;

			cur->bc_levels[level].ptr = keyno;
		}
	}

	/* Done with the search. See if we need to adjust the results. */
	if (dir != XFS_LOOKUP_LE && diff < 0) {
		keyno++;
		/*
		 * If ge search and we went off the end of the block, but it's
		 * not the last block, we're in the wrong block.
		 */
		xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
		if (dir == XFS_LOOKUP_GE &&
		    keyno > xfs_btree_get_numrecs(block) &&
		    !xfs_btree_ptr_is_null(cur, &ptr)) {
			int	i;

			cur->bc_levels[0].ptr = keyno;
			error = xfs_btree_increment(cur, 0, &i);
			if (error)
				goto error0;
			if (XFS_IS_CORRUPT(cur->bc_mp, i != 1))
				return -EFSCORRUPTED;
			*stat = 1;
			return 0;
		}
	} else if (dir == XFS_LOOKUP_LE && diff > 0)
		keyno--;
	cur->bc_levels[0].ptr = keyno;

	/* Return if we succeeded or not. */
	if (keyno == 0 || keyno > xfs_btree_get_numrecs(block))
		*stat = 0;
	else if (dir != XFS_LOOKUP_EQ || diff == 0)
		*stat = 1;
	else
		*stat = 0;
	return 0;

error0:
	return error;
}
	 */
	if (dir != XFS_LOOKUP_LE && diff < 0) {
		keyno++;
		/*
		 * If ge search and we went off the end of the block, but it's
		 * not the last block, we're in the wrong block.
		 */
		xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
		if (dir == XFS_LOOKUP_GE &&
		    keyno > xfs_btree_get_numrecs(block) &&
		    !xfs_btree_ptr_is_null(cur, &ptr)) {
			int	i;

			cur->bc_levels[0].ptr = keyno;
			error = xfs_btree_increment(cur, 0, &i);
			if (error)
				goto error0;
			if (XFS_IS_CORRUPT(cur->bc_mp, i != 1))
				return -EFSCORRUPTED;
			*stat = 1;
			return 0;
		}
	} else if (dir == XFS_LOOKUP_LE && diff > 0)
		keyno--;
	cur->bc_levels[0].ptr = keyno;

	/* Return if we succeeded or not. */
	if (keyno == 0 || keyno > xfs_btree_get_numrecs(block))
		*stat = 0;
	else if (dir != XFS_LOOKUP_EQ || diff == 0)
		*stat = 1;
	else
		*stat = 0;
	return 0;

error0:
	return error;
}

/* Find the high key storage area from a regular key. */
union xfs_btree_key *
xfs_btree_high_key_from_key(
	struct xfs_btree_cur	*cur,
	union xfs_btree_key	*key)
{
	ASSERT(cur->bc_flags & XFS_BTREE_OVERLAPPING);
	return (union xfs_btree_key *)((char *)key +
			(cur->bc_ops->key_len / 2));
}

/* Determine the low (and high if overlapped) keys of a leaf block */
STATIC void
xfs_btree_get_leaf_keys(
	struct xfs_btree_cur	*cur,
	struct xfs_btree_block	*block,
	union xfs_btree_key	*key)
{
	union xfs_btree_key	max_hkey;
	union xfs_btree_key	hkey;
	union xfs_btree_rec	*rec;
	union xfs_btree_key	*high;
	int			n;

	rec = xfs_btree_rec_addr(cur, 1, block);
	cur->bc_ops->init_key_from_rec(key, rec);

	if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {

		cur->bc_ops->init_high_key_from_rec(&max_hkey, rec);
		for (n = 2; n <= xfs_btree_get_numrecs(block); n++) {
			rec = xfs_btree_rec_addr(cur, n, block);
			cur->bc_ops->init_high_key_from_rec(&hkey, rec);
			if (xfs_btree_keycmp_gt(cur, &hkey, &max_hkey))
				max_hkey = hkey;
		}

		high = xfs_btree_high_key_from_key(cur, key);
		memcpy(high, &max_hkey, cur->bc_ops->key_len / 2);
	}
}

/* Determine the low (and high if overlapped) keys of a node block */
STATIC void
xfs_btree_get_node_keys(
	struct xfs_btree_cur	*cur,
	struct xfs_btree_block	*block,
	union xfs_btree_key	*key)
{
	union xfs_btree_key	*hkey;
	union xfs_btree_key	*max_hkey;
	union xfs_btree_key	*high;
	int			n;

	if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
		memcpy(key, xfs_btree_key_addr(cur, 1, block),
				cur->bc_ops->key_len / 2);

		max_hkey = xfs_btree_high_key_addr(cur, 1, block);
		for (n = 2; n <= xfs_btree_get_numrecs(block); n++) {
			hkey = xfs_btree_high_key_addr(cur, n, block);
			if (xfs_btree_keycmp_gt(cur, hkey, max_hkey))
				max_hkey = hkey;
		}

		high = xfs_btree_high_key_from_key(cur, key);
		memcpy(high, max_hkey, cur->bc_ops->key_len / 2);
	} else {
		memcpy(key, xfs_btree_key_addr(cur, 1, block),
				cur->bc_ops->key_len);
	}
}

/* Derive the keys for any btree block.
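 *
 * Entries are sorted by low key, so a block's low key is simply its first
 * entry's key.  In an overlapping btree the high key can belong to any
 * entry, though, so every entry has to be scanned to find the maximum.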
 */
void
xfs_btree_get_keys(
	struct xfs_btree_cur	*cur,
	struct xfs_btree_block	*block,
	union xfs_btree_key	*key)
{
	if (be16_to_cpu(block->bb_level) == 0)
		xfs_btree_get_leaf_keys(cur, block, key);
	else
		xfs_btree_get_node_keys(cur, block, key);
}

/*
 * Decide if we need to update the parent keys of a btree block.  For
 * a standard btree this is only necessary if we're updating the first
 * record/key.  For an overlapping btree, we must always update the
 * keys because the highest key can be in any of the records or keys
 * in the block.
 */
static inline bool
xfs_btree_needs_key_update(
	struct xfs_btree_cur	*cur,
	int			ptr)
{
	return (cur->bc_flags & XFS_BTREE_OVERLAPPING) || ptr == 1;
}

/*
 * Update the low and high parent keys of the given level, progressing
 * towards the root.  If force_all is false, stop if the keys for a given
 * level do not need updating.
 */
STATIC int
__xfs_btree_updkeys(
	struct xfs_btree_cur	*cur,
	int			level,
	struct xfs_btree_block	*block,
	struct xfs_buf		*bp0,
	bool			force_all)
{
	union xfs_btree_key	key;	/* keys from current level */
	union xfs_btree_key	*lkey;	/* low key for current level */
	union xfs_btree_key	*hkey;	/* high key for current level */
	union xfs_btree_key	*nlkey;	/* low key in the next level up */
	union xfs_btree_key	*nhkey;	/* high key in the next level up */
	struct xfs_buf		*bp;
	int			ptr;

	ASSERT(cur->bc_flags & XFS_BTREE_OVERLAPPING);

	/* Exit if there aren't any parent levels to update. */
	if (level + 1 >= cur->bc_nlevels)
		return 0;

	trace_xfs_btree_updkeys(cur, level, bp0);

	lkey = &key;
	hkey = xfs_btree_high_key_from_key(cur, lkey);
	xfs_btree_get_keys(cur, block, lkey);
	for (level++; level < cur->bc_nlevels; level++) {
#ifdef DEBUG
		int		error;
#endif
		block = xfs_btree_get_block(cur, level, &bp);
		trace_xfs_btree_updkeys(cur, level, bp);
#ifdef DEBUG
		error = xfs_btree_check_block(cur, block, level, bp);
		if (error)
			return error;
#endif
		ptr = cur->bc_levels[level].ptr;
		nlkey = xfs_btree_key_addr(cur, ptr, block);
		nhkey = xfs_btree_high_key_addr(cur, ptr, block);
		if (!force_all &&
		    xfs_btree_keycmp_eq(cur, nlkey, lkey) &&
		    xfs_btree_keycmp_eq(cur, nhkey, hkey))
			break;
		xfs_btree_copy_keys(cur, nlkey, lkey, 1);
		xfs_btree_log_keys(cur, bp, ptr, ptr);
		if (level + 1 >= cur->bc_nlevels)
			break;
		xfs_btree_get_node_keys(cur, block, lkey);
	}

	return 0;
}

/* Update all the keys from some level in cursor back to the root. */
STATIC int
xfs_btree_updkeys_force(
	struct xfs_btree_cur	*cur,
	int			level)
{
	struct xfs_buf		*bp;
	struct xfs_btree_block	*block;

	block = xfs_btree_get_block(cur, level, &bp);
	return __xfs_btree_updkeys(cur, level, block, bp, true);
}

/*
 * Update the parent keys of the given level, progressing towards the root.
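 *
 * Non-overlapping btrees stop the ascent as soon as the cursor is no
 * longer pointing at the first entry of a block, since only the leftmost
 * key propagates upwards; overlapping btrees go through
 * __xfs_btree_updkeys() instead.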
 */
STATIC int
xfs_btree_update_keys(
	struct xfs_btree_cur	*cur,
	int			level)
{
	struct xfs_btree_block	*block;
	struct xfs_buf		*bp;
	union xfs_btree_key	*kp;
	union xfs_btree_key	key;
	int			ptr;

	ASSERT(level >= 0);

	block = xfs_btree_get_block(cur, level, &bp);
	if (cur->bc_flags & XFS_BTREE_OVERLAPPING)
		return __xfs_btree_updkeys(cur, level, block, bp, false);

	/*
	 * Go up the tree from this level toward the root.
	 * At each level, update the key value to the value input.
	 * Stop when we reach a level where the cursor isn't pointing
	 * at the first entry in the block.
	 */
	xfs_btree_get_keys(cur, block, &key);
	for (level++, ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {
#ifdef DEBUG
		int		error;
#endif
		block = xfs_btree_get_block(cur, level, &bp);
#ifdef DEBUG
		error = xfs_btree_check_block(cur, block, level, bp);
		if (error)
			return error;
#endif
		ptr = cur->bc_levels[level].ptr;
		kp = xfs_btree_key_addr(cur, ptr, block);
		xfs_btree_copy_keys(cur, kp, &key, 1);
		xfs_btree_log_keys(cur, bp, ptr, ptr);
	}

	return 0;
}

/*
 * Update the record referred to by cur to the value in the
 * given record.  This either works (return 0) or gets an
 * EFSCORRUPTED error.
 */
int
xfs_btree_update(
	struct xfs_btree_cur	*cur,
	union xfs_btree_rec	*rec)
{
	struct xfs_btree_block	*block;
	struct xfs_buf		*bp;
	int			error;
	int			ptr;
	union xfs_btree_rec	*rp;

	/* Pick up the current block. */
	block = xfs_btree_get_block(cur, 0, &bp);

#ifdef DEBUG
	error = xfs_btree_check_block(cur, block, 0, bp);
	if (error)
		goto error0;
#endif
	/* Get the address of the rec to be updated. */
	ptr = cur->bc_levels[0].ptr;
	rp = xfs_btree_rec_addr(cur, ptr, block);

	/* Fill in the new contents and log them. */
	xfs_btree_copy_recs(cur, rp, rec, 1);
	xfs_btree_log_recs(cur, bp, ptr, ptr);

	/*
	 * If we are tracking the last record in the tree and
	 * we are at the far right edge of the tree, update it.
	 */
	if (xfs_btree_is_lastrec(cur, block, 0)) {
		cur->bc_ops->update_lastrec(cur, block, rec,
					    ptr, LASTREC_UPDATE);
	}

	/* Pass new key value up to our parent. */
	if (xfs_btree_needs_key_update(cur, ptr)) {
		error = xfs_btree_update_keys(cur, 0);
		if (error)
			goto error0;
	}

	return 0;

error0:
	return error;
}

/*
 * Move 1 record left from cur/level if possible.
 * Update cur to reflect the new path.
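 *
 * *stat is set to 0, without error, when the shift is not possible: there
 * is no left sibling, the cursor already points at the entry that would
 * move, or the left sibling is full.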
2314 */ 2315 STATIC int /* error */ 2316 xfs_btree_lshift( 2317 struct xfs_btree_cur *cur, 2318 int level, 2319 int *stat) /* success/failure */ 2320 { 2321 struct xfs_buf *lbp; /* left buffer pointer */ 2322 struct xfs_btree_block *left; /* left btree block */ 2323 int lrecs; /* left record count */ 2324 struct xfs_buf *rbp; /* right buffer pointer */ 2325 struct xfs_btree_block *right; /* right btree block */ 2326 struct xfs_btree_cur *tcur; /* temporary btree cursor */ 2327 int rrecs; /* right record count */ 2328 union xfs_btree_ptr lptr; /* left btree pointer */ 2329 union xfs_btree_key *rkp = NULL; /* right btree key */ 2330 union xfs_btree_ptr *rpp = NULL; /* right address pointer */ 2331 union xfs_btree_rec *rrp = NULL; /* right record pointer */ 2332 int error; /* error return value */ 2333 int i; 2334 2335 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && 2336 level == cur->bc_nlevels - 1) 2337 goto out0; 2338 2339 /* Set up variables for this block as "right". */ 2340 right = xfs_btree_get_block(cur, level, &rbp); 2341 2342 #ifdef DEBUG 2343 error = xfs_btree_check_block(cur, right, level, rbp); 2344 if (error) 2345 goto error0; 2346 #endif 2347 2348 /* If we've got no left sibling then we can't shift an entry left. */ 2349 xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB); 2350 if (xfs_btree_ptr_is_null(cur, &lptr)) 2351 goto out0; 2352 2353 /* 2354 * If the cursor entry is the one that would be moved, don't 2355 * do it... it's too complicated. 2356 */ 2357 if (cur->bc_levels[level].ptr <= 1) 2358 goto out0; 2359 2360 /* Set up the left neighbor as "left". */ 2361 error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp); 2362 if (error) 2363 goto error0; 2364 2365 /* If it's full, it can't take another entry. */ 2366 lrecs = xfs_btree_get_numrecs(left); 2367 if (lrecs == cur->bc_ops->get_maxrecs(cur, level)) 2368 goto out0; 2369 2370 rrecs = xfs_btree_get_numrecs(right); 2371 2372 /* 2373 * We add one entry to the left side and remove one for the right side. 2374 * Account for it here, the changes will be updated on disk and logged 2375 * later. 2376 */ 2377 lrecs++; 2378 rrecs--; 2379 2380 XFS_BTREE_STATS_INC(cur, lshift); 2381 XFS_BTREE_STATS_ADD(cur, moves, 1); 2382 2383 /* 2384 * If non-leaf, copy a key and a ptr to the left block. 2385 * Log the changes to the left block. 2386 */ 2387 if (level > 0) { 2388 /* It's a non-leaf. Move keys and pointers. */ 2389 union xfs_btree_key *lkp; /* left btree key */ 2390 union xfs_btree_ptr *lpp; /* left address pointer */ 2391 2392 lkp = xfs_btree_key_addr(cur, lrecs, left); 2393 rkp = xfs_btree_key_addr(cur, 1, right); 2394 2395 lpp = xfs_btree_ptr_addr(cur, lrecs, left); 2396 rpp = xfs_btree_ptr_addr(cur, 1, right); 2397 2398 error = xfs_btree_debug_check_ptr(cur, rpp, 0, level); 2399 if (error) 2400 goto error0; 2401 2402 xfs_btree_copy_keys(cur, lkp, rkp, 1); 2403 xfs_btree_copy_ptrs(cur, lpp, rpp, 1); 2404 2405 xfs_btree_log_keys(cur, lbp, lrecs, lrecs); 2406 xfs_btree_log_ptrs(cur, lbp, lrecs, lrecs); 2407 2408 ASSERT(cur->bc_ops->keys_inorder(cur, 2409 xfs_btree_key_addr(cur, lrecs - 1, left), lkp)); 2410 } else { 2411 /* It's a leaf. Move records. 
		 */
		union xfs_btree_rec	*lrp;	/* left record pointer */

		lrp = xfs_btree_rec_addr(cur, lrecs, left);
		rrp = xfs_btree_rec_addr(cur, 1, right);

		xfs_btree_copy_recs(cur, lrp, rrp, 1);
		xfs_btree_log_recs(cur, lbp, lrecs, lrecs);

		ASSERT(cur->bc_ops->recs_inorder(cur,
			xfs_btree_rec_addr(cur, lrecs - 1, left), lrp));
	}

	xfs_btree_set_numrecs(left, lrecs);
	xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS);

	xfs_btree_set_numrecs(right, rrecs);
	xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS);

	/*
	 * Slide the contents of right down one entry.
	 */
	XFS_BTREE_STATS_ADD(cur, moves, rrecs - 1);
	if (level > 0) {
		/* It's a nonleaf.  Operate on keys and ptrs. */
		for (i = 0; i < rrecs; i++) {
			error = xfs_btree_debug_check_ptr(cur, rpp, i + 1, level);
			if (error)
				goto error0;
		}

		xfs_btree_shift_keys(cur,
				xfs_btree_key_addr(cur, 2, right),
				-1, rrecs);
		xfs_btree_shift_ptrs(cur,
				xfs_btree_ptr_addr(cur, 2, right),
				-1, rrecs);

		xfs_btree_log_keys(cur, rbp, 1, rrecs);
		xfs_btree_log_ptrs(cur, rbp, 1, rrecs);
	} else {
		/* It's a leaf.  Operate on records. */
		xfs_btree_shift_recs(cur,
				xfs_btree_rec_addr(cur, 2, right),
				-1, rrecs);
		xfs_btree_log_recs(cur, rbp, 1, rrecs);
	}

	/*
	 * Using a temporary cursor, update the parent key values of the
	 * block on the left.
	 */
	if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
		error = xfs_btree_dup_cursor(cur, &tcur);
		if (error)
			goto error0;
		i = xfs_btree_firstrec(tcur, level);
		if (XFS_IS_CORRUPT(tcur->bc_mp, i != 1)) {
			error = -EFSCORRUPTED;
			goto error0;
		}

		error = xfs_btree_decrement(tcur, level, &i);
		if (error)
			goto error1;

		/* Update the parent high keys of the left block, if needed. */
		error = xfs_btree_update_keys(tcur, level);
		if (error)
			goto error1;

		xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
	}

	/* Update the parent keys of the right block. */
	error = xfs_btree_update_keys(cur, level);
	if (error)
		goto error0;

	/* Slide the cursor value left one. */
	cur->bc_levels[level].ptr--;

	*stat = 1;
	return 0;

out0:
	*stat = 0;
	return 0;

error0:
	return error;

error1:
	xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
	return error;
}

/*
 * Move 1 record right from cur/level if possible.
 * Update cur to reflect the new path.
 */
STATIC int				/* error */
xfs_btree_rshift(
	struct xfs_btree_cur	*cur,
	int			level,
	int			*stat)	/* success/failure */
{
	struct xfs_buf		*lbp;	/* left buffer pointer */
	struct xfs_btree_block	*left;	/* left btree block */
	struct xfs_buf		*rbp;	/* right buffer pointer */
	struct xfs_btree_block	*right;	/* right btree block */
	struct xfs_btree_cur	*tcur;	/* temporary btree cursor */
	union xfs_btree_ptr	rptr;	/* right block pointer */
	union xfs_btree_key	*rkp;	/* right btree key */
	int			rrecs;	/* right record count */
	int			lrecs;	/* left record count */
	int			error;	/* error return value */
	int			i;	/* loop counter */

	if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
	    (level == cur->bc_nlevels - 1))
		goto out0;

	/* Set up variables for this block as "left". */
	left = xfs_btree_get_block(cur, level, &lbp);

#ifdef DEBUG
	error = xfs_btree_check_block(cur, left, level, lbp);
	if (error)
		goto error0;
#endif

	/* If we've got no right sibling then we can't shift an entry right. */
	xfs_btree_get_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);
	if (xfs_btree_ptr_is_null(cur, &rptr))
		goto out0;

	/*
	 * If the cursor entry is the one that would be moved, don't
	 * do it... it's too complicated.
	 */
	lrecs = xfs_btree_get_numrecs(left);
	if (cur->bc_levels[level].ptr >= lrecs)
		goto out0;

	/* Set up the right neighbor as "right". */
	error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
	if (error)
		goto error0;

	/* If it's full, it can't take another entry. */
	rrecs = xfs_btree_get_numrecs(right);
	if (rrecs == cur->bc_ops->get_maxrecs(cur, level))
		goto out0;

	XFS_BTREE_STATS_INC(cur, rshift);
	XFS_BTREE_STATS_ADD(cur, moves, rrecs);

	/*
	 * Make a hole at the start of the right neighbor block, then
	 * copy the last left block entry to the hole.
	 */
	if (level > 0) {
		/* It's a nonleaf.  Make a hole in the keys and ptrs. */
		union xfs_btree_key	*lkp;
		union xfs_btree_ptr	*lpp;
		union xfs_btree_ptr	*rpp;

		lkp = xfs_btree_key_addr(cur, lrecs, left);
		lpp = xfs_btree_ptr_addr(cur, lrecs, left);
		rkp = xfs_btree_key_addr(cur, 1, right);
		rpp = xfs_btree_ptr_addr(cur, 1, right);

		for (i = rrecs - 1; i >= 0; i--) {
			error = xfs_btree_debug_check_ptr(cur, rpp, i, level);
			if (error)
				goto error0;
		}

		xfs_btree_shift_keys(cur, rkp, 1, rrecs);
		xfs_btree_shift_ptrs(cur, rpp, 1, rrecs);

		error = xfs_btree_debug_check_ptr(cur, lpp, 0, level);
		if (error)
			goto error0;

		/* Now put the new data in, and log it. */
		xfs_btree_copy_keys(cur, rkp, lkp, 1);
		xfs_btree_copy_ptrs(cur, rpp, lpp, 1);

		xfs_btree_log_keys(cur, rbp, 1, rrecs + 1);
		xfs_btree_log_ptrs(cur, rbp, 1, rrecs + 1);

		ASSERT(cur->bc_ops->keys_inorder(cur, rkp,
			xfs_btree_key_addr(cur, 2, right)));
	} else {
		/* It's a leaf.  Make a hole in the records. */
		union xfs_btree_rec	*lrp;
		union xfs_btree_rec	*rrp;

		lrp = xfs_btree_rec_addr(cur, lrecs, left);
		rrp = xfs_btree_rec_addr(cur, 1, right);

		xfs_btree_shift_recs(cur, rrp, 1, rrecs);

		/* Now put the new data in, and log it. */
		xfs_btree_copy_recs(cur, rrp, lrp, 1);
		xfs_btree_log_recs(cur, rbp, 1, rrecs + 1);
	}

	/*
	 * Decrement and log left's numrecs, bump and log right's numrecs.
	 */
	xfs_btree_set_numrecs(left, --lrecs);
	xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS);

	xfs_btree_set_numrecs(right, ++rrecs);
	xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS);

	/*
	 * Using a temporary cursor, update the parent key values of the
	 * block on the right.
	 */
	error = xfs_btree_dup_cursor(cur, &tcur);
	if (error)
		goto error0;
	i = xfs_btree_lastrec(tcur, level);
	if (XFS_IS_CORRUPT(tcur->bc_mp, i != 1)) {
		error = -EFSCORRUPTED;
		goto error0;
	}

	error = xfs_btree_increment(tcur, level, &i);
	if (error)
		goto error1;

	/* Update the parent high keys of the left block, if needed.
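	 * The left block just lost its last entry, so in an overlapping
	 * btree its high key may have shrunk; cur still points at the left
	 * block, so update through it directly.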
	 */
	if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
		error = xfs_btree_update_keys(cur, level);
		if (error)
			goto error1;
	}

	/* Update the parent keys of the right block. */
	error = xfs_btree_update_keys(tcur, level);
	if (error)
		goto error1;

	xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);

	*stat = 1;
	return 0;

out0:
	*stat = 0;
	return 0;

error0:
	return error;

error1:
	xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
	return error;
}

/*
 * Split cur/level block in half.
 * Return new block number and the key to its first
 * record (to be inserted into parent).
 */
STATIC int				/* error */
__xfs_btree_split(
	struct xfs_btree_cur	*cur,
	int			level,
	union xfs_btree_ptr	*ptrp,
	union xfs_btree_key	*key,
	struct xfs_btree_cur	**curp,
	int			*stat)	/* success/failure */
{
	union xfs_btree_ptr	lptr;	/* left sibling block ptr */
	struct xfs_buf		*lbp;	/* left buffer pointer */
	struct xfs_btree_block	*left;	/* left btree block */
	union xfs_btree_ptr	rptr;	/* right sibling block ptr */
	struct xfs_buf		*rbp;	/* right buffer pointer */
	struct xfs_btree_block	*right;	/* right btree block */
	union xfs_btree_ptr	rrptr;	/* right-right sibling ptr */
	struct xfs_buf		*rrbp;	/* right-right buffer pointer */
	struct xfs_btree_block	*rrblock;	/* right-right btree block */
	int			lrecs;
	int			rrecs;
	int			src_index;
	int			error;	/* error return value */
	int			i;

	XFS_BTREE_STATS_INC(cur, split);

	/* Set up left block (current one). */
	left = xfs_btree_get_block(cur, level, &lbp);

#ifdef DEBUG
	error = xfs_btree_check_block(cur, left, level, lbp);
	if (error)
		goto error0;
#endif

	xfs_btree_buf_to_ptr(cur, lbp, &lptr);

	/* Allocate the new block.  If we can't do it, we're toast.  Give up. */
	error = cur->bc_ops->alloc_block(cur, &lptr, &rptr, stat);
	if (error)
		goto error0;
	if (*stat == 0)
		goto out0;
	XFS_BTREE_STATS_INC(cur, alloc);

	/* Set up the new block as "right". */
	error = xfs_btree_get_buf_block(cur, &rptr, &right, &rbp);
	if (error)
		goto error0;

	/* Fill in the btree header for the new right block. */
	xfs_btree_init_block_cur(cur, rbp, xfs_btree_get_level(left), 0);

	/*
	 * Split the entries between the old and the new block evenly.
	 * Make sure that if there's an odd number of entries now, each
	 * new block will have the same number of entries.
	 */
	lrecs = xfs_btree_get_numrecs(left);
	rrecs = lrecs / 2;
	if ((lrecs & 1) && cur->bc_levels[level].ptr <= rrecs + 1)
		rrecs++;
	src_index = (lrecs - rrecs + 1);

	XFS_BTREE_STATS_ADD(cur, moves, rrecs);

	/* Adjust numrecs for the later get_*_keys() calls. */
	lrecs -= rrecs;
	xfs_btree_set_numrecs(left, lrecs);
	xfs_btree_set_numrecs(right, xfs_btree_get_numrecs(right) + rrecs);

	/*
	 * Copy btree block entries from the left block over to the
	 * new block, the right.  Update the right block and log the
	 * changes.
	 */
	if (level > 0) {
		/* It's a non-leaf.  Move keys and pointers.
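		 * src_index is the first entry that moves to the new right
		 * block; rrecs entries are copied in one go.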
		 */
		union xfs_btree_key	*lkp;	/* left btree key */
		union xfs_btree_ptr	*lpp;	/* left address pointer */
		union xfs_btree_key	*rkp;	/* right btree key */
		union xfs_btree_ptr	*rpp;	/* right address pointer */

		lkp = xfs_btree_key_addr(cur, src_index, left);
		lpp = xfs_btree_ptr_addr(cur, src_index, left);
		rkp = xfs_btree_key_addr(cur, 1, right);
		rpp = xfs_btree_ptr_addr(cur, 1, right);

		for (i = src_index; i < rrecs; i++) {
			error = xfs_btree_debug_check_ptr(cur, lpp, i, level);
			if (error)
				goto error0;
		}

		/* Copy the keys & pointers to the new block. */
		xfs_btree_copy_keys(cur, rkp, lkp, rrecs);
		xfs_btree_copy_ptrs(cur, rpp, lpp, rrecs);

		xfs_btree_log_keys(cur, rbp, 1, rrecs);
		xfs_btree_log_ptrs(cur, rbp, 1, rrecs);

		/* Stash the keys of the new block for later insertion. */
		xfs_btree_get_node_keys(cur, right, key);
	} else {
		/* It's a leaf.  Move records. */
		union xfs_btree_rec	*lrp;	/* left record pointer */
		union xfs_btree_rec	*rrp;	/* right record pointer */

		lrp = xfs_btree_rec_addr(cur, src_index, left);
		rrp = xfs_btree_rec_addr(cur, 1, right);

		/* Copy records to the new block. */
		xfs_btree_copy_recs(cur, rrp, lrp, rrecs);
		xfs_btree_log_recs(cur, rbp, 1, rrecs);

		/* Stash the keys of the new block for later insertion. */
		xfs_btree_get_leaf_keys(cur, right, key);
	}

	/*
	 * Find the left block number by looking in the buffer.
	 * Adjust sibling pointers.
	 */
	xfs_btree_get_sibling(cur, left, &rrptr, XFS_BB_RIGHTSIB);
	xfs_btree_set_sibling(cur, right, &rrptr, XFS_BB_RIGHTSIB);
	xfs_btree_set_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
	xfs_btree_set_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);

	xfs_btree_log_block(cur, rbp, XFS_BB_ALL_BITS);
	xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);

	/*
	 * If there's a block to the new block's right, make that block
	 * point back to right instead of to left.
	 */
	if (!xfs_btree_ptr_is_null(cur, &rrptr)) {
		error = xfs_btree_read_buf_block(cur, &rrptr,
						 0, &rrblock, &rrbp);
		if (error)
			goto error0;
		xfs_btree_set_sibling(cur, rrblock, &rptr, XFS_BB_LEFTSIB);
		xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
	}

	/* Update the parent high keys of the left block, if needed. */
	if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
		error = xfs_btree_update_keys(cur, level);
		if (error)
			goto error0;
	}

	/*
	 * If the cursor is really in the right block, move it there.
	 * If it's just pointing past the last entry in left, then we'll
	 * insert there, so don't change anything in that case.
	 */
	if (cur->bc_levels[level].ptr > lrecs + 1) {
		xfs_btree_setbuf(cur, level, rbp);
		cur->bc_levels[level].ptr -= lrecs;
	}
	/*
	 * If there are more levels, we'll need another cursor which refers
	 * to the right block, no matter where this cursor was.
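	 * The duplicate's pointer at the parent level is bumped by one so
	 * that the caller inserts the new block's key/ptr immediately after
	 * the old block's entry.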
	 */
	if (level + 1 < cur->bc_nlevels) {
		error = xfs_btree_dup_cursor(cur, curp);
		if (error)
			goto error0;
		(*curp)->bc_levels[level + 1].ptr++;
	}
	*ptrp = rptr;
	*stat = 1;
	return 0;
out0:
	*stat = 0;
	return 0;

error0:
	return error;
}

#ifdef __KERNEL__
struct xfs_btree_split_args {
	struct xfs_btree_cur	*cur;
	int			level;
	union xfs_btree_ptr	*ptrp;
	union xfs_btree_key	*key;
	struct xfs_btree_cur	**curp;
	int			*stat;		/* success/failure */
	int			result;
	bool			kswapd;	/* allocation in kswapd context */
	struct completion	*done;
	struct work_struct	work;
};

/*
 * Stack switching interfaces for allocation
 */
static void
xfs_btree_split_worker(
	struct work_struct	*work)
{
	struct xfs_btree_split_args	*args = container_of(work,
						struct xfs_btree_split_args, work);
	unsigned long		pflags;
	unsigned long		new_pflags = 0;

	/*
	 * we are in a transaction context here, but may also be doing work
	 * in kswapd context, and hence we may need to inherit that state
	 * temporarily to ensure that we don't block waiting for memory reclaim
	 * in any way.
	 */
	if (args->kswapd)
		new_pflags |= PF_MEMALLOC | PF_KSWAPD;

	current_set_flags_nested(&pflags, new_pflags);
	xfs_trans_set_context(args->cur->bc_tp);

	args->result = __xfs_btree_split(args->cur, args->level, args->ptrp,
					 args->key, args->curp, args->stat);

	xfs_trans_clear_context(args->cur->bc_tp);
	current_restore_flags_nested(&pflags, new_pflags);

	/*
	 * Do not access args after complete() has run here.  We don't own args
	 * and the owner may run and free args before we return here.
	 */
	complete(args->done);
}

/*
 * BMBT split requests often come in with little stack to work on so we push
 * them off to a worker thread so there is lots of stack to use.  For the other
 * btree types, just call directly to avoid the context switch overhead here.
 *
 * Care must be taken here - the work queue rescuer thread introduces potential
 * AGF <> worker queue deadlocks if the BMBT block allocation has to lock new
 * AGFs to allocate blocks.  A task being run by the rescuer could attempt to
 * lock an AGF that is already locked by a task queued to run by the rescuer,
 * resulting in an ABBA deadlock as the rescuer cannot run the lock holder to
 * release it until the current thread it is running gains the lock.
 *
 * To avoid this issue, we only ever queue BMBT splits that don't have an AGF
 * already locked to allocate from.  The only place that doesn't hold an AGF
 * locked is unwritten extent conversion at IO completion, but that has already
 * been offloaded to a worker thread and hence has no stack consumption issues
 * we have to worry about.
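 *
 * Concretely, in the code below, non-BMBT cursors and BMBT cursors whose
 * transaction has not yet locked an AGF (t_highest_agno == NULLAGNUMBER)
 * call __xfs_btree_split() directly; the remaining cases are pushed off to
 * the xfs_alloc_wq worker.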
 */
STATIC int				/* error */
xfs_btree_split(
	struct xfs_btree_cur	*cur,
	int			level,
	union xfs_btree_ptr	*ptrp,
	union xfs_btree_key	*key,
	struct xfs_btree_cur	**curp,
	int			*stat)	/* success/failure */
{
	struct xfs_btree_split_args	args;
	DECLARE_COMPLETION_ONSTACK(done);

	if (cur->bc_btnum != XFS_BTNUM_BMAP ||
	    cur->bc_tp->t_highest_agno == NULLAGNUMBER)
		return __xfs_btree_split(cur, level, ptrp, key, curp, stat);

	args.cur = cur;
	args.level = level;
	args.ptrp = ptrp;
	args.key = key;
	args.curp = curp;
	args.stat = stat;
	args.done = &done;
	args.kswapd = current_is_kswapd();
	INIT_WORK_ONSTACK(&args.work, xfs_btree_split_worker);
	queue_work(xfs_alloc_wq, &args.work);
	wait_for_completion(&done);
	destroy_work_on_stack(&args.work);
	return args.result;
}
#else
#define xfs_btree_split	__xfs_btree_split
#endif /* __KERNEL__ */

/*
 * Copy the old inode root contents into a real block and make the
 * broot point to it.
 */
int						/* error */
xfs_btree_new_iroot(
	struct xfs_btree_cur	*cur,		/* btree cursor */
	int			*logflags,	/* logging flags for inode */
	int			*stat)		/* return status - 0 fail */
{
	struct xfs_buf		*cbp;		/* buffer for cblock */
	struct xfs_btree_block	*block;		/* btree block */
	struct xfs_btree_block	*cblock;	/* child btree block */
	union xfs_btree_key	*ckp;		/* child key pointer */
	union xfs_btree_ptr	*cpp;		/* child ptr pointer */
	union xfs_btree_key	*kp;		/* pointer to btree key */
	union xfs_btree_ptr	*pp;		/* pointer to block addr */
	union xfs_btree_ptr	nptr;		/* new block addr */
	int			level;		/* btree level */
	int			error;		/* error return code */
	int			i;		/* loop counter */

	XFS_BTREE_STATS_INC(cur, newroot);

	ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);

	level = cur->bc_nlevels - 1;

	block = xfs_btree_get_iroot(cur);
	pp = xfs_btree_ptr_addr(cur, 1, block);

	/* Allocate the new block.  If we can't do it, we're toast.  Give up. */
	error = cur->bc_ops->alloc_block(cur, pp, &nptr, stat);
	if (error)
		goto error0;
	if (*stat == 0)
		return 0;

	XFS_BTREE_STATS_INC(cur, alloc);

	/* Copy the root into a real block. */
	error = xfs_btree_get_buf_block(cur, &nptr, &cblock, &cbp);
	if (error)
		goto error0;

	/*
	 * We can't just memcpy() the root in for CRC enabled btree blocks.
	 * In that case we also have to ensure the blkno remains correct.
	 */
	memcpy(cblock, block, xfs_btree_block_len(cur));
	if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) {
		__be64 bno = cpu_to_be64(xfs_buf_daddr(cbp));
		if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
			cblock->bb_u.l.bb_blkno = bno;
		else
			cblock->bb_u.s.bb_blkno = bno;
	}

	be16_add_cpu(&block->bb_level, 1);
	xfs_btree_set_numrecs(block, 1);
	cur->bc_nlevels++;
	ASSERT(cur->bc_nlevels <= cur->bc_maxlevels);
	cur->bc_levels[level + 1].ptr = 1;

	kp = xfs_btree_key_addr(cur, 1, block);
	ckp = xfs_btree_key_addr(cur, 1, cblock);
	xfs_btree_copy_keys(cur, ckp, kp, xfs_btree_get_numrecs(cblock));

	cpp = xfs_btree_ptr_addr(cur, 1, cblock);
	for (i = 0; i < be16_to_cpu(cblock->bb_numrecs); i++) {
		error = xfs_btree_debug_check_ptr(cur, pp, i, level);
		if (error)
			goto error0;
	}

	xfs_btree_copy_ptrs(cur, cpp, pp, xfs_btree_get_numrecs(cblock));

	error = xfs_btree_debug_check_ptr(cur, &nptr, 0, level);
	if (error)
		goto error0;

	xfs_btree_copy_ptrs(cur, pp, &nptr, 1);

	xfs_iroot_realloc(cur->bc_ino.ip,
			  1 - xfs_btree_get_numrecs(cblock),
			  cur->bc_ino.whichfork);

	xfs_btree_setbuf(cur, level, cbp);

	/*
	 * Do all this logging at the end so that
	 * the root is at the right level.
	 */
	xfs_btree_log_block(cur, cbp, XFS_BB_ALL_BITS);
	xfs_btree_log_keys(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs));
	xfs_btree_log_ptrs(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs));

	*logflags |=
		XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_ino.whichfork);
	*stat = 1;
	return 0;
error0:
	return error;
}

/*
 * Allocate a new root block, fill it in.
 */
STATIC int				/* error */
xfs_btree_new_root(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	int			*stat)	/* success/failure */
{
	struct xfs_btree_block	*block;	/* one half of the old root block */
	struct xfs_buf		*bp;	/* buffer containing block */
	int			error;	/* error return value */
	struct xfs_buf		*lbp;	/* left buffer pointer */
	struct xfs_btree_block	*left;	/* left btree block */
	struct xfs_buf		*nbp;	/* new (root) buffer */
	struct xfs_btree_block	*new;	/* new (root) btree block */
	int			nptr;	/* new value for key index, 1 or 2 */
	struct xfs_buf		*rbp;	/* right buffer pointer */
	struct xfs_btree_block	*right;	/* right btree block */
	union xfs_btree_ptr	rptr;
	union xfs_btree_ptr	lptr;

	XFS_BTREE_STATS_INC(cur, newroot);

	/* initialise our start point from the cursor */
	cur->bc_ops->init_ptr_from_cur(cur, &rptr);

	/* Allocate the new block.  If we can't do it, we're toast.  Give up. */
	error = cur->bc_ops->alloc_block(cur, &rptr, &lptr, stat);
	if (error)
		goto error0;
	if (*stat == 0)
		goto out0;
	XFS_BTREE_STATS_INC(cur, alloc);

	/* Set up the new block. */
	error = xfs_btree_get_buf_block(cur, &lptr, &new, &nbp);
	if (error)
		goto error0;

	/* Set the root in the holding structure, increasing the level by 1. */
	cur->bc_ops->set_root(cur, &lptr, 1);

	/*
	 * At the previous root level there are now two blocks: the old root,
	 * and the new block generated when it was split.  We don't know which
	 * one the cursor is pointing at, so we set up variables "left" and
	 * "right" for each case.
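	 * Whichever way round it is, the new root gets exactly two entries:
	 * one key/ptr pair for the left block and one for the right block.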
3119 */ 3120 block = xfs_btree_get_block(cur, cur->bc_nlevels - 1, &bp); 3121 3122 #ifdef DEBUG 3123 error = xfs_btree_check_block(cur, block, cur->bc_nlevels - 1, bp); 3124 if (error) 3125 goto error0; 3126 #endif 3127 3128 xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB); 3129 if (!xfs_btree_ptr_is_null(cur, &rptr)) { 3130 /* Our block is left, pick up the right block. */ 3131 lbp = bp; 3132 xfs_btree_buf_to_ptr(cur, lbp, &lptr); 3133 left = block; 3134 error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp); 3135 if (error) 3136 goto error0; 3137 bp = rbp; 3138 nptr = 1; 3139 } else { 3140 /* Our block is right, pick up the left block. */ 3141 rbp = bp; 3142 xfs_btree_buf_to_ptr(cur, rbp, &rptr); 3143 right = block; 3144 xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB); 3145 error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp); 3146 if (error) 3147 goto error0; 3148 bp = lbp; 3149 nptr = 2; 3150 } 3151 3152 /* Fill in the new block's btree header and log it. */ 3153 xfs_btree_init_block_cur(cur, nbp, cur->bc_nlevels, 2); 3154 xfs_btree_log_block(cur, nbp, XFS_BB_ALL_BITS); 3155 ASSERT(!xfs_btree_ptr_is_null(cur, &lptr) && 3156 !xfs_btree_ptr_is_null(cur, &rptr)); 3157 3158 /* Fill in the key data in the new root. */ 3159 if (xfs_btree_get_level(left) > 0) { 3160 /* 3161 * Get the keys for the left block's keys and put them directly 3162 * in the parent block. Do the same for the right block. 3163 */ 3164 xfs_btree_get_node_keys(cur, left, 3165 xfs_btree_key_addr(cur, 1, new)); 3166 xfs_btree_get_node_keys(cur, right, 3167 xfs_btree_key_addr(cur, 2, new)); 3168 } else { 3169 /* 3170 * Get the keys for the left block's records and put them 3171 * directly in the parent block. Do the same for the right 3172 * block. 3173 */ 3174 xfs_btree_get_leaf_keys(cur, left, 3175 xfs_btree_key_addr(cur, 1, new)); 3176 xfs_btree_get_leaf_keys(cur, right, 3177 xfs_btree_key_addr(cur, 2, new)); 3178 } 3179 xfs_btree_log_keys(cur, nbp, 1, 2); 3180 3181 /* Fill in the pointer data in the new root. */ 3182 xfs_btree_copy_ptrs(cur, 3183 xfs_btree_ptr_addr(cur, 1, new), &lptr, 1); 3184 xfs_btree_copy_ptrs(cur, 3185 xfs_btree_ptr_addr(cur, 2, new), &rptr, 1); 3186 xfs_btree_log_ptrs(cur, nbp, 1, 2); 3187 3188 /* Fix up the cursor. */ 3189 xfs_btree_setbuf(cur, cur->bc_nlevels, nbp); 3190 cur->bc_levels[cur->bc_nlevels].ptr = nptr; 3191 cur->bc_nlevels++; 3192 ASSERT(cur->bc_nlevels <= cur->bc_maxlevels); 3193 *stat = 1; 3194 return 0; 3195 error0: 3196 return error; 3197 out0: 3198 *stat = 0; 3199 return 0; 3200 } 3201 3202 STATIC int 3203 xfs_btree_make_block_unfull( 3204 struct xfs_btree_cur *cur, /* btree cursor */ 3205 int level, /* btree level */ 3206 int numrecs,/* # of recs in block */ 3207 int *oindex,/* old tree index */ 3208 int *index, /* new tree index */ 3209 union xfs_btree_ptr *nptr, /* new btree ptr */ 3210 struct xfs_btree_cur **ncur, /* new btree cursor */ 3211 union xfs_btree_key *key, /* key of new block */ 3212 int *stat) 3213 { 3214 int error = 0; 3215 3216 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && 3217 level == cur->bc_nlevels - 1) { 3218 struct xfs_inode *ip = cur->bc_ino.ip; 3219 3220 if (numrecs < cur->bc_ops->get_dmaxrecs(cur, level)) { 3221 /* A root block that can be made bigger. 
			 */
			xfs_iroot_realloc(ip, 1, cur->bc_ino.whichfork);
			*stat = 1;
		} else {
			/* A root block that needs replacing */
			int	logflags = 0;

			error = xfs_btree_new_iroot(cur, &logflags, stat);
			if (error || *stat == 0)
				return error;

			xfs_trans_log_inode(cur->bc_tp, ip, logflags);
		}

		return 0;
	}

	/* First, try shifting an entry to the right neighbor. */
	error = xfs_btree_rshift(cur, level, stat);
	if (error || *stat)
		return error;

	/* Next, try shifting an entry to the left neighbor. */
	error = xfs_btree_lshift(cur, level, stat);
	if (error)
		return error;

	if (*stat) {
		*oindex = *index = cur->bc_levels[level].ptr;
		return 0;
	}

	/*
	 * Next, try splitting the current block in half.
	 *
	 * If this works we have to re-set our variables because we
	 * could be in a different block now.
	 */
	error = xfs_btree_split(cur, level, nptr, key, ncur, stat);
	if (error || *stat == 0)
		return error;

	*index = cur->bc_levels[level].ptr;
	return 0;
}

/*
 * Insert one record/level.  Return information to the caller
 * allowing the next level up to proceed if necessary.
 */
STATIC int
xfs_btree_insrec(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	int			level,	/* level to insert record at */
	union xfs_btree_ptr	*ptrp,	/* i/o: block number inserted */
	union xfs_btree_rec	*rec,	/* record to insert */
	union xfs_btree_key	*key,	/* i/o: block key for ptrp */
	struct xfs_btree_cur	**curp,	/* output: new cursor replacing cur */
	int			*stat)	/* success/failure */
{
	struct xfs_btree_block	*block;	/* btree block */
	struct xfs_buf		*bp;	/* buffer for block */
	union xfs_btree_ptr	nptr;	/* new block ptr */
	struct xfs_btree_cur	*ncur = NULL;	/* new btree cursor */
	union xfs_btree_key	nkey;	/* new block key */
	union xfs_btree_key	*lkey;
	int			optr;	/* old key/record index */
	int			ptr;	/* key/record index */
	int			numrecs;/* number of records */
	int			error;	/* error return value */
	int			i;
	xfs_daddr_t		old_bn;

	ncur = NULL;
	lkey = &nkey;

	/*
	 * If we have an external root pointer, and we've made it to the
	 * root level, allocate a new root block and we're done.
	 */
	if (!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
	    (level >= cur->bc_nlevels)) {
		error = xfs_btree_new_root(cur, stat);
		xfs_btree_set_ptr_null(cur, ptrp);

		return error;
	}

	/* If we're off the left edge, return failure. */
	ptr = cur->bc_levels[level].ptr;
	if (ptr == 0) {
		*stat = 0;
		return 0;
	}

	optr = ptr;

	XFS_BTREE_STATS_INC(cur, insrec);

	/* Get pointers to the btree buffer and block. */
	block = xfs_btree_get_block(cur, level, &bp);
	old_bn = bp ? xfs_buf_daddr(bp) : XFS_BUF_DADDR_NULL;
	numrecs = xfs_btree_get_numrecs(block);

#ifdef DEBUG
	error = xfs_btree_check_block(cur, block, level, bp);
	if (error)
		goto error0;

	/* Check that the new entry is being inserted in the right place.
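	 * That is, the new record sorts immediately before the record
	 * currently at ptr (or, for node levels, the new key before the key
	 * at ptr).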
	 */
	if (ptr <= numrecs) {
		if (level == 0) {
			ASSERT(cur->bc_ops->recs_inorder(cur, rec,
				xfs_btree_rec_addr(cur, ptr, block)));
		} else {
			ASSERT(cur->bc_ops->keys_inorder(cur, key,
				xfs_btree_key_addr(cur, ptr, block)));
		}
	}
#endif

	/*
	 * If the block is full, we can't insert the new entry until we
	 * make the block un-full.
	 */
	xfs_btree_set_ptr_null(cur, &nptr);
	if (numrecs == cur->bc_ops->get_maxrecs(cur, level)) {
		error = xfs_btree_make_block_unfull(cur, level, numrecs,
					&optr, &ptr, &nptr, &ncur, lkey, stat);
		if (error || *stat == 0)
			goto error0;
	}

	/*
	 * The current block may have changed if the block was
	 * previously full and we have just made space in it.
	 */
	block = xfs_btree_get_block(cur, level, &bp);
	numrecs = xfs_btree_get_numrecs(block);

#ifdef DEBUG
	error = xfs_btree_check_block(cur, block, level, bp);
	if (error)
		goto error0;
#endif

	/*
	 * At this point we know there's room for our new entry in the block
	 * we're pointing at.
	 */
	XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr + 1);

	if (level > 0) {
		/* It's a nonleaf.  Make a hole in the keys and ptrs. */
		union xfs_btree_key	*kp;
		union xfs_btree_ptr	*pp;

		kp = xfs_btree_key_addr(cur, ptr, block);
		pp = xfs_btree_ptr_addr(cur, ptr, block);

		for (i = numrecs - ptr; i >= 0; i--) {
			error = xfs_btree_debug_check_ptr(cur, pp, i, level);
			if (error)
				goto error0;
		}

		xfs_btree_shift_keys(cur, kp, 1, numrecs - ptr + 1);
		xfs_btree_shift_ptrs(cur, pp, 1, numrecs - ptr + 1);

		error = xfs_btree_debug_check_ptr(cur, ptrp, 0, level);
		if (error)
			goto error0;

		/* Now put the new data in, bump numrecs and log it. */
		xfs_btree_copy_keys(cur, kp, key, 1);
		xfs_btree_copy_ptrs(cur, pp, ptrp, 1);
		numrecs++;
		xfs_btree_set_numrecs(block, numrecs);
		xfs_btree_log_ptrs(cur, bp, ptr, numrecs);
		xfs_btree_log_keys(cur, bp, ptr, numrecs);
#ifdef DEBUG
		if (ptr < numrecs) {
			ASSERT(cur->bc_ops->keys_inorder(cur, kp,
				xfs_btree_key_addr(cur, ptr + 1, block)));
		}
#endif
	} else {
		/* It's a leaf.  Make a hole in the records. */
		union xfs_btree_rec	*rp;

		rp = xfs_btree_rec_addr(cur, ptr, block);

		xfs_btree_shift_recs(cur, rp, 1, numrecs - ptr + 1);

		/* Now put the new data in, bump numrecs and log it. */
		xfs_btree_copy_recs(cur, rp, rec, 1);
		xfs_btree_set_numrecs(block, ++numrecs);
		xfs_btree_log_recs(cur, bp, ptr, numrecs);
#ifdef DEBUG
		if (ptr < numrecs) {
			ASSERT(cur->bc_ops->recs_inorder(cur, rp,
				xfs_btree_rec_addr(cur, ptr + 1, block)));
		}
#endif
	}

	/* Log the new number of records in the btree header. */
	xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);

	/*
	 * Update btree keys to reflect the newly added record or keyptr.
	 * There are three cases here to be aware of.  Normally, all we have to
	 * do is walk towards the root, updating keys as necessary.
	 *
	 * If the caller had us target a full block for the insertion, we dealt
	 * with that by calling the _make_block_unfull function.  If the
	 * "make unfull" function splits the block, it'll hand us back the key
	 * and pointer of the new block.  We haven't yet added the new block to
	 * the next level up, so if we decide to add the new record to the new
	 * block (xfs_buf_daddr(bp) != old_bn), we have to update the caller's
	 * pointer so that the caller adds the new block with the correct key.
	 *
	 * However, there is a third possibility: if the selected block is the
	 * root block of an inode-rooted btree and cannot be expanded further,
	 * the "make unfull" function moves the root block contents to a new
	 * block and updates the root block to point to the new block.  In this
	 * case, no block pointer is passed back because the block has already
	 * been added to the btree.  In this case, we need to use the regular
	 * key update function, just like the first case.  This is critical for
	 * overlapping btrees, because the high key must be updated to reflect
	 * the entire tree, not just the subtree accessible through the first
	 * child of the root (which is now two levels down from the root).
	 */
	if (!xfs_btree_ptr_is_null(cur, &nptr) &&
	    bp && xfs_buf_daddr(bp) != old_bn) {
		xfs_btree_get_keys(cur, block, lkey);
	} else if (xfs_btree_needs_key_update(cur, optr)) {
		error = xfs_btree_update_keys(cur, level);
		if (error)
			goto error0;
	}

	/*
	 * If we are tracking the last record in the tree and
	 * we are at the far right edge of the tree, update it.
	 */
	if (xfs_btree_is_lastrec(cur, block, level)) {
		cur->bc_ops->update_lastrec(cur, block, rec,
					    ptr, LASTREC_INSREC);
	}

	/*
	 * Return the new block number, if any.
	 * If there is one, give back a record value and a cursor too.
	 */
	*ptrp = nptr;
	if (!xfs_btree_ptr_is_null(cur, &nptr)) {
		xfs_btree_copy_keys(cur, key, lkey, 1);
		*curp = ncur;
	}

	*stat = 1;
	return 0;

error0:
	if (ncur)
		xfs_btree_del_cursor(ncur, error);
	return error;
}

/*
 * Insert the record at the point referenced by cur.
 *
 * A multi-level split of the tree on insert will invalidate the original
 * cursor.  All callers of this function should assume that the cursor is
 * no longer valid and revalidate it.
 */
int
xfs_btree_insert(
	struct xfs_btree_cur	*cur,
	int			*stat)
{
	int			error;	/* error return value */
	int			i;	/* result value, 0 for failure */
	int			level;	/* current level number in btree */
	union xfs_btree_ptr	nptr;	/* new block number (split result) */
	struct xfs_btree_cur	*ncur;	/* new cursor (split result) */
	struct xfs_btree_cur	*pcur;	/* previous level's cursor */
	union xfs_btree_key	bkey;	/* key of block to insert */
	union xfs_btree_key	*key;
	union xfs_btree_rec	rec;	/* record to insert */

	level = 0;
	ncur = NULL;
	pcur = cur;
	key = &bkey;

	xfs_btree_set_ptr_null(cur, &nptr);

	/* Make a key out of the record data to be inserted, and save it. */
	cur->bc_ops->init_rec_from_cur(cur, &rec);
	cur->bc_ops->init_key_from_rec(key, &rec);

	/*
	 * Loop going up the tree, starting at the leaf level.
	 * Stop when we don't get a split block, that must mean that
	 * the insert is finished with this level.
	 */
	do {
		/*
		 * Insert nrec/nptr into this level of the tree.
		 * Note if we fail, nptr will be null.
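		 * A non-null nptr on return means this level split and the
		 * new block's key/ptr still has to be inserted at the next
		 * level up.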
3534 */ 3535 error = xfs_btree_insrec(pcur, level, &nptr, &rec, key, 3536 &ncur, &i); 3537 if (error) { 3538 if (pcur != cur) 3539 xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR); 3540 goto error0; 3541 } 3542 3543 if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { 3544 error = -EFSCORRUPTED; 3545 goto error0; 3546 } 3547 level++; 3548 3549 /* 3550 * See if the cursor we just used is trash. 3551 * Can't trash the caller's cursor, but otherwise we should 3552 * if ncur is a new cursor or we're about to be done. 3553 */ 3554 if (pcur != cur && 3555 (ncur || xfs_btree_ptr_is_null(cur, &nptr))) { 3556 /* Save the state from the cursor before we trash it */ 3557 if (cur->bc_ops->update_cursor) 3558 cur->bc_ops->update_cursor(pcur, cur); 3559 cur->bc_nlevels = pcur->bc_nlevels; 3560 xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR); 3561 } 3562 /* If we got a new cursor, switch to it. */ 3563 if (ncur) { 3564 pcur = ncur; 3565 ncur = NULL; 3566 } 3567 } while (!xfs_btree_ptr_is_null(cur, &nptr)); 3568 3569 *stat = i; 3570 return 0; 3571 error0: 3572 return error; 3573 } 3574 3575 /* 3576 * Try to merge a non-leaf block back into the inode root. 3577 * 3578 * Note: the killroot names comes from the fact that we're effectively 3579 * killing the old root block. But because we can't just delete the 3580 * inode we have to copy the single block it was pointing to into the 3581 * inode. 3582 */ 3583 STATIC int 3584 xfs_btree_kill_iroot( 3585 struct xfs_btree_cur *cur) 3586 { 3587 int whichfork = cur->bc_ino.whichfork; 3588 struct xfs_inode *ip = cur->bc_ino.ip; 3589 struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork); 3590 struct xfs_btree_block *block; 3591 struct xfs_btree_block *cblock; 3592 union xfs_btree_key *kp; 3593 union xfs_btree_key *ckp; 3594 union xfs_btree_ptr *pp; 3595 union xfs_btree_ptr *cpp; 3596 struct xfs_buf *cbp; 3597 int level; 3598 int index; 3599 int numrecs; 3600 int error; 3601 #ifdef DEBUG 3602 union xfs_btree_ptr ptr; 3603 #endif 3604 int i; 3605 3606 ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE); 3607 ASSERT(cur->bc_nlevels > 1); 3608 3609 /* 3610 * Don't deal with the root block needs to be a leaf case. 3611 * We're just going to turn the thing back into extents anyway. 3612 */ 3613 level = cur->bc_nlevels - 1; 3614 if (level == 1) 3615 goto out0; 3616 3617 /* 3618 * Give up if the root has multiple children. 3619 */ 3620 block = xfs_btree_get_iroot(cur); 3621 if (xfs_btree_get_numrecs(block) != 1) 3622 goto out0; 3623 3624 cblock = xfs_btree_get_block(cur, level - 1, &cbp); 3625 numrecs = xfs_btree_get_numrecs(cblock); 3626 3627 /* 3628 * Only do this if the next level will fit. 3629 * Then the data must be copied up to the inode, 3630 * instead of freeing the root you free the next level. 
3631 */ 3632 if (numrecs > cur->bc_ops->get_dmaxrecs(cur, level)) 3633 goto out0; 3634 3635 XFS_BTREE_STATS_INC(cur, killroot); 3636 3637 #ifdef DEBUG 3638 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB); 3639 ASSERT(xfs_btree_ptr_is_null(cur, &ptr)); 3640 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB); 3641 ASSERT(xfs_btree_ptr_is_null(cur, &ptr)); 3642 #endif 3643 3644 index = numrecs - cur->bc_ops->get_maxrecs(cur, level); 3645 if (index) { 3646 xfs_iroot_realloc(cur->bc_ino.ip, index, 3647 cur->bc_ino.whichfork); 3648 block = ifp->if_broot; 3649 } 3650 3651 be16_add_cpu(&block->bb_numrecs, index); 3652 ASSERT(block->bb_numrecs == cblock->bb_numrecs); 3653 3654 kp = xfs_btree_key_addr(cur, 1, block); 3655 ckp = xfs_btree_key_addr(cur, 1, cblock); 3656 xfs_btree_copy_keys(cur, kp, ckp, numrecs); 3657 3658 pp = xfs_btree_ptr_addr(cur, 1, block); 3659 cpp = xfs_btree_ptr_addr(cur, 1, cblock); 3660 3661 for (i = 0; i < numrecs; i++) { 3662 error = xfs_btree_debug_check_ptr(cur, cpp, i, level - 1); 3663 if (error) 3664 return error; 3665 } 3666 3667 xfs_btree_copy_ptrs(cur, pp, cpp, numrecs); 3668 3669 error = xfs_btree_free_block(cur, cbp); 3670 if (error) 3671 return error; 3672 3673 cur->bc_levels[level - 1].bp = NULL; 3674 be16_add_cpu(&block->bb_level, -1); 3675 xfs_trans_log_inode(cur->bc_tp, ip, 3676 XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_ino.whichfork)); 3677 cur->bc_nlevels--; 3678 out0: 3679 return 0; 3680 } 3681 3682 /* 3683 * Kill the current root node, and replace it with it's only child node. 3684 */ 3685 STATIC int 3686 xfs_btree_kill_root( 3687 struct xfs_btree_cur *cur, 3688 struct xfs_buf *bp, 3689 int level, 3690 union xfs_btree_ptr *newroot) 3691 { 3692 int error; 3693 3694 XFS_BTREE_STATS_INC(cur, killroot); 3695 3696 /* 3697 * Update the root pointer, decreasing the level by 1 and then 3698 * free the old root. 3699 */ 3700 cur->bc_ops->set_root(cur, newroot, -1); 3701 3702 error = xfs_btree_free_block(cur, bp); 3703 if (error) 3704 return error; 3705 3706 cur->bc_levels[level].bp = NULL; 3707 cur->bc_levels[level].ra = 0; 3708 cur->bc_nlevels--; 3709 3710 return 0; 3711 } 3712 3713 STATIC int 3714 xfs_btree_dec_cursor( 3715 struct xfs_btree_cur *cur, 3716 int level, 3717 int *stat) 3718 { 3719 int error; 3720 int i; 3721 3722 if (level > 0) { 3723 error = xfs_btree_decrement(cur, level, &i); 3724 if (error) 3725 return error; 3726 } 3727 3728 *stat = 1; 3729 return 0; 3730 } 3731 3732 /* 3733 * Single level of the btree record deletion routine. 3734 * Delete record pointed to by cur/level. 3735 * Remove the record from its block then rebalance the tree. 3736 * Return 0 for error, 1 for done, 2 to go on to the next level. 
3737 */ 3738 STATIC int /* error */ 3739 xfs_btree_delrec( 3740 struct xfs_btree_cur *cur, /* btree cursor */ 3741 int level, /* level removing record from */ 3742 int *stat) /* fail/done/go-on */ 3743 { 3744 struct xfs_btree_block *block; /* btree block */ 3745 union xfs_btree_ptr cptr; /* current block ptr */ 3746 struct xfs_buf *bp; /* buffer for block */ 3747 int error; /* error return value */ 3748 int i; /* loop counter */ 3749 union xfs_btree_ptr lptr; /* left sibling block ptr */ 3750 struct xfs_buf *lbp; /* left buffer pointer */ 3751 struct xfs_btree_block *left; /* left btree block */ 3752 int lrecs = 0; /* left record count */ 3753 int ptr; /* key/record index */ 3754 union xfs_btree_ptr rptr; /* right sibling block ptr */ 3755 struct xfs_buf *rbp; /* right buffer pointer */ 3756 struct xfs_btree_block *right; /* right btree block */ 3757 struct xfs_btree_block *rrblock; /* right-right btree block */ 3758 struct xfs_buf *rrbp; /* right-right buffer pointer */ 3759 int rrecs = 0; /* right record count */ 3760 struct xfs_btree_cur *tcur; /* temporary btree cursor */ 3761 int numrecs; /* temporary numrec count */ 3762 3763 tcur = NULL; 3764 3765 /* Get the index of the entry being deleted, check for nothing there. */ 3766 ptr = cur->bc_levels[level].ptr; 3767 if (ptr == 0) { 3768 *stat = 0; 3769 return 0; 3770 } 3771 3772 /* Get the buffer & block containing the record or key/ptr. */ 3773 block = xfs_btree_get_block(cur, level, &bp); 3774 numrecs = xfs_btree_get_numrecs(block); 3775 3776 #ifdef DEBUG 3777 error = xfs_btree_check_block(cur, block, level, bp); 3778 if (error) 3779 goto error0; 3780 #endif 3781 3782 /* Fail if we're off the end of the block. */ 3783 if (ptr > numrecs) { 3784 *stat = 0; 3785 return 0; 3786 } 3787 3788 XFS_BTREE_STATS_INC(cur, delrec); 3789 XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr); 3790 3791 /* Excise the entries being deleted. */ 3792 if (level > 0) { 3793 /* It's a nonleaf. operate on keys and ptrs */ 3794 union xfs_btree_key *lkp; 3795 union xfs_btree_ptr *lpp; 3796 3797 lkp = xfs_btree_key_addr(cur, ptr + 1, block); 3798 lpp = xfs_btree_ptr_addr(cur, ptr + 1, block); 3799 3800 for (i = 0; i < numrecs - ptr; i++) { 3801 error = xfs_btree_debug_check_ptr(cur, lpp, i, level); 3802 if (error) 3803 goto error0; 3804 } 3805 3806 if (ptr < numrecs) { 3807 xfs_btree_shift_keys(cur, lkp, -1, numrecs - ptr); 3808 xfs_btree_shift_ptrs(cur, lpp, -1, numrecs - ptr); 3809 xfs_btree_log_keys(cur, bp, ptr, numrecs - 1); 3810 xfs_btree_log_ptrs(cur, bp, ptr, numrecs - 1); 3811 } 3812 } else { 3813 /* It's a leaf. operate on records */ 3814 if (ptr < numrecs) { 3815 xfs_btree_shift_recs(cur, 3816 xfs_btree_rec_addr(cur, ptr + 1, block), 3817 -1, numrecs - ptr); 3818 xfs_btree_log_recs(cur, bp, ptr, numrecs - 1); 3819 } 3820 } 3821 3822 /* 3823 * Decrement and log the number of entries in the block. 3824 */ 3825 xfs_btree_set_numrecs(block, --numrecs); 3826 xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS); 3827 3828 /* 3829 * If we are tracking the last record in the tree and 3830 * we are at the far right edge of the tree, update it. 3831 */ 3832 if (xfs_btree_is_lastrec(cur, block, level)) { 3833 cur->bc_ops->update_lastrec(cur, block, NULL, 3834 ptr, LASTREC_DELREC); 3835 } 3836 3837 /* 3838 * We're at the root level. First, shrink the root block in-memory. 3839 * Try to get rid of the next level down. If we can't then there's 3840 * nothing left to do. 
	 */
	if (level == cur->bc_nlevels - 1) {
		if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
			xfs_iroot_realloc(cur->bc_ino.ip, -1,
					  cur->bc_ino.whichfork);

			error = xfs_btree_kill_iroot(cur);
			if (error)
				goto error0;

			error = xfs_btree_dec_cursor(cur, level, stat);
			if (error)
				goto error0;
			*stat = 1;
			return 0;
		}

		/*
		 * If this is the root level, and there's only one entry left,
		 * and it's NOT the leaf level, then we can get rid of this
		 * level.
		 */
		if (numrecs == 1 && level > 0) {
			union xfs_btree_ptr	*pp;
			/*
			 * pp is still set to the first pointer in the block.
			 * Make it the new root of the btree.
			 */
			pp = xfs_btree_ptr_addr(cur, 1, block);
			error = xfs_btree_kill_root(cur, bp, level, pp);
			if (error)
				goto error0;
		} else if (level > 0) {
			error = xfs_btree_dec_cursor(cur, level, stat);
			if (error)
				goto error0;
		}
		*stat = 1;
		return 0;
	}

	/*
	 * If we deleted the leftmost entry in the block, update the
	 * key values above us in the tree.
	 */
	if (xfs_btree_needs_key_update(cur, ptr)) {
		error = xfs_btree_update_keys(cur, level);
		if (error)
			goto error0;
	}

	/*
	 * If the number of records remaining in the block is at least
	 * the minimum, we're done.
	 */
	if (numrecs >= cur->bc_ops->get_minrecs(cur, level)) {
		error = xfs_btree_dec_cursor(cur, level, stat);
		if (error)
			goto error0;
		return 0;
	}

	/*
	 * Otherwise, we have to move some records around to keep the
	 * tree balanced.  Look at the left and right sibling blocks to
	 * see if we can re-balance by moving only one record.
	 */
	xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
	xfs_btree_get_sibling(cur, block, &lptr, XFS_BB_LEFTSIB);

	if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
		/*
		 * One child of root, need to get a chance to copy its contents
		 * into the root and delete it.  Can't go up to next level,
		 * there's nothing to delete there.
		 */
		if (xfs_btree_ptr_is_null(cur, &rptr) &&
		    xfs_btree_ptr_is_null(cur, &lptr) &&
		    level == cur->bc_nlevels - 2) {
			error = xfs_btree_kill_iroot(cur);
			if (!error)
				error = xfs_btree_dec_cursor(cur, level, stat);
			if (error)
				goto error0;
			return 0;
		}
	}

	ASSERT(!xfs_btree_ptr_is_null(cur, &rptr) ||
	       !xfs_btree_ptr_is_null(cur, &lptr));

	/*
	 * Duplicate the cursor so our btree manipulations here won't
	 * disrupt the next level up.
	 */
	error = xfs_btree_dup_cursor(cur, &tcur);
	if (error)
		goto error0;

	/*
	 * If there's a right sibling, see if it's ok to shift an entry
	 * out of it.
	 */
	if (!xfs_btree_ptr_is_null(cur, &rptr)) {
		/*
		 * Move the temp cursor to the last entry in the next block.
		 * Actually any entry but the first would suffice.
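		 *
		 * (Sketch of the three steps below, wording mine: lastrec
		 * pins tcur to the end of our block, increment walks tcur
		 * into the right sibling, and the second lastrec lands on
		 * that sibling's last entry.)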
		 */
		i = xfs_btree_lastrec(tcur, level);
		if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
			error = -EFSCORRUPTED;
			goto error0;
		}

		error = xfs_btree_increment(tcur, level, &i);
		if (error)
			goto error0;
		if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
			error = -EFSCORRUPTED;
			goto error0;
		}

		i = xfs_btree_lastrec(tcur, level);
		if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
			error = -EFSCORRUPTED;
			goto error0;
		}

		/* Grab a pointer to the block. */
		right = xfs_btree_get_block(tcur, level, &rbp);
#ifdef DEBUG
		error = xfs_btree_check_block(tcur, right, level, rbp);
		if (error)
			goto error0;
#endif
		/* Grab the current block number, for future use. */
		xfs_btree_get_sibling(tcur, right, &cptr, XFS_BB_LEFTSIB);

		/*
		 * If right block is full enough so that removing one entry
		 * won't make it too empty, and left-shifting an entry out
		 * of right to us works, we're done.
		 */
		if (xfs_btree_get_numrecs(right) - 1 >=
		    cur->bc_ops->get_minrecs(tcur, level)) {
			error = xfs_btree_lshift(tcur, level, &i);
			if (error)
				goto error0;
			if (i) {
				ASSERT(xfs_btree_get_numrecs(block) >=
				       cur->bc_ops->get_minrecs(tcur, level));

				xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
				tcur = NULL;

				error = xfs_btree_dec_cursor(cur, level, stat);
				if (error)
					goto error0;
				return 0;
			}
		}

		/*
		 * Otherwise, grab the number of records in right for
		 * future reference, and fix up the temp cursor to point
		 * to our block again (last record).
		 */
		rrecs = xfs_btree_get_numrecs(right);
		if (!xfs_btree_ptr_is_null(cur, &lptr)) {
			i = xfs_btree_firstrec(tcur, level);
			if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
				error = -EFSCORRUPTED;
				goto error0;
			}

			error = xfs_btree_decrement(tcur, level, &i);
			if (error)
				goto error0;
			if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
				error = -EFSCORRUPTED;
				goto error0;
			}
		}
	}

	/*
	 * If there's a left sibling, see if it's ok to shift an entry
	 * out of it.
	 */
	if (!xfs_btree_ptr_is_null(cur, &lptr)) {
		/*
		 * Move the temp cursor to the first entry in the
		 * previous block.
		 */
		i = xfs_btree_firstrec(tcur, level);
		if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
			error = -EFSCORRUPTED;
			goto error0;
		}

		error = xfs_btree_decrement(tcur, level, &i);
		if (error)
			goto error0;
		i = xfs_btree_firstrec(tcur, level);
		if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
			error = -EFSCORRUPTED;
			goto error0;
		}

		/* Grab a pointer to the block. */
		left = xfs_btree_get_block(tcur, level, &lbp);
#ifdef DEBUG
		error = xfs_btree_check_block(cur, left, level, lbp);
		if (error)
			goto error0;
#endif
		/* Grab the current block number, for future use. */
		xfs_btree_get_sibling(tcur, left, &cptr, XFS_BB_RIGHTSIB);

		/*
		 * If left block is full enough so that removing one entry
		 * won't make it too empty, and right-shifting an entry out
		 * of left to us works, we're done.
		 */
		if (xfs_btree_get_numrecs(left) - 1 >=
		    cur->bc_ops->get_minrecs(tcur, level)) {
			error = xfs_btree_rshift(tcur, level, &i);
			if (error)
				goto error0;
			if (i) {
				ASSERT(xfs_btree_get_numrecs(block) >=
				       cur->bc_ops->get_minrecs(tcur, level));
				xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
				tcur = NULL;
				if (level == 0)
					cur->bc_levels[0].ptr++;

				*stat = 1;
				return 0;
			}
		}

		/*
		 * Otherwise, grab the number of records in left for
		 * future reference.
		 */
		lrecs = xfs_btree_get_numrecs(left);
	}

	/* Delete the temp cursor, we're done with it. */
	xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
	tcur = NULL;

	/* If here, we need to do a join to keep the tree balanced. */
	ASSERT(!xfs_btree_ptr_is_null(cur, &cptr));

	if (!xfs_btree_ptr_is_null(cur, &lptr) &&
	    lrecs + xfs_btree_get_numrecs(block) <=
			cur->bc_ops->get_maxrecs(cur, level)) {
		/*
		 * Set "right" to be the starting block,
		 * "left" to be the left neighbor.
		 */
		rptr = cptr;
		right = block;
		rbp = bp;
		error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
		if (error)
			goto error0;

	/*
	 * If that won't work, see if we can join with the right neighbor block.
	 */
	} else if (!xfs_btree_ptr_is_null(cur, &rptr) &&
		   rrecs + xfs_btree_get_numrecs(block) <=
			cur->bc_ops->get_maxrecs(cur, level)) {
		/*
		 * Set "left" to be the starting block,
		 * "right" to be the right neighbor.
		 */
		lptr = cptr;
		left = block;
		lbp = bp;
		error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
		if (error)
			goto error0;

	/*
	 * Otherwise, we can't fix the imbalance.
	 * Just return.  This is probably a logic error, but it's not fatal.
	 */
	} else {
		error = xfs_btree_dec_cursor(cur, level, stat);
		if (error)
			goto error0;
		return 0;
	}

	rrecs = xfs_btree_get_numrecs(right);
	lrecs = xfs_btree_get_numrecs(left);

	/*
	 * We're now going to join "left" and "right" by moving all the stuff
	 * in "right" to "left" and deleting "right".
	 */
	XFS_BTREE_STATS_ADD(cur, moves, rrecs);
	if (level > 0) {
		/* It's a non-leaf.  Move keys and pointers. */
		union xfs_btree_key	*lkp;	/* left btree key */
		union xfs_btree_ptr	*lpp;	/* left address pointer */
		union xfs_btree_key	*rkp;	/* right btree key */
		union xfs_btree_ptr	*rpp;	/* right address pointer */

		lkp = xfs_btree_key_addr(cur, lrecs + 1, left);
		lpp = xfs_btree_ptr_addr(cur, lrecs + 1, left);
		rkp = xfs_btree_key_addr(cur, 1, right);
		rpp = xfs_btree_ptr_addr(cur, 1, right);

		for (i = 0; i < rrecs; i++) {
			error = xfs_btree_debug_check_ptr(cur, rpp, i, level);
			if (error)
				goto error0;
		}

		xfs_btree_copy_keys(cur, lkp, rkp, rrecs);
		xfs_btree_copy_ptrs(cur, lpp, rpp, rrecs);

		xfs_btree_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs);
		xfs_btree_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs);
	} else {
		/* It's a leaf.  Move records. */
		union xfs_btree_rec	*lrp;	/* left record pointer */
		union xfs_btree_rec	*rrp;	/* right record pointer */

		lrp = xfs_btree_rec_addr(cur, lrecs + 1, left);
		rrp = xfs_btree_rec_addr(cur, 1, right);

		xfs_btree_copy_recs(cur, lrp, rrp, rrecs);
		xfs_btree_log_recs(cur, lbp, lrecs + 1, lrecs + rrecs);
	}

	XFS_BTREE_STATS_INC(cur, join);

	/*
	 * Fix up the number of records and right block pointer in the
	 * surviving block, and log it.
	 */
	xfs_btree_set_numrecs(left, lrecs + rrecs);
	xfs_btree_get_sibling(cur, right, &cptr, XFS_BB_RIGHTSIB);
	xfs_btree_set_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
	xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);

	/* If there is a right sibling, point it to the remaining block. */
	xfs_btree_get_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
	if (!xfs_btree_ptr_is_null(cur, &cptr)) {
		error = xfs_btree_read_buf_block(cur, &cptr, 0, &rrblock, &rrbp);
		if (error)
			goto error0;
		xfs_btree_set_sibling(cur, rrblock, &lptr, XFS_BB_LEFTSIB);
		xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
	}

	/* Free the deleted block. */
	error = xfs_btree_free_block(cur, rbp);
	if (error)
		goto error0;

	/*
	 * If we joined with the left neighbor, set the buffer in the
	 * cursor to the left block, and fix up the index.
	 */
	if (bp != lbp) {
		cur->bc_levels[level].bp = lbp;
		cur->bc_levels[level].ptr += lrecs;
		cur->bc_levels[level].ra = 0;
	}
	/*
	 * If we joined with the right neighbor and there's a level above
	 * us, increment the cursor at that level.
	 */
	else if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) ||
		 (level + 1 < cur->bc_nlevels)) {
		error = xfs_btree_increment(cur, level + 1, &i);
		if (error)
			goto error0;
	}

	/*
	 * Readjust the ptr at this level if it's not a leaf, since it's
	 * still pointing at the deletion point, which makes the cursor
	 * inconsistent.  If this makes the ptr 0, the caller fixes it up.
	 * We can't use decrement because it would change the next level up.
	 */
	if (level > 0)
		cur->bc_levels[level].ptr--;

	/*
	 * We combined blocks, so we have to update the parent keys if the
	 * btree supports overlapped intervals.  However,
	 * bc_levels[level + 1].ptr points to the old block so that the caller
	 * knows which record to delete.  Therefore, the caller must be savvy
	 * enough to call updkeys for us if we return stat == 2.  The other
	 * exit points from this function don't require deletions further up
	 * the tree, so they can call updkeys directly.
	 */

	/* Return value means the next level up has something to do. */
	*stat = 2;
	return 0;

error0:
	if (tcur)
		xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
	return error;
}

/*
 * Delete the record pointed to by cur.
 * The cursor refers to the place where the record was (could be inserted)
 * when the operation returns.
 */
int					/* error */
xfs_btree_delete(
	struct xfs_btree_cur	*cur,
	int			*stat)	/* success/failure */
{
	int			error;	/* error return value */
	int			level;
	int			i;
	bool			joined = false;

	/*
	 * Go up the tree, starting at leaf level.
	 *
	 * If 2 is returned then a join was done; go to the next level.
	 * Otherwise we are done.
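	 *
	 * (Worked example, scenario mine: removing the last record of a leaf
	 * that then joins with a sibling makes xfs_btree_delrec() set
	 * *stat == 2 at level 0, so the loop runs again at level 1 to delete
	 * the keyptr of the freed leaf, and so on up the tree.)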
	 */
	for (level = 0, i = 2; i == 2; level++) {
		error = xfs_btree_delrec(cur, level, &i);
		if (error)
			goto error0;
		if (i == 2)
			joined = true;
	}

	/*
	 * If we combined blocks as part of deleting the record, delrec won't
	 * have updated the parent high keys so we have to do that here.
	 */
	if (joined && (cur->bc_flags & XFS_BTREE_OVERLAPPING)) {
		error = xfs_btree_updkeys_force(cur, 0);
		if (error)
			goto error0;
	}

	if (i == 0) {
		for (level = 1; level < cur->bc_nlevels; level++) {
			if (cur->bc_levels[level].ptr == 0) {
				error = xfs_btree_decrement(cur, level, &i);
				if (error)
					goto error0;
				break;
			}
		}
	}

	*stat = i;
	return 0;
error0:
	return error;
}

/*
 * Get the data from the pointed-to record.
 */
int					/* error */
xfs_btree_get_rec(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	union xfs_btree_rec	**recp,	/* output: btree record */
	int			*stat)	/* output: success/failure */
{
	struct xfs_btree_block	*block;	/* btree block */
	struct xfs_buf		*bp;	/* buffer pointer */
	int			ptr;	/* record number */
#ifdef DEBUG
	int			error;	/* error return value */
#endif

	ptr = cur->bc_levels[0].ptr;
	block = xfs_btree_get_block(cur, 0, &bp);

#ifdef DEBUG
	error = xfs_btree_check_block(cur, block, 0, bp);
	if (error)
		return error;
#endif

	/*
	 * Off the right end or left end, return failure.
	 */
	if (ptr > xfs_btree_get_numrecs(block) || ptr <= 0) {
		*stat = 0;
		return 0;
	}

	/*
	 * Point to the record and extract its data.
	 */
	*recp = xfs_btree_rec_addr(cur, ptr, block);
	*stat = 1;
	return 0;
}

/* Visit a block in a btree. */
STATIC int
xfs_btree_visit_block(
	struct xfs_btree_cur		*cur,
	int				level,
	xfs_btree_visit_blocks_fn	fn,
	void				*data)
{
	struct xfs_btree_block		*block;
	struct xfs_buf			*bp;
	union xfs_btree_ptr		rptr;
	int				error;

	/* do right sibling readahead */
	xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
	block = xfs_btree_get_block(cur, level, &bp);

	/* process the block */
	error = fn(cur, level, data);
	if (error)
		return error;

	/* now read rh sibling block for next iteration */
	xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
	if (xfs_btree_ptr_is_null(cur, &rptr))
		return -ENOENT;

	/*
	 * We only visit blocks once in this walk, so we have to avoid the
	 * internal xfs_btree_lookup_get_block() optimisation where it will
	 * return the same block without checking if the right sibling points
	 * back to us and creates a cyclic reference in the btree.
	 */
	if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
		if (be64_to_cpu(rptr.l) == XFS_DADDR_TO_FSB(cur->bc_mp,
							xfs_buf_daddr(bp)))
			return -EFSCORRUPTED;
	} else {
		if (be32_to_cpu(rptr.s) == xfs_daddr_to_agbno(cur->bc_mp,
							xfs_buf_daddr(bp)))
			return -EFSCORRUPTED;
	}
	return xfs_btree_lookup_get_block(cur, level, &rptr, &block);
}

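/*
 * (Note, wording mine: xfs_btree_visit_block() reports a null right sibling
 * as -ENOENT, which the level loop in xfs_btree_visit_blocks() below consumes
 * as the normal end-of-level marker; any other error aborts the walk.)
 */
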
/* Visit every block in a btree. */
int
xfs_btree_visit_blocks(
	struct xfs_btree_cur		*cur,
	xfs_btree_visit_blocks_fn	fn,
	unsigned int			flags,
	void				*data)
{
	union xfs_btree_ptr		lptr;
	int				level;
	struct xfs_btree_block		*block = NULL;
	int				error = 0;

	cur->bc_ops->init_ptr_from_cur(cur, &lptr);

	/* for each level */
	for (level = cur->bc_nlevels - 1; level >= 0; level--) {
		/* grab the left hand block */
		error = xfs_btree_lookup_get_block(cur, level, &lptr, &block);
		if (error)
			return error;

		/* readahead the left most block for the next level down */
		if (level > 0) {
			union xfs_btree_ptr	*ptr;

			ptr = xfs_btree_ptr_addr(cur, 1, block);
			xfs_btree_readahead_ptr(cur, ptr, 1);

			/* save for the next iteration of the loop */
			xfs_btree_copy_ptrs(cur, &lptr, ptr, 1);

			if (!(flags & XFS_BTREE_VISIT_LEAVES))
				continue;
		} else if (!(flags & XFS_BTREE_VISIT_RECORDS)) {
			continue;
		}

		/* for each buffer in the level */
		do {
			error = xfs_btree_visit_block(cur, level, fn, data);
		} while (!error);

		if (error != -ENOENT)
			return error;
	}

	return 0;
}

/*
 * Change the owner of a btree.
 *
 * The mechanism we use here is ordered buffer logging.  Because we don't know
 * how many buffers we are going to need to modify, we don't really want to
 * have to make transaction reservations for the worst case of every buffer in
 * a full size btree as that may be more space than we can fit in the log....
 *
 * We do the btree walk in the most efficient manner possible - we have sibling
 * pointers so we can just walk all the blocks on each level from left to right
 * in a single pass, and then move to the next level and do the same.  We can
 * also do readahead on the sibling pointers to get IO moving more quickly,
 * though for slow disks this is unlikely to make much difference to
 * performance as the amount of CPU work we have to do before moving to the
 * next block is relatively small.
 *
 * For each btree block that we load, modify the owner appropriately, set the
 * buffer as an ordered buffer and log it appropriately.  We need to ensure
 * that we mark the region we change dirty so that if the buffer is relogged in
 * a subsequent transaction the changes we make here as an ordered buffer are
 * correctly relogged in that transaction.  If we are in recovery context, then
 * just queue the modified buffer as delayed write buffer so the transaction
 * recovery completion writes the changes to disk.
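 *
 * (Example callers, list mine and possibly incomplete: the extent fork swap
 * path changes bmbt block ownership via xfs_bmbt_change_owner(), which wraps
 * xfs_btree_change_owner() below; log recovery is the case that passes a
 * delwri @buffer_list instead of a transaction.)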
 */
struct xfs_btree_block_change_owner_info {
	uint64_t		new_owner;
	struct list_head	*buffer_list;
};

static int
xfs_btree_block_change_owner(
	struct xfs_btree_cur	*cur,
	int			level,
	void			*data)
{
	struct xfs_btree_block_change_owner_info	*bbcoi = data;
	struct xfs_btree_block	*block;
	struct xfs_buf		*bp;

	/* modify the owner */
	block = xfs_btree_get_block(cur, level, &bp);
	if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
		if (block->bb_u.l.bb_owner == cpu_to_be64(bbcoi->new_owner))
			return 0;
		block->bb_u.l.bb_owner = cpu_to_be64(bbcoi->new_owner);
	} else {
		if (block->bb_u.s.bb_owner == cpu_to_be32(bbcoi->new_owner))
			return 0;
		block->bb_u.s.bb_owner = cpu_to_be32(bbcoi->new_owner);
	}

	/*
	 * If the block is a root block hosted in an inode, we might not have a
	 * buffer pointer here and we shouldn't attempt to log the change as
	 * the information is already held in the inode and discarded when the
	 * root block is formatted into the on-disk inode fork.  We still
	 * change it, though, so everything is consistent in memory.
	 */
	if (!bp) {
		ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
		ASSERT(level == cur->bc_nlevels - 1);
		return 0;
	}

	if (cur->bc_tp) {
		if (!xfs_trans_ordered_buf(cur->bc_tp, bp)) {
			xfs_btree_log_block(cur, bp, XFS_BB_OWNER);
			return -EAGAIN;
		}
	} else {
		xfs_buf_delwri_queue(bp, bbcoi->buffer_list);
	}

	return 0;
}

int
xfs_btree_change_owner(
	struct xfs_btree_cur	*cur,
	uint64_t		new_owner,
	struct list_head	*buffer_list)
{
	struct xfs_btree_block_change_owner_info	bbcoi;

	bbcoi.new_owner = new_owner;
	bbcoi.buffer_list = buffer_list;

	return xfs_btree_visit_blocks(cur, xfs_btree_block_change_owner,
			XFS_BTREE_VISIT_ALL, &bbcoi);
}

/* Verify the v5 fields of a long-format btree block. */
xfs_failaddr_t
xfs_btree_lblock_v5hdr_verify(
	struct xfs_buf		*bp,
	uint64_t		owner)
{
	struct xfs_mount	*mp = bp->b_mount;
	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);

	if (!xfs_has_crc(mp))
		return __this_address;
	if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid))
		return __this_address;
	if (block->bb_u.l.bb_blkno != cpu_to_be64(xfs_buf_daddr(bp)))
		return __this_address;
	if (owner != XFS_RMAP_OWN_UNKNOWN &&
	    be64_to_cpu(block->bb_u.l.bb_owner) != owner)
		return __this_address;
	return NULL;
}

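/*
 * (Note, wording mine: per-btree buffer verifiers are expected to chain
 * these helpers - check the v5 header fields first, then call the *_verify
 * helper below with the btree's own maxrecs limit for the block's level.)
 */
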
/* Verify a long-format btree block. */
xfs_failaddr_t
xfs_btree_lblock_verify(
	struct xfs_buf		*bp,
	unsigned int		max_recs)
{
	struct xfs_mount	*mp = bp->b_mount;
	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
	xfs_fsblock_t		fsb;
	xfs_failaddr_t		fa;

	/* numrecs verification */
	if (be16_to_cpu(block->bb_numrecs) > max_recs)
		return __this_address;

	/* sibling pointer verification */
	fsb = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp));
	fa = xfs_btree_check_lblock_siblings(mp, NULL, -1, fsb,
			block->bb_u.l.bb_leftsib);
	if (!fa)
		fa = xfs_btree_check_lblock_siblings(mp, NULL, -1, fsb,
				block->bb_u.l.bb_rightsib);
	return fa;
}

/**
 * xfs_btree_sblock_v5hdr_verify() -- verify the v5 fields of a short-format
 *				      btree block
 *
 * @bp: buffer containing the btree block
 */
xfs_failaddr_t
xfs_btree_sblock_v5hdr_verify(
	struct xfs_buf		*bp)
{
	struct xfs_mount	*mp = bp->b_mount;
	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
	struct xfs_perag	*pag = bp->b_pag;

	if (!xfs_has_crc(mp))
		return __this_address;
	if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
		return __this_address;
	if (block->bb_u.s.bb_blkno != cpu_to_be64(xfs_buf_daddr(bp)))
		return __this_address;
	if (pag && be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
		return __this_address;
	return NULL;
}

/**
 * xfs_btree_sblock_verify() -- verify a short-format btree block
 *
 * @bp: buffer containing the btree block
 * @max_recs: maximum records allowed in this btree node
 */
xfs_failaddr_t
xfs_btree_sblock_verify(
	struct xfs_buf		*bp,
	unsigned int		max_recs)
{
	struct xfs_mount	*mp = bp->b_mount;
	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
	xfs_agblock_t		agbno;
	xfs_failaddr_t		fa;

	/* numrecs verification */
	if (be16_to_cpu(block->bb_numrecs) > max_recs)
		return __this_address;

	/* sibling pointer verification */
	agbno = xfs_daddr_to_agbno(mp, xfs_buf_daddr(bp));
	fa = xfs_btree_check_sblock_siblings(bp->b_pag, NULL, -1, agbno,
			block->bb_u.s.bb_leftsib);
	if (!fa)
		fa = xfs_btree_check_sblock_siblings(bp->b_pag, NULL, -1, agbno,
				block->bb_u.s.bb_rightsib);
	return fa;
}

/*
 * For the given limits on leaf and keyptr records per block, calculate the
 * height of the tree needed to index the number of leaf records.
 */
unsigned int
xfs_btree_compute_maxlevels(
	const unsigned int	*limits,
	unsigned long long	records)
{
	unsigned long long	level_blocks = howmany_64(records, limits[0]);
	unsigned int		height = 1;

	while (level_blocks > 1) {
		level_blocks = howmany_64(level_blocks, limits[1]);
		height++;
	}

	return height;
}

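/*
 * (Worked example for this helper and xfs_btree_calc_size() below; the
 * limits values are hypothetical: with limits[0] == 4 leaf records and
 * limits[1] == 4 keyptrs per node block, 100 records need
 * howmany(100, 4) == 25 leaf blocks, then 7, then 2, then 1 node block
 * going up the levels, so xfs_btree_compute_maxlevels() returns a height
 * of 4 and xfs_btree_calc_size() returns 25 + 7 + 2 + 1 == 35 blocks.)
 */
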
/*
 * For the given limits on leaf and keyptr records per block, calculate the
 * number of blocks needed to index the given number of leaf records.
 */
unsigned long long
xfs_btree_calc_size(
	const unsigned int	*limits,
	unsigned long long	records)
{
	unsigned long long	level_blocks = howmany_64(records, limits[0]);
	unsigned long long	blocks = level_blocks;

	while (level_blocks > 1) {
		level_blocks = howmany_64(level_blocks, limits[1]);
		blocks += level_blocks;
	}

	return blocks;
}

/*
 * Given a number of available blocks for the btree to consume with records and
 * pointers, calculate the height of the tree needed to index all the records
 * that space can hold based on the number of pointers each interior node
 * holds.
 *
 * We start by assuming a single level tree consumes a single block, then track
 * the number of blocks each node level consumes until we no longer have space
 * to store the next node level.  At this point, we are indexing all the leaf
 * blocks in the space, and there's no more free space to split the tree any
 * further.  That's our maximum btree height.
 */
unsigned int
xfs_btree_space_to_height(
	const unsigned int	*limits,
	unsigned long long	leaf_blocks)
{
	/*
	 * The root btree block can have fewer than minrecs pointers in it
	 * because the tree might not be big enough to require that amount of
	 * fanout. Hence it has a minimum size of 2 pointers, not limits[1].
	 */
	unsigned long long	node_blocks = 2;
	unsigned long long	blocks_left = leaf_blocks - 1;
	unsigned int		height = 1;

	if (leaf_blocks < 1)
		return 0;

	while (node_blocks < blocks_left) {
		blocks_left -= node_blocks;
		node_blocks *= limits[1];
		height++;
	}

	return height;
}

/*
 * Query a regular btree for all records overlapping a given interval.
 * Start with a LE lookup of the key of low_rec and return all records
 * until we find a record with a key greater than the key of high_rec.
 */
STATIC int
xfs_btree_simple_query_range(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_key	*low_key,
	const union xfs_btree_key	*high_key,
	xfs_btree_query_range_fn	fn,
	void				*priv)
{
	union xfs_btree_rec		*recp;
	union xfs_btree_key		rec_key;
	int				stat;
	bool				firstrec = true;
	int				error;

	ASSERT(cur->bc_ops->init_high_key_from_rec);
	ASSERT(cur->bc_ops->diff_two_keys);

	/*
	 * Find the leftmost record.  The btree cursor must be set
	 * to the low record used to generate low_key.
	 */
	stat = 0;
	error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, &stat);
	if (error)
		goto out;

	/* Nothing?  See if there's anything to the right. */
	if (!stat) {
		error = xfs_btree_increment(cur, 0, &stat);
		if (error)
			goto out;
	}

	while (stat) {
		/* Find the record. */
		error = xfs_btree_get_rec(cur, &recp, &stat);
		if (error || !stat)
			break;

		/* Skip if low_key > high_key(rec). */
		if (firstrec) {
			cur->bc_ops->init_high_key_from_rec(&rec_key, recp);
			firstrec = false;
			if (xfs_btree_keycmp_gt(cur, low_key, &rec_key))
				goto advloop;
		}

		/* Stop if low_key(rec) > high_key. */
		cur->bc_ops->init_key_from_rec(&rec_key, recp);
		if (xfs_btree_keycmp_gt(cur, &rec_key, high_key))
			break;

		/* Callback */
		error = fn(cur, recp, priv);
		if (error)
			break;

advloop:
		/* Move on to the next record. */
		error = xfs_btree_increment(cur, 0, &stat);
		if (error)
			break;
	}

out:
	return error;
}

/*
 * Query an overlapped interval btree for all records overlapping a given
 * interval.  This function roughly follows the algorithm given in
 * "Interval Trees" of _Introduction to Algorithms_, which is section
 * 14.3 in the 2nd and 3rd editions.
 *
 * First, generate keys for the low and high records passed in.
 *
 * For any leaf node, generate the high and low keys for the record.
 * If the record keys overlap with the query low/high keys, pass the
 * record to the function iterator.
 *
 * For any internal node, compare the low and high keys of each
 * pointer against the query low/high keys.  If there's an overlap,
 * follow the pointer.
 *
 * As an optimization, we stop scanning a block when we find a low key
 * that is greater than the query's high key.
 */
STATIC int
xfs_btree_overlapped_query_range(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_key	*low_key,
	const union xfs_btree_key	*high_key,
	xfs_btree_query_range_fn	fn,
	void				*priv)
{
	union xfs_btree_ptr		ptr;
	union xfs_btree_ptr		*pp;
	union xfs_btree_key		rec_key;
	union xfs_btree_key		rec_hkey;
	union xfs_btree_key		*lkp;
	union xfs_btree_key		*hkp;
	union xfs_btree_rec		*recp;
	struct xfs_btree_block		*block;
	int				level;
	struct xfs_buf			*bp;
	int				i;
	int				error;

	/* Load the root of the btree. */
	level = cur->bc_nlevels - 1;
	cur->bc_ops->init_ptr_from_cur(cur, &ptr);
	error = xfs_btree_lookup_get_block(cur, level, &ptr, &block);
	if (error)
		return error;
	xfs_btree_get_block(cur, level, &bp);
	trace_xfs_btree_overlapped_query_range(cur, level, bp);
#ifdef DEBUG
	error = xfs_btree_check_block(cur, block, level, bp);
	if (error)
		goto out;
#endif
	cur->bc_levels[level].ptr = 1;

	while (level < cur->bc_nlevels) {
		block = xfs_btree_get_block(cur, level, &bp);

		/* End of node, pop back towards the root. */
		if (cur->bc_levels[level].ptr >
					be16_to_cpu(block->bb_numrecs)) {
pop_up:
			if (level < cur->bc_nlevels - 1)
				cur->bc_levels[level + 1].ptr++;
			level++;
			continue;
		}

		if (level == 0) {
			/* Handle a leaf node. */
			recp = xfs_btree_rec_addr(cur, cur->bc_levels[0].ptr,
					block);

			cur->bc_ops->init_high_key_from_rec(&rec_hkey, recp);
			cur->bc_ops->init_key_from_rec(&rec_key, recp);

			/*
			 * If (query's high key < record's low key), then there
			 * are no more interesting records in this block.  Pop
			 * up to the leaf level to find more record blocks.
			 *
			 * If (record's high key >= query's low key) and
			 *    (query's high key >= record's low key), then
			 * this record overlaps the query range; callback.
			 */
			if (xfs_btree_keycmp_lt(cur, high_key, &rec_key))
				goto pop_up;
			if (xfs_btree_keycmp_ge(cur, &rec_hkey, low_key)) {
				error = fn(cur, recp, priv);
				if (error)
					break;
			}
			cur->bc_levels[level].ptr++;
			continue;
		}

		/* Handle an internal node. */
		lkp = xfs_btree_key_addr(cur, cur->bc_levels[level].ptr, block);
		hkp = xfs_btree_high_key_addr(cur, cur->bc_levels[level].ptr,
				block);
		pp = xfs_btree_ptr_addr(cur, cur->bc_levels[level].ptr, block);

		/*
		 * If (query's high key < pointer's low key), then there are no
		 * more interesting keys in this block.  Pop up one leaf level
		 * to continue looking for records.
		 *
		 * If (pointer's high key >= query's low key) and
		 *    (query's high key >= pointer's low key), then
		 * this record overlaps the query range; follow pointer.
		 */
		if (xfs_btree_keycmp_lt(cur, high_key, lkp))
			goto pop_up;
		if (xfs_btree_keycmp_ge(cur, hkp, low_key)) {
			level--;
			error = xfs_btree_lookup_get_block(cur, level, pp,
					&block);
			if (error)
				goto out;
			xfs_btree_get_block(cur, level, &bp);
			trace_xfs_btree_overlapped_query_range(cur, level, bp);
#ifdef DEBUG
			error = xfs_btree_check_block(cur, block, level, bp);
			if (error)
				goto out;
#endif
			cur->bc_levels[level].ptr = 1;
			continue;
		}
		cur->bc_levels[level].ptr++;
	}

out:
	/*
	 * If we don't end this function with the cursor pointing at a record
	 * block, a subsequent non-error cursor deletion will not release
	 * node-level buffers, causing a buffer leak.  This is quite possible
	 * with a zero-results range query, so release the buffers if we
	 * failed to return any results.
	 */
	if (cur->bc_levels[0].bp == NULL) {
		for (i = 0; i < cur->bc_nlevels; i++) {
			if (cur->bc_levels[i].bp) {
				xfs_trans_brelse(cur->bc_tp,
						cur->bc_levels[i].bp);
				cur->bc_levels[i].bp = NULL;
				cur->bc_levels[i].ptr = 0;
				cur->bc_levels[i].ra = 0;
			}
		}
	}

	return error;
}

static inline void
xfs_btree_key_from_irec(
	struct xfs_btree_cur		*cur,
	union xfs_btree_key		*key,
	const union xfs_btree_irec	*irec)
{
	union xfs_btree_rec		rec;

	cur->bc_rec = *irec;
	cur->bc_ops->init_rec_from_cur(cur, &rec);
	cur->bc_ops->init_key_from_rec(key, &rec);
}

/*
 * Query a btree for all records overlapping a given interval of keys.  The
 * supplied function will be called with each record found; return one of the
 * XFS_BTREE_QUERY_RANGE_{CONTINUE,ABORT} values or the usual negative error
 * code.  This function returns -ECANCELED, zero, or a negative error code.
 */
int
xfs_btree_query_range(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_irec	*low_rec,
	const union xfs_btree_irec	*high_rec,
	xfs_btree_query_range_fn	fn,
	void				*priv)
{
	union xfs_btree_key		low_key;
	union xfs_btree_key		high_key;

	/* Find the keys of both ends of the interval. */
	xfs_btree_key_from_irec(cur, &high_key, high_rec);
	xfs_btree_key_from_irec(cur, &low_key, low_rec);

	/* Enforce low key <= high key. */
	if (!xfs_btree_keycmp_le(cur, &low_key, &high_key))
		return -EINVAL;

	if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
		return xfs_btree_simple_query_range(cur, &low_key,
				&high_key, fn, priv);
	return xfs_btree_overlapped_query_range(cur, &low_key, &high_key,
			fn, priv);
}

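/*
 * An example callback (sketch; the helper name is made up): capture the
 * first record in the range and stop the scan early.
 *
 *	static int
 *	xfs_query_first_rec_helper(
 *		struct xfs_btree_cur		*cur,
 *		const union xfs_btree_rec	*rec,
 *		void				*priv)
 *	{
 *		*(union xfs_btree_rec *)priv = *rec;
 *		return -ECANCELED;
 *	}
 *
 * xfs_btree_query_range() then returns -ECANCELED, which the caller treats
 * as "stopped early" rather than as a failure.
 */
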
/* Query a btree for all records. */
int
xfs_btree_query_all(
	struct xfs_btree_cur		*cur,
	xfs_btree_query_range_fn	fn,
	void				*priv)
{
	union xfs_btree_key		low_key;
	union xfs_btree_key		high_key;

	memset(&cur->bc_rec, 0, sizeof(cur->bc_rec));
	memset(&low_key, 0, sizeof(low_key));
	memset(&high_key, 0xFF, sizeof(high_key));

	return xfs_btree_simple_query_range(cur, &low_key, &high_key, fn, priv);
}

static int
xfs_btree_count_blocks_helper(
	struct xfs_btree_cur	*cur,
	int			level,
	void			*data)
{
	xfs_extlen_t		*blocks = data;
	(*blocks)++;

	return 0;
}

/* Count the blocks in a btree and return the result in *blocks. */
int
xfs_btree_count_blocks(
	struct xfs_btree_cur	*cur,
	xfs_extlen_t		*blocks)
{
	*blocks = 0;
	return xfs_btree_visit_blocks(cur, xfs_btree_count_blocks_helper,
			XFS_BTREE_VISIT_ALL, blocks);
}

/* Compare two btree pointers. */
int64_t
xfs_btree_diff_two_ptrs(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_ptr	*a,
	const union xfs_btree_ptr	*b)
{
	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
		return (int64_t)be64_to_cpu(a->l) - be64_to_cpu(b->l);
	return (int64_t)be32_to_cpu(a->s) - be32_to_cpu(b->s);
}

struct xfs_btree_has_records {
	/* Keys for the start and end of the range we want to know about. */
	union xfs_btree_key		start_key;
	union xfs_btree_key		end_key;

	/* Mask for key comparisons, if desired. */
	const union xfs_btree_key	*key_mask;

	/* Highest record key we've seen so far. */
	union xfs_btree_key		high_key;

	enum xbtree_recpacking		outcome;
};

STATIC int
xfs_btree_has_records_helper(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_rec	*rec,
	void				*priv)
{
	union xfs_btree_key		rec_key;
	union xfs_btree_key		rec_high_key;
	struct xfs_btree_has_records	*info = priv;
	enum xbtree_key_contig		key_contig;

	cur->bc_ops->init_key_from_rec(&rec_key, rec);

	if (info->outcome == XBTREE_RECPACKING_EMPTY) {
		info->outcome = XBTREE_RECPACKING_SPARSE;

		/*
		 * If the first record we find does not overlap the start key,
		 * then there is a hole at the start of the search range.
		 * Classify this as sparse and stop immediately.
		 */
		if (xfs_btree_masked_keycmp_lt(cur, &info->start_key, &rec_key,
				info->key_mask))
			return -ECANCELED;
	} else {
		/*
		 * If a subsequent record does not overlap with any record
		 * we've seen so far, there is a hole in the middle of the
		 * search range.  Classify this as sparse and stop.
		 * If the keys overlap and this btree does not allow overlap,
		 * signal corruption.
		 */
		key_contig = cur->bc_ops->keys_contiguous(cur, &info->high_key,
					&rec_key, info->key_mask);
		if (key_contig == XBTREE_KEY_OVERLAP &&
				!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
			return -EFSCORRUPTED;
		if (key_contig == XBTREE_KEY_GAP)
			return -ECANCELED;
	}

	/*
	 * If high_key(rec) is larger than any other high key we've seen,
	 * remember it for later.
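	 *
	 * (Reasoning, wording mine: in an overlapping btree the scan is
	 * ordered by low key, so the record seen most recently does not
	 * necessarily have the largest high key; the running maximum is
	 * what the final end_key comparison in xfs_btree_has_records()
	 * needs.)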
	 */
	cur->bc_ops->init_high_key_from_rec(&rec_high_key, rec);
	if (xfs_btree_masked_keycmp_gt(cur, &rec_high_key, &info->high_key,
				info->key_mask))
		info->high_key = rec_high_key; /* struct copy */

	return 0;
}

/*
 * Scan part of the keyspace of a btree and tell us if that keyspace does not
 * map to any records; is fully mapped to records; or is partially mapped to
 * records.  This is the btree record equivalent to determining if a file is
 * sparse.
 *
 * For most btree types, the record scan should use all available btree key
 * fields to compare the keys encountered.  These callers should pass NULL for
 * @mask.  However, some callers (e.g. scanning physical space in the rmapbt)
 * want to ignore some part of the btree record keyspace when performing the
 * comparison.  These callers should pass in a union xfs_btree_key object with
 * the fields that *should* be a part of the comparison set to any nonzero
 * value, and the rest zeroed.
 */
int
xfs_btree_has_records(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_irec	*low,
	const union xfs_btree_irec	*high,
	const union xfs_btree_key	*mask,
	enum xbtree_recpacking		*outcome)
{
	struct xfs_btree_has_records	info = {
		.outcome		= XBTREE_RECPACKING_EMPTY,
		.key_mask		= mask,
	};
	int				error;

	/* Not all btrees support this operation. */
	if (!cur->bc_ops->keys_contiguous) {
		ASSERT(0);
		return -EOPNOTSUPP;
	}

	xfs_btree_key_from_irec(cur, &info.start_key, low);
	xfs_btree_key_from_irec(cur, &info.end_key, high);

	error = xfs_btree_query_range(cur, low, high,
			xfs_btree_has_records_helper, &info);
	if (error == -ECANCELED)
		goto out;
	if (error)
		return error;

	if (info.outcome == XBTREE_RECPACKING_EMPTY)
		goto out;

	/*
	 * If the largest high_key(rec) we saw during the walk is greater than
	 * the end of the search range, classify this as full.  Otherwise,
	 * there is a hole at the end of the search range.
	 */
	if (xfs_btree_masked_keycmp_ge(cur, &info.high_key, &info.end_key,
				mask))
		info.outcome = XBTREE_RECPACKING_FULL;

out:
	*outcome = info.outcome;
	return 0;
}

/* Are there more records in this btree? */
bool
xfs_btree_has_more_records(
	struct xfs_btree_cur	*cur)
{
	struct xfs_btree_block	*block;
	struct xfs_buf		*bp;

	block = xfs_btree_get_block(cur, 0, &bp);

	/* There are still records in this block. */
	if (cur->bc_levels[0].ptr < xfs_btree_get_numrecs(block))
		return true;

	/* There are more record blocks. */
	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
		return block->bb_u.l.bb_rightsib != cpu_to_be64(NULLFSBLOCK);
	else
		return block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK);
}

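/*
 * (Note, wording mine: the error path in the setup function below can run
 * with only some of the caches created; that is safe because each
 * destructor boils down to a kmem_cache_destroy() call, which ignores a
 * NULL cache pointer.)
 */
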
/* Set up all the btree cursor caches. */
int __init
xfs_btree_init_cur_caches(void)
{
	int		error;

	error = xfs_allocbt_init_cur_cache();
	if (error)
		return error;
	error = xfs_inobt_init_cur_cache();
	if (error)
		goto err;
	error = xfs_bmbt_init_cur_cache();
	if (error)
		goto err;
	error = xfs_rmapbt_init_cur_cache();
	if (error)
		goto err;
	error = xfs_refcountbt_init_cur_cache();
	if (error)
		goto err;

	return 0;
err:
	xfs_btree_destroy_cur_caches();
	return error;
}

/* Destroy all the btree cursor caches, if they've been allocated. */
void
xfs_btree_destroy_cur_caches(void)
{
	xfs_allocbt_destroy_cur_cache();
	xfs_inobt_destroy_cur_cache();
	xfs_bmbt_destroy_cur_cache();
	xfs_rmapbt_destroy_cur_cache();
	xfs_refcountbt_destroy_cur_cache();
}