1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. 4 * All Rights Reserved. 5 */ 6 #include "xfs.h" 7 #include "xfs_fs.h" 8 #include "xfs_shared.h" 9 #include "xfs_format.h" 10 #include "xfs_log_format.h" 11 #include "xfs_trans_resv.h" 12 #include "xfs_bit.h" 13 #include "xfs_mount.h" 14 #include "xfs_inode.h" 15 #include "xfs_trans.h" 16 #include "xfs_buf_item.h" 17 #include "xfs_btree.h" 18 #include "xfs_errortag.h" 19 #include "xfs_error.h" 20 #include "xfs_trace.h" 21 #include "xfs_alloc.h" 22 #include "xfs_log.h" 23 24 /* 25 * Cursor allocation zone. 26 */ 27 kmem_zone_t *xfs_btree_cur_zone; 28 29 /* 30 * Btree magic numbers. 31 */ 32 static const uint32_t xfs_magics[2][XFS_BTNUM_MAX] = { 33 { XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, 0, XFS_BMAP_MAGIC, XFS_IBT_MAGIC, 34 XFS_FIBT_MAGIC, 0 }, 35 { XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC, XFS_RMAP_CRC_MAGIC, 36 XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC, XFS_FIBT_CRC_MAGIC, 37 XFS_REFC_CRC_MAGIC } 38 }; 39 40 uint32_t 41 xfs_btree_magic( 42 int crc, 43 xfs_btnum_t btnum) 44 { 45 uint32_t magic = xfs_magics[crc][btnum]; 46 47 /* Ensure we asked for crc for crc-only magics. */ 48 ASSERT(magic != 0); 49 return magic; 50 } 51 52 /* 53 * Check a long btree block header. Return the address of the failing check, 54 * or NULL if everything is ok. 55 */ 56 xfs_failaddr_t 57 __xfs_btree_check_lblock( 58 struct xfs_btree_cur *cur, 59 struct xfs_btree_block *block, 60 int level, 61 struct xfs_buf *bp) 62 { 63 struct xfs_mount *mp = cur->bc_mp; 64 xfs_btnum_t btnum = cur->bc_btnum; 65 int crc = xfs_sb_version_hascrc(&mp->m_sb); 66 67 if (crc) { 68 if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid)) 69 return __this_address; 70 if (block->bb_u.l.bb_blkno != 71 cpu_to_be64(bp ? bp->b_bn : XFS_BUF_DADDR_NULL)) 72 return __this_address; 73 if (block->bb_u.l.bb_pad != cpu_to_be32(0)) 74 return __this_address; 75 } 76 77 if (be32_to_cpu(block->bb_magic) != xfs_btree_magic(crc, btnum)) 78 return __this_address; 79 if (be16_to_cpu(block->bb_level) != level) 80 return __this_address; 81 if (be16_to_cpu(block->bb_numrecs) > 82 cur->bc_ops->get_maxrecs(cur, level)) 83 return __this_address; 84 if (block->bb_u.l.bb_leftsib != cpu_to_be64(NULLFSBLOCK) && 85 !xfs_btree_check_lptr(cur, be64_to_cpu(block->bb_u.l.bb_leftsib), 86 level + 1)) 87 return __this_address; 88 if (block->bb_u.l.bb_rightsib != cpu_to_be64(NULLFSBLOCK) && 89 !xfs_btree_check_lptr(cur, be64_to_cpu(block->bb_u.l.bb_rightsib), 90 level + 1)) 91 return __this_address; 92 93 return NULL; 94 } 95 96 /* Check a long btree block header. */ 97 static int 98 xfs_btree_check_lblock( 99 struct xfs_btree_cur *cur, 100 struct xfs_btree_block *block, 101 int level, 102 struct xfs_buf *bp) 103 { 104 struct xfs_mount *mp = cur->bc_mp; 105 xfs_failaddr_t fa; 106 107 fa = __xfs_btree_check_lblock(cur, block, level, bp); 108 if (unlikely(XFS_TEST_ERROR(fa != NULL, mp, 109 XFS_ERRTAG_BTREE_CHECK_LBLOCK))) { 110 if (bp) 111 trace_xfs_btree_corrupt(bp, _RET_IP_); 112 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); 113 return -EFSCORRUPTED; 114 } 115 return 0; 116 } 117 118 /* 119 * Check a short btree block header. Return the address of the failing check, 120 * or NULL if everything is ok. 121 */ 122 xfs_failaddr_t 123 __xfs_btree_check_sblock( 124 struct xfs_btree_cur *cur, 125 struct xfs_btree_block *block, 126 int level, 127 struct xfs_buf *bp) 128 { 129 struct xfs_mount *mp = cur->bc_mp; 130 xfs_btnum_t btnum = cur->bc_btnum; 131 int crc = xfs_sb_version_hascrc(&mp->m_sb); 132 133 if (crc) { 134 if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid)) 135 return __this_address; 136 if (block->bb_u.s.bb_blkno != 137 cpu_to_be64(bp ? bp->b_bn : XFS_BUF_DADDR_NULL)) 138 return __this_address; 139 } 140 141 if (be32_to_cpu(block->bb_magic) != xfs_btree_magic(crc, btnum)) 142 return __this_address; 143 if (be16_to_cpu(block->bb_level) != level) 144 return __this_address; 145 if (be16_to_cpu(block->bb_numrecs) > 146 cur->bc_ops->get_maxrecs(cur, level)) 147 return __this_address; 148 if (block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK) && 149 !xfs_btree_check_sptr(cur, be32_to_cpu(block->bb_u.s.bb_leftsib), 150 level + 1)) 151 return __this_address; 152 if (block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK) && 153 !xfs_btree_check_sptr(cur, be32_to_cpu(block->bb_u.s.bb_rightsib), 154 level + 1)) 155 return __this_address; 156 157 return NULL; 158 } 159 160 /* Check a short btree block header. */ 161 STATIC int 162 xfs_btree_check_sblock( 163 struct xfs_btree_cur *cur, 164 struct xfs_btree_block *block, 165 int level, 166 struct xfs_buf *bp) 167 { 168 struct xfs_mount *mp = cur->bc_mp; 169 xfs_failaddr_t fa; 170 171 fa = __xfs_btree_check_sblock(cur, block, level, bp); 172 if (unlikely(XFS_TEST_ERROR(fa != NULL, mp, 173 XFS_ERRTAG_BTREE_CHECK_SBLOCK))) { 174 if (bp) 175 trace_xfs_btree_corrupt(bp, _RET_IP_); 176 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); 177 return -EFSCORRUPTED; 178 } 179 return 0; 180 } 181 182 /* 183 * Debug routine: check that block header is ok. 184 */ 185 int 186 xfs_btree_check_block( 187 struct xfs_btree_cur *cur, /* btree cursor */ 188 struct xfs_btree_block *block, /* generic btree block pointer */ 189 int level, /* level of the btree block */ 190 struct xfs_buf *bp) /* buffer containing block, if any */ 191 { 192 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) 193 return xfs_btree_check_lblock(cur, block, level, bp); 194 else 195 return xfs_btree_check_sblock(cur, block, level, bp); 196 } 197 198 /* Check that this long pointer is valid and points within the fs. */ 199 bool 200 xfs_btree_check_lptr( 201 struct xfs_btree_cur *cur, 202 xfs_fsblock_t fsbno, 203 int level) 204 { 205 if (level <= 0) 206 return false; 207 return xfs_verify_fsbno(cur->bc_mp, fsbno); 208 } 209 210 /* Check that this short pointer is valid and points within the AG. */ 211 bool 212 xfs_btree_check_sptr( 213 struct xfs_btree_cur *cur, 214 xfs_agblock_t agbno, 215 int level) 216 { 217 if (level <= 0) 218 return false; 219 return xfs_verify_agbno(cur->bc_mp, cur->bc_private.a.agno, agbno); 220 } 221 222 /* 223 * Check that a given (indexed) btree pointer at a certain level of a 224 * btree is valid and doesn't point past where it should. 225 */ 226 static int 227 xfs_btree_check_ptr( 228 struct xfs_btree_cur *cur, 229 union xfs_btree_ptr *ptr, 230 int index, 231 int level) 232 { 233 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { 234 if (xfs_btree_check_lptr(cur, be64_to_cpu((&ptr->l)[index]), 235 level)) 236 return 0; 237 xfs_err(cur->bc_mp, 238 "Inode %llu fork %d: Corrupt btree %d pointer at level %d index %d.", 239 cur->bc_private.b.ip->i_ino, 240 cur->bc_private.b.whichfork, cur->bc_btnum, 241 level, index); 242 } else { 243 if (xfs_btree_check_sptr(cur, be32_to_cpu((&ptr->s)[index]), 244 level)) 245 return 0; 246 xfs_err(cur->bc_mp, 247 "AG %u: Corrupt btree %d pointer at level %d index %d.", 248 cur->bc_private.a.agno, cur->bc_btnum, 249 level, index); 250 } 251 252 return -EFSCORRUPTED; 253 } 254 255 #ifdef DEBUG 256 # define xfs_btree_debug_check_ptr xfs_btree_check_ptr 257 #else 258 # define xfs_btree_debug_check_ptr(...) (0) 259 #endif 260 261 /* 262 * Calculate CRC on the whole btree block and stuff it into the 263 * long-form btree header. 264 * 265 * Prior to calculting the CRC, pull the LSN out of the buffer log item and put 266 * it into the buffer so recovery knows what the last modification was that made 267 * it to disk. 268 */ 269 void 270 xfs_btree_lblock_calc_crc( 271 struct xfs_buf *bp) 272 { 273 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 274 struct xfs_buf_log_item *bip = bp->b_log_item; 275 276 if (!xfs_sb_version_hascrc(&bp->b_mount->m_sb)) 277 return; 278 if (bip) 279 block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn); 280 xfs_buf_update_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF); 281 } 282 283 bool 284 xfs_btree_lblock_verify_crc( 285 struct xfs_buf *bp) 286 { 287 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 288 struct xfs_mount *mp = bp->b_mount; 289 290 if (xfs_sb_version_hascrc(&mp->m_sb)) { 291 if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.l.bb_lsn))) 292 return false; 293 return xfs_buf_verify_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF); 294 } 295 296 return true; 297 } 298 299 /* 300 * Calculate CRC on the whole btree block and stuff it into the 301 * short-form btree header. 302 * 303 * Prior to calculting the CRC, pull the LSN out of the buffer log item and put 304 * it into the buffer so recovery knows what the last modification was that made 305 * it to disk. 306 */ 307 void 308 xfs_btree_sblock_calc_crc( 309 struct xfs_buf *bp) 310 { 311 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 312 struct xfs_buf_log_item *bip = bp->b_log_item; 313 314 if (!xfs_sb_version_hascrc(&bp->b_mount->m_sb)) 315 return; 316 if (bip) 317 block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn); 318 xfs_buf_update_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF); 319 } 320 321 bool 322 xfs_btree_sblock_verify_crc( 323 struct xfs_buf *bp) 324 { 325 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 326 struct xfs_mount *mp = bp->b_mount; 327 328 if (xfs_sb_version_hascrc(&mp->m_sb)) { 329 if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.s.bb_lsn))) 330 return false; 331 return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF); 332 } 333 334 return true; 335 } 336 337 static int 338 xfs_btree_free_block( 339 struct xfs_btree_cur *cur, 340 struct xfs_buf *bp) 341 { 342 int error; 343 344 error = cur->bc_ops->free_block(cur, bp); 345 if (!error) { 346 xfs_trans_binval(cur->bc_tp, bp); 347 XFS_BTREE_STATS_INC(cur, free); 348 } 349 return error; 350 } 351 352 /* 353 * Delete the btree cursor. 354 */ 355 void 356 xfs_btree_del_cursor( 357 xfs_btree_cur_t *cur, /* btree cursor */ 358 int error) /* del because of error */ 359 { 360 int i; /* btree level */ 361 362 /* 363 * Clear the buffer pointers, and release the buffers. 364 * If we're doing this in the face of an error, we 365 * need to make sure to inspect all of the entries 366 * in the bc_bufs array for buffers to be unlocked. 367 * This is because some of the btree code works from 368 * level n down to 0, and if we get an error along 369 * the way we won't have initialized all the entries 370 * down to 0. 371 */ 372 for (i = 0; i < cur->bc_nlevels; i++) { 373 if (cur->bc_bufs[i]) 374 xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[i]); 375 else if (!error) 376 break; 377 } 378 /* 379 * Can't free a bmap cursor without having dealt with the 380 * allocated indirect blocks' accounting. 381 */ 382 ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP || 383 cur->bc_private.b.allocated == 0); 384 /* 385 * Free the cursor. 386 */ 387 kmem_zone_free(xfs_btree_cur_zone, cur); 388 } 389 390 /* 391 * Duplicate the btree cursor. 392 * Allocate a new one, copy the record, re-get the buffers. 393 */ 394 int /* error */ 395 xfs_btree_dup_cursor( 396 xfs_btree_cur_t *cur, /* input cursor */ 397 xfs_btree_cur_t **ncur) /* output cursor */ 398 { 399 xfs_buf_t *bp; /* btree block's buffer pointer */ 400 int error; /* error return value */ 401 int i; /* level number of btree block */ 402 xfs_mount_t *mp; /* mount structure for filesystem */ 403 xfs_btree_cur_t *new; /* new cursor value */ 404 xfs_trans_t *tp; /* transaction pointer, can be NULL */ 405 406 tp = cur->bc_tp; 407 mp = cur->bc_mp; 408 409 /* 410 * Allocate a new cursor like the old one. 411 */ 412 new = cur->bc_ops->dup_cursor(cur); 413 414 /* 415 * Copy the record currently in the cursor. 416 */ 417 new->bc_rec = cur->bc_rec; 418 419 /* 420 * For each level current, re-get the buffer and copy the ptr value. 421 */ 422 for (i = 0; i < new->bc_nlevels; i++) { 423 new->bc_ptrs[i] = cur->bc_ptrs[i]; 424 new->bc_ra[i] = cur->bc_ra[i]; 425 bp = cur->bc_bufs[i]; 426 if (bp) { 427 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, 428 XFS_BUF_ADDR(bp), mp->m_bsize, 429 0, &bp, 430 cur->bc_ops->buf_ops); 431 if (error) { 432 xfs_btree_del_cursor(new, error); 433 *ncur = NULL; 434 return error; 435 } 436 } 437 new->bc_bufs[i] = bp; 438 } 439 *ncur = new; 440 return 0; 441 } 442 443 /* 444 * XFS btree block layout and addressing: 445 * 446 * There are two types of blocks in the btree: leaf and non-leaf blocks. 447 * 448 * The leaf record start with a header then followed by records containing 449 * the values. A non-leaf block also starts with the same header, and 450 * then first contains lookup keys followed by an equal number of pointers 451 * to the btree blocks at the previous level. 452 * 453 * +--------+-------+-------+-------+-------+-------+-------+ 454 * Leaf: | header | rec 1 | rec 2 | rec 3 | rec 4 | rec 5 | rec N | 455 * +--------+-------+-------+-------+-------+-------+-------+ 456 * 457 * +--------+-------+-------+-------+-------+-------+-------+ 458 * Non-Leaf: | header | key 1 | key 2 | key N | ptr 1 | ptr 2 | ptr N | 459 * +--------+-------+-------+-------+-------+-------+-------+ 460 * 461 * The header is called struct xfs_btree_block for reasons better left unknown 462 * and comes in different versions for short (32bit) and long (64bit) block 463 * pointers. The record and key structures are defined by the btree instances 464 * and opaque to the btree core. The block pointers are simple disk endian 465 * integers, available in a short (32bit) and long (64bit) variant. 466 * 467 * The helpers below calculate the offset of a given record, key or pointer 468 * into a btree block (xfs_btree_*_offset) or return a pointer to the given 469 * record, key or pointer (xfs_btree_*_addr). Note that all addressing 470 * inside the btree block is done using indices starting at one, not zero! 471 * 472 * If XFS_BTREE_OVERLAPPING is set, then this btree supports keys containing 473 * overlapping intervals. In such a tree, records are still sorted lowest to 474 * highest and indexed by the smallest key value that refers to the record. 475 * However, nodes are different: each pointer has two associated keys -- one 476 * indexing the lowest key available in the block(s) below (the same behavior 477 * as the key in a regular btree) and another indexing the highest key 478 * available in the block(s) below. Because records are /not/ sorted by the 479 * highest key, all leaf block updates require us to compute the highest key 480 * that matches any record in the leaf and to recursively update the high keys 481 * in the nodes going further up in the tree, if necessary. Nodes look like 482 * this: 483 * 484 * +--------+-----+-----+-----+-----+-----+-------+-------+-----+ 485 * Non-Leaf: | header | lo1 | hi1 | lo2 | hi2 | ... | ptr 1 | ptr 2 | ... | 486 * +--------+-----+-----+-----+-----+-----+-------+-------+-----+ 487 * 488 * To perform an interval query on an overlapped tree, perform the usual 489 * depth-first search and use the low and high keys to decide if we can skip 490 * that particular node. If a leaf node is reached, return the records that 491 * intersect the interval. Note that an interval query may return numerous 492 * entries. For a non-overlapped tree, simply search for the record associated 493 * with the lowest key and iterate forward until a non-matching record is 494 * found. Section 14.3 ("Interval Trees") of _Introduction to Algorithms_ by 495 * Cormen, Leiserson, Rivest, and Stein (2nd or 3rd ed. only) discuss this in 496 * more detail. 497 * 498 * Why do we care about overlapping intervals? Let's say you have a bunch of 499 * reverse mapping records on a reflink filesystem: 500 * 501 * 1: +- file A startblock B offset C length D -----------+ 502 * 2: +- file E startblock F offset G length H --------------+ 503 * 3: +- file I startblock F offset J length K --+ 504 * 4: +- file L... --+ 505 * 506 * Now say we want to map block (B+D) into file A at offset (C+D). Ideally, 507 * we'd simply increment the length of record 1. But how do we find the record 508 * that ends at (B+D-1) (i.e. record 1)? A LE lookup of (B+D-1) would return 509 * record 3 because the keys are ordered first by startblock. An interval 510 * query would return records 1 and 2 because they both overlap (B+D-1), and 511 * from that we can pick out record 1 as the appropriate left neighbor. 512 * 513 * In the non-overlapped case you can do a LE lookup and decrement the cursor 514 * because a record's interval must end before the next record. 515 */ 516 517 /* 518 * Return size of the btree block header for this btree instance. 519 */ 520 static inline size_t xfs_btree_block_len(struct xfs_btree_cur *cur) 521 { 522 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { 523 if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) 524 return XFS_BTREE_LBLOCK_CRC_LEN; 525 return XFS_BTREE_LBLOCK_LEN; 526 } 527 if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) 528 return XFS_BTREE_SBLOCK_CRC_LEN; 529 return XFS_BTREE_SBLOCK_LEN; 530 } 531 532 /* 533 * Return size of btree block pointers for this btree instance. 534 */ 535 static inline size_t xfs_btree_ptr_len(struct xfs_btree_cur *cur) 536 { 537 return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ? 538 sizeof(__be64) : sizeof(__be32); 539 } 540 541 /* 542 * Calculate offset of the n-th record in a btree block. 543 */ 544 STATIC size_t 545 xfs_btree_rec_offset( 546 struct xfs_btree_cur *cur, 547 int n) 548 { 549 return xfs_btree_block_len(cur) + 550 (n - 1) * cur->bc_ops->rec_len; 551 } 552 553 /* 554 * Calculate offset of the n-th key in a btree block. 555 */ 556 STATIC size_t 557 xfs_btree_key_offset( 558 struct xfs_btree_cur *cur, 559 int n) 560 { 561 return xfs_btree_block_len(cur) + 562 (n - 1) * cur->bc_ops->key_len; 563 } 564 565 /* 566 * Calculate offset of the n-th high key in a btree block. 567 */ 568 STATIC size_t 569 xfs_btree_high_key_offset( 570 struct xfs_btree_cur *cur, 571 int n) 572 { 573 return xfs_btree_block_len(cur) + 574 (n - 1) * cur->bc_ops->key_len + (cur->bc_ops->key_len / 2); 575 } 576 577 /* 578 * Calculate offset of the n-th block pointer in a btree block. 579 */ 580 STATIC size_t 581 xfs_btree_ptr_offset( 582 struct xfs_btree_cur *cur, 583 int n, 584 int level) 585 { 586 return xfs_btree_block_len(cur) + 587 cur->bc_ops->get_maxrecs(cur, level) * cur->bc_ops->key_len + 588 (n - 1) * xfs_btree_ptr_len(cur); 589 } 590 591 /* 592 * Return a pointer to the n-th record in the btree block. 593 */ 594 union xfs_btree_rec * 595 xfs_btree_rec_addr( 596 struct xfs_btree_cur *cur, 597 int n, 598 struct xfs_btree_block *block) 599 { 600 return (union xfs_btree_rec *) 601 ((char *)block + xfs_btree_rec_offset(cur, n)); 602 } 603 604 /* 605 * Return a pointer to the n-th key in the btree block. 606 */ 607 union xfs_btree_key * 608 xfs_btree_key_addr( 609 struct xfs_btree_cur *cur, 610 int n, 611 struct xfs_btree_block *block) 612 { 613 return (union xfs_btree_key *) 614 ((char *)block + xfs_btree_key_offset(cur, n)); 615 } 616 617 /* 618 * Return a pointer to the n-th high key in the btree block. 619 */ 620 union xfs_btree_key * 621 xfs_btree_high_key_addr( 622 struct xfs_btree_cur *cur, 623 int n, 624 struct xfs_btree_block *block) 625 { 626 return (union xfs_btree_key *) 627 ((char *)block + xfs_btree_high_key_offset(cur, n)); 628 } 629 630 /* 631 * Return a pointer to the n-th block pointer in the btree block. 632 */ 633 union xfs_btree_ptr * 634 xfs_btree_ptr_addr( 635 struct xfs_btree_cur *cur, 636 int n, 637 struct xfs_btree_block *block) 638 { 639 int level = xfs_btree_get_level(block); 640 641 ASSERT(block->bb_level != 0); 642 643 return (union xfs_btree_ptr *) 644 ((char *)block + xfs_btree_ptr_offset(cur, n, level)); 645 } 646 647 /* 648 * Get the root block which is stored in the inode. 649 * 650 * For now this btree implementation assumes the btree root is always 651 * stored in the if_broot field of an inode fork. 652 */ 653 STATIC struct xfs_btree_block * 654 xfs_btree_get_iroot( 655 struct xfs_btree_cur *cur) 656 { 657 struct xfs_ifork *ifp; 658 659 ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, cur->bc_private.b.whichfork); 660 return (struct xfs_btree_block *)ifp->if_broot; 661 } 662 663 /* 664 * Retrieve the block pointer from the cursor at the given level. 665 * This may be an inode btree root or from a buffer. 666 */ 667 struct xfs_btree_block * /* generic btree block pointer */ 668 xfs_btree_get_block( 669 struct xfs_btree_cur *cur, /* btree cursor */ 670 int level, /* level in btree */ 671 struct xfs_buf **bpp) /* buffer containing the block */ 672 { 673 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && 674 (level == cur->bc_nlevels - 1)) { 675 *bpp = NULL; 676 return xfs_btree_get_iroot(cur); 677 } 678 679 *bpp = cur->bc_bufs[level]; 680 return XFS_BUF_TO_BLOCK(*bpp); 681 } 682 683 /* 684 * Get a buffer for the block, return it with no data read. 685 * Long-form addressing. 686 */ 687 xfs_buf_t * /* buffer for fsbno */ 688 xfs_btree_get_bufl( 689 xfs_mount_t *mp, /* file system mount point */ 690 xfs_trans_t *tp, /* transaction pointer */ 691 xfs_fsblock_t fsbno) /* file system block number */ 692 { 693 xfs_daddr_t d; /* real disk block address */ 694 695 ASSERT(fsbno != NULLFSBLOCK); 696 d = XFS_FSB_TO_DADDR(mp, fsbno); 697 return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, 0); 698 } 699 700 /* 701 * Get a buffer for the block, return it with no data read. 702 * Short-form addressing. 703 */ 704 xfs_buf_t * /* buffer for agno/agbno */ 705 xfs_btree_get_bufs( 706 xfs_mount_t *mp, /* file system mount point */ 707 xfs_trans_t *tp, /* transaction pointer */ 708 xfs_agnumber_t agno, /* allocation group number */ 709 xfs_agblock_t agbno) /* allocation group block number */ 710 { 711 xfs_daddr_t d; /* real disk block address */ 712 713 ASSERT(agno != NULLAGNUMBER); 714 ASSERT(agbno != NULLAGBLOCK); 715 d = XFS_AGB_TO_DADDR(mp, agno, agbno); 716 return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, 0); 717 } 718 719 /* 720 * Check for the cursor referring to the last block at the given level. 721 */ 722 int /* 1=is last block, 0=not last block */ 723 xfs_btree_islastblock( 724 xfs_btree_cur_t *cur, /* btree cursor */ 725 int level) /* level to check */ 726 { 727 struct xfs_btree_block *block; /* generic btree block pointer */ 728 xfs_buf_t *bp; /* buffer containing block */ 729 730 block = xfs_btree_get_block(cur, level, &bp); 731 xfs_btree_check_block(cur, block, level, bp); 732 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) 733 return block->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK); 734 else 735 return block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK); 736 } 737 738 /* 739 * Change the cursor to point to the first record at the given level. 740 * Other levels are unaffected. 741 */ 742 STATIC int /* success=1, failure=0 */ 743 xfs_btree_firstrec( 744 xfs_btree_cur_t *cur, /* btree cursor */ 745 int level) /* level to change */ 746 { 747 struct xfs_btree_block *block; /* generic btree block pointer */ 748 xfs_buf_t *bp; /* buffer containing block */ 749 750 /* 751 * Get the block pointer for this level. 752 */ 753 block = xfs_btree_get_block(cur, level, &bp); 754 if (xfs_btree_check_block(cur, block, level, bp)) 755 return 0; 756 /* 757 * It's empty, there is no such record. 758 */ 759 if (!block->bb_numrecs) 760 return 0; 761 /* 762 * Set the ptr value to 1, that's the first record/key. 763 */ 764 cur->bc_ptrs[level] = 1; 765 return 1; 766 } 767 768 /* 769 * Change the cursor to point to the last record in the current block 770 * at the given level. Other levels are unaffected. 771 */ 772 STATIC int /* success=1, failure=0 */ 773 xfs_btree_lastrec( 774 xfs_btree_cur_t *cur, /* btree cursor */ 775 int level) /* level to change */ 776 { 777 struct xfs_btree_block *block; /* generic btree block pointer */ 778 xfs_buf_t *bp; /* buffer containing block */ 779 780 /* 781 * Get the block pointer for this level. 782 */ 783 block = xfs_btree_get_block(cur, level, &bp); 784 if (xfs_btree_check_block(cur, block, level, bp)) 785 return 0; 786 /* 787 * It's empty, there is no such record. 788 */ 789 if (!block->bb_numrecs) 790 return 0; 791 /* 792 * Set the ptr value to numrecs, that's the last record/key. 793 */ 794 cur->bc_ptrs[level] = be16_to_cpu(block->bb_numrecs); 795 return 1; 796 } 797 798 /* 799 * Compute first and last byte offsets for the fields given. 800 * Interprets the offsets table, which contains struct field offsets. 801 */ 802 void 803 xfs_btree_offsets( 804 int64_t fields, /* bitmask of fields */ 805 const short *offsets, /* table of field offsets */ 806 int nbits, /* number of bits to inspect */ 807 int *first, /* output: first byte offset */ 808 int *last) /* output: last byte offset */ 809 { 810 int i; /* current bit number */ 811 int64_t imask; /* mask for current bit number */ 812 813 ASSERT(fields != 0); 814 /* 815 * Find the lowest bit, so the first byte offset. 816 */ 817 for (i = 0, imask = 1LL; ; i++, imask <<= 1) { 818 if (imask & fields) { 819 *first = offsets[i]; 820 break; 821 } 822 } 823 /* 824 * Find the highest bit, so the last byte offset. 825 */ 826 for (i = nbits - 1, imask = 1LL << i; ; i--, imask >>= 1) { 827 if (imask & fields) { 828 *last = offsets[i + 1] - 1; 829 break; 830 } 831 } 832 } 833 834 /* 835 * Get a buffer for the block, return it read in. 836 * Long-form addressing. 837 */ 838 int 839 xfs_btree_read_bufl( 840 struct xfs_mount *mp, /* file system mount point */ 841 struct xfs_trans *tp, /* transaction pointer */ 842 xfs_fsblock_t fsbno, /* file system block number */ 843 struct xfs_buf **bpp, /* buffer for fsbno */ 844 int refval, /* ref count value for buffer */ 845 const struct xfs_buf_ops *ops) 846 { 847 struct xfs_buf *bp; /* return value */ 848 xfs_daddr_t d; /* real disk block address */ 849 int error; 850 851 if (!xfs_verify_fsbno(mp, fsbno)) 852 return -EFSCORRUPTED; 853 d = XFS_FSB_TO_DADDR(mp, fsbno); 854 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d, 855 mp->m_bsize, 0, &bp, ops); 856 if (error) 857 return error; 858 if (bp) 859 xfs_buf_set_ref(bp, refval); 860 *bpp = bp; 861 return 0; 862 } 863 864 /* 865 * Read-ahead the block, don't wait for it, don't return a buffer. 866 * Long-form addressing. 867 */ 868 /* ARGSUSED */ 869 void 870 xfs_btree_reada_bufl( 871 struct xfs_mount *mp, /* file system mount point */ 872 xfs_fsblock_t fsbno, /* file system block number */ 873 xfs_extlen_t count, /* count of filesystem blocks */ 874 const struct xfs_buf_ops *ops) 875 { 876 xfs_daddr_t d; 877 878 ASSERT(fsbno != NULLFSBLOCK); 879 d = XFS_FSB_TO_DADDR(mp, fsbno); 880 xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops); 881 } 882 883 /* 884 * Read-ahead the block, don't wait for it, don't return a buffer. 885 * Short-form addressing. 886 */ 887 /* ARGSUSED */ 888 void 889 xfs_btree_reada_bufs( 890 struct xfs_mount *mp, /* file system mount point */ 891 xfs_agnumber_t agno, /* allocation group number */ 892 xfs_agblock_t agbno, /* allocation group block number */ 893 xfs_extlen_t count, /* count of filesystem blocks */ 894 const struct xfs_buf_ops *ops) 895 { 896 xfs_daddr_t d; 897 898 ASSERT(agno != NULLAGNUMBER); 899 ASSERT(agbno != NULLAGBLOCK); 900 d = XFS_AGB_TO_DADDR(mp, agno, agbno); 901 xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops); 902 } 903 904 STATIC int 905 xfs_btree_readahead_lblock( 906 struct xfs_btree_cur *cur, 907 int lr, 908 struct xfs_btree_block *block) 909 { 910 int rval = 0; 911 xfs_fsblock_t left = be64_to_cpu(block->bb_u.l.bb_leftsib); 912 xfs_fsblock_t right = be64_to_cpu(block->bb_u.l.bb_rightsib); 913 914 if ((lr & XFS_BTCUR_LEFTRA) && left != NULLFSBLOCK) { 915 xfs_btree_reada_bufl(cur->bc_mp, left, 1, 916 cur->bc_ops->buf_ops); 917 rval++; 918 } 919 920 if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLFSBLOCK) { 921 xfs_btree_reada_bufl(cur->bc_mp, right, 1, 922 cur->bc_ops->buf_ops); 923 rval++; 924 } 925 926 return rval; 927 } 928 929 STATIC int 930 xfs_btree_readahead_sblock( 931 struct xfs_btree_cur *cur, 932 int lr, 933 struct xfs_btree_block *block) 934 { 935 int rval = 0; 936 xfs_agblock_t left = be32_to_cpu(block->bb_u.s.bb_leftsib); 937 xfs_agblock_t right = be32_to_cpu(block->bb_u.s.bb_rightsib); 938 939 940 if ((lr & XFS_BTCUR_LEFTRA) && left != NULLAGBLOCK) { 941 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno, 942 left, 1, cur->bc_ops->buf_ops); 943 rval++; 944 } 945 946 if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLAGBLOCK) { 947 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno, 948 right, 1, cur->bc_ops->buf_ops); 949 rval++; 950 } 951 952 return rval; 953 } 954 955 /* 956 * Read-ahead btree blocks, at the given level. 957 * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA. 958 */ 959 STATIC int 960 xfs_btree_readahead( 961 struct xfs_btree_cur *cur, /* btree cursor */ 962 int lev, /* level in btree */ 963 int lr) /* left/right bits */ 964 { 965 struct xfs_btree_block *block; 966 967 /* 968 * No readahead needed if we are at the root level and the 969 * btree root is stored in the inode. 970 */ 971 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && 972 (lev == cur->bc_nlevels - 1)) 973 return 0; 974 975 if ((cur->bc_ra[lev] | lr) == cur->bc_ra[lev]) 976 return 0; 977 978 cur->bc_ra[lev] |= lr; 979 block = XFS_BUF_TO_BLOCK(cur->bc_bufs[lev]); 980 981 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) 982 return xfs_btree_readahead_lblock(cur, lr, block); 983 return xfs_btree_readahead_sblock(cur, lr, block); 984 } 985 986 STATIC int 987 xfs_btree_ptr_to_daddr( 988 struct xfs_btree_cur *cur, 989 union xfs_btree_ptr *ptr, 990 xfs_daddr_t *daddr) 991 { 992 xfs_fsblock_t fsbno; 993 xfs_agblock_t agbno; 994 int error; 995 996 error = xfs_btree_check_ptr(cur, ptr, 0, 1); 997 if (error) 998 return error; 999 1000 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { 1001 fsbno = be64_to_cpu(ptr->l); 1002 *daddr = XFS_FSB_TO_DADDR(cur->bc_mp, fsbno); 1003 } else { 1004 agbno = be32_to_cpu(ptr->s); 1005 *daddr = XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno, 1006 agbno); 1007 } 1008 1009 return 0; 1010 } 1011 1012 /* 1013 * Readahead @count btree blocks at the given @ptr location. 1014 * 1015 * We don't need to care about long or short form btrees here as we have a 1016 * method of converting the ptr directly to a daddr available to us. 1017 */ 1018 STATIC void 1019 xfs_btree_readahead_ptr( 1020 struct xfs_btree_cur *cur, 1021 union xfs_btree_ptr *ptr, 1022 xfs_extlen_t count) 1023 { 1024 xfs_daddr_t daddr; 1025 1026 if (xfs_btree_ptr_to_daddr(cur, ptr, &daddr)) 1027 return; 1028 xfs_buf_readahead(cur->bc_mp->m_ddev_targp, daddr, 1029 cur->bc_mp->m_bsize * count, cur->bc_ops->buf_ops); 1030 } 1031 1032 /* 1033 * Set the buffer for level "lev" in the cursor to bp, releasing 1034 * any previous buffer. 1035 */ 1036 STATIC void 1037 xfs_btree_setbuf( 1038 xfs_btree_cur_t *cur, /* btree cursor */ 1039 int lev, /* level in btree */ 1040 xfs_buf_t *bp) /* new buffer to set */ 1041 { 1042 struct xfs_btree_block *b; /* btree block */ 1043 1044 if (cur->bc_bufs[lev]) 1045 xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[lev]); 1046 cur->bc_bufs[lev] = bp; 1047 cur->bc_ra[lev] = 0; 1048 1049 b = XFS_BUF_TO_BLOCK(bp); 1050 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { 1051 if (b->bb_u.l.bb_leftsib == cpu_to_be64(NULLFSBLOCK)) 1052 cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA; 1053 if (b->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK)) 1054 cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA; 1055 } else { 1056 if (b->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK)) 1057 cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA; 1058 if (b->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK)) 1059 cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA; 1060 } 1061 } 1062 1063 bool 1064 xfs_btree_ptr_is_null( 1065 struct xfs_btree_cur *cur, 1066 union xfs_btree_ptr *ptr) 1067 { 1068 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) 1069 return ptr->l == cpu_to_be64(NULLFSBLOCK); 1070 else 1071 return ptr->s == cpu_to_be32(NULLAGBLOCK); 1072 } 1073 1074 STATIC void 1075 xfs_btree_set_ptr_null( 1076 struct xfs_btree_cur *cur, 1077 union xfs_btree_ptr *ptr) 1078 { 1079 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) 1080 ptr->l = cpu_to_be64(NULLFSBLOCK); 1081 else 1082 ptr->s = cpu_to_be32(NULLAGBLOCK); 1083 } 1084 1085 /* 1086 * Get/set/init sibling pointers 1087 */ 1088 void 1089 xfs_btree_get_sibling( 1090 struct xfs_btree_cur *cur, 1091 struct xfs_btree_block *block, 1092 union xfs_btree_ptr *ptr, 1093 int lr) 1094 { 1095 ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB); 1096 1097 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { 1098 if (lr == XFS_BB_RIGHTSIB) 1099 ptr->l = block->bb_u.l.bb_rightsib; 1100 else 1101 ptr->l = block->bb_u.l.bb_leftsib; 1102 } else { 1103 if (lr == XFS_BB_RIGHTSIB) 1104 ptr->s = block->bb_u.s.bb_rightsib; 1105 else 1106 ptr->s = block->bb_u.s.bb_leftsib; 1107 } 1108 } 1109 1110 STATIC void 1111 xfs_btree_set_sibling( 1112 struct xfs_btree_cur *cur, 1113 struct xfs_btree_block *block, 1114 union xfs_btree_ptr *ptr, 1115 int lr) 1116 { 1117 ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB); 1118 1119 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { 1120 if (lr == XFS_BB_RIGHTSIB) 1121 block->bb_u.l.bb_rightsib = ptr->l; 1122 else 1123 block->bb_u.l.bb_leftsib = ptr->l; 1124 } else { 1125 if (lr == XFS_BB_RIGHTSIB) 1126 block->bb_u.s.bb_rightsib = ptr->s; 1127 else 1128 block->bb_u.s.bb_leftsib = ptr->s; 1129 } 1130 } 1131 1132 void 1133 xfs_btree_init_block_int( 1134 struct xfs_mount *mp, 1135 struct xfs_btree_block *buf, 1136 xfs_daddr_t blkno, 1137 xfs_btnum_t btnum, 1138 __u16 level, 1139 __u16 numrecs, 1140 __u64 owner, 1141 unsigned int flags) 1142 { 1143 int crc = xfs_sb_version_hascrc(&mp->m_sb); 1144 __u32 magic = xfs_btree_magic(crc, btnum); 1145 1146 buf->bb_magic = cpu_to_be32(magic); 1147 buf->bb_level = cpu_to_be16(level); 1148 buf->bb_numrecs = cpu_to_be16(numrecs); 1149 1150 if (flags & XFS_BTREE_LONG_PTRS) { 1151 buf->bb_u.l.bb_leftsib = cpu_to_be64(NULLFSBLOCK); 1152 buf->bb_u.l.bb_rightsib = cpu_to_be64(NULLFSBLOCK); 1153 if (crc) { 1154 buf->bb_u.l.bb_blkno = cpu_to_be64(blkno); 1155 buf->bb_u.l.bb_owner = cpu_to_be64(owner); 1156 uuid_copy(&buf->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid); 1157 buf->bb_u.l.bb_pad = 0; 1158 buf->bb_u.l.bb_lsn = 0; 1159 } 1160 } else { 1161 /* owner is a 32 bit value on short blocks */ 1162 __u32 __owner = (__u32)owner; 1163 1164 buf->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK); 1165 buf->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK); 1166 if (crc) { 1167 buf->bb_u.s.bb_blkno = cpu_to_be64(blkno); 1168 buf->bb_u.s.bb_owner = cpu_to_be32(__owner); 1169 uuid_copy(&buf->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid); 1170 buf->bb_u.s.bb_lsn = 0; 1171 } 1172 } 1173 } 1174 1175 void 1176 xfs_btree_init_block( 1177 struct xfs_mount *mp, 1178 struct xfs_buf *bp, 1179 xfs_btnum_t btnum, 1180 __u16 level, 1181 __u16 numrecs, 1182 __u64 owner) 1183 { 1184 xfs_btree_init_block_int(mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn, 1185 btnum, level, numrecs, owner, 0); 1186 } 1187 1188 STATIC void 1189 xfs_btree_init_block_cur( 1190 struct xfs_btree_cur *cur, 1191 struct xfs_buf *bp, 1192 int level, 1193 int numrecs) 1194 { 1195 __u64 owner; 1196 1197 /* 1198 * we can pull the owner from the cursor right now as the different 1199 * owners align directly with the pointer size of the btree. This may 1200 * change in future, but is safe for current users of the generic btree 1201 * code. 1202 */ 1203 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) 1204 owner = cur->bc_private.b.ip->i_ino; 1205 else 1206 owner = cur->bc_private.a.agno; 1207 1208 xfs_btree_init_block_int(cur->bc_mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn, 1209 cur->bc_btnum, level, numrecs, 1210 owner, cur->bc_flags); 1211 } 1212 1213 /* 1214 * Return true if ptr is the last record in the btree and 1215 * we need to track updates to this record. The decision 1216 * will be further refined in the update_lastrec method. 1217 */ 1218 STATIC int 1219 xfs_btree_is_lastrec( 1220 struct xfs_btree_cur *cur, 1221 struct xfs_btree_block *block, 1222 int level) 1223 { 1224 union xfs_btree_ptr ptr; 1225 1226 if (level > 0) 1227 return 0; 1228 if (!(cur->bc_flags & XFS_BTREE_LASTREC_UPDATE)) 1229 return 0; 1230 1231 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB); 1232 if (!xfs_btree_ptr_is_null(cur, &ptr)) 1233 return 0; 1234 return 1; 1235 } 1236 1237 STATIC void 1238 xfs_btree_buf_to_ptr( 1239 struct xfs_btree_cur *cur, 1240 struct xfs_buf *bp, 1241 union xfs_btree_ptr *ptr) 1242 { 1243 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) 1244 ptr->l = cpu_to_be64(XFS_DADDR_TO_FSB(cur->bc_mp, 1245 XFS_BUF_ADDR(bp))); 1246 else { 1247 ptr->s = cpu_to_be32(xfs_daddr_to_agbno(cur->bc_mp, 1248 XFS_BUF_ADDR(bp))); 1249 } 1250 } 1251 1252 STATIC void 1253 xfs_btree_set_refs( 1254 struct xfs_btree_cur *cur, 1255 struct xfs_buf *bp) 1256 { 1257 switch (cur->bc_btnum) { 1258 case XFS_BTNUM_BNO: 1259 case XFS_BTNUM_CNT: 1260 xfs_buf_set_ref(bp, XFS_ALLOC_BTREE_REF); 1261 break; 1262 case XFS_BTNUM_INO: 1263 case XFS_BTNUM_FINO: 1264 xfs_buf_set_ref(bp, XFS_INO_BTREE_REF); 1265 break; 1266 case XFS_BTNUM_BMAP: 1267 xfs_buf_set_ref(bp, XFS_BMAP_BTREE_REF); 1268 break; 1269 case XFS_BTNUM_RMAP: 1270 xfs_buf_set_ref(bp, XFS_RMAP_BTREE_REF); 1271 break; 1272 case XFS_BTNUM_REFC: 1273 xfs_buf_set_ref(bp, XFS_REFC_BTREE_REF); 1274 break; 1275 default: 1276 ASSERT(0); 1277 } 1278 } 1279 1280 STATIC int 1281 xfs_btree_get_buf_block( 1282 struct xfs_btree_cur *cur, 1283 union xfs_btree_ptr *ptr, 1284 struct xfs_btree_block **block, 1285 struct xfs_buf **bpp) 1286 { 1287 struct xfs_mount *mp = cur->bc_mp; 1288 xfs_daddr_t d; 1289 int error; 1290 1291 error = xfs_btree_ptr_to_daddr(cur, ptr, &d); 1292 if (error) 1293 return error; 1294 *bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d, 1295 mp->m_bsize, 0); 1296 1297 if (!*bpp) 1298 return -ENOMEM; 1299 1300 (*bpp)->b_ops = cur->bc_ops->buf_ops; 1301 *block = XFS_BUF_TO_BLOCK(*bpp); 1302 return 0; 1303 } 1304 1305 /* 1306 * Read in the buffer at the given ptr and return the buffer and 1307 * the block pointer within the buffer. 1308 */ 1309 STATIC int 1310 xfs_btree_read_buf_block( 1311 struct xfs_btree_cur *cur, 1312 union xfs_btree_ptr *ptr, 1313 int flags, 1314 struct xfs_btree_block **block, 1315 struct xfs_buf **bpp) 1316 { 1317 struct xfs_mount *mp = cur->bc_mp; 1318 xfs_daddr_t d; 1319 int error; 1320 1321 /* need to sort out how callers deal with failures first */ 1322 ASSERT(!(flags & XBF_TRYLOCK)); 1323 1324 error = xfs_btree_ptr_to_daddr(cur, ptr, &d); 1325 if (error) 1326 return error; 1327 error = xfs_trans_read_buf(mp, cur->bc_tp, mp->m_ddev_targp, d, 1328 mp->m_bsize, flags, bpp, 1329 cur->bc_ops->buf_ops); 1330 if (error) 1331 return error; 1332 1333 xfs_btree_set_refs(cur, *bpp); 1334 *block = XFS_BUF_TO_BLOCK(*bpp); 1335 return 0; 1336 } 1337 1338 /* 1339 * Copy keys from one btree block to another. 1340 */ 1341 STATIC void 1342 xfs_btree_copy_keys( 1343 struct xfs_btree_cur *cur, 1344 union xfs_btree_key *dst_key, 1345 union xfs_btree_key *src_key, 1346 int numkeys) 1347 { 1348 ASSERT(numkeys >= 0); 1349 memcpy(dst_key, src_key, numkeys * cur->bc_ops->key_len); 1350 } 1351 1352 /* 1353 * Copy records from one btree block to another. 1354 */ 1355 STATIC void 1356 xfs_btree_copy_recs( 1357 struct xfs_btree_cur *cur, 1358 union xfs_btree_rec *dst_rec, 1359 union xfs_btree_rec *src_rec, 1360 int numrecs) 1361 { 1362 ASSERT(numrecs >= 0); 1363 memcpy(dst_rec, src_rec, numrecs * cur->bc_ops->rec_len); 1364 } 1365 1366 /* 1367 * Copy block pointers from one btree block to another. 1368 */ 1369 STATIC void 1370 xfs_btree_copy_ptrs( 1371 struct xfs_btree_cur *cur, 1372 union xfs_btree_ptr *dst_ptr, 1373 union xfs_btree_ptr *src_ptr, 1374 int numptrs) 1375 { 1376 ASSERT(numptrs >= 0); 1377 memcpy(dst_ptr, src_ptr, numptrs * xfs_btree_ptr_len(cur)); 1378 } 1379 1380 /* 1381 * Shift keys one index left/right inside a single btree block. 1382 */ 1383 STATIC void 1384 xfs_btree_shift_keys( 1385 struct xfs_btree_cur *cur, 1386 union xfs_btree_key *key, 1387 int dir, 1388 int numkeys) 1389 { 1390 char *dst_key; 1391 1392 ASSERT(numkeys >= 0); 1393 ASSERT(dir == 1 || dir == -1); 1394 1395 dst_key = (char *)key + (dir * cur->bc_ops->key_len); 1396 memmove(dst_key, key, numkeys * cur->bc_ops->key_len); 1397 } 1398 1399 /* 1400 * Shift records one index left/right inside a single btree block. 1401 */ 1402 STATIC void 1403 xfs_btree_shift_recs( 1404 struct xfs_btree_cur *cur, 1405 union xfs_btree_rec *rec, 1406 int dir, 1407 int numrecs) 1408 { 1409 char *dst_rec; 1410 1411 ASSERT(numrecs >= 0); 1412 ASSERT(dir == 1 || dir == -1); 1413 1414 dst_rec = (char *)rec + (dir * cur->bc_ops->rec_len); 1415 memmove(dst_rec, rec, numrecs * cur->bc_ops->rec_len); 1416 } 1417 1418 /* 1419 * Shift block pointers one index left/right inside a single btree block. 1420 */ 1421 STATIC void 1422 xfs_btree_shift_ptrs( 1423 struct xfs_btree_cur *cur, 1424 union xfs_btree_ptr *ptr, 1425 int dir, 1426 int numptrs) 1427 { 1428 char *dst_ptr; 1429 1430 ASSERT(numptrs >= 0); 1431 ASSERT(dir == 1 || dir == -1); 1432 1433 dst_ptr = (char *)ptr + (dir * xfs_btree_ptr_len(cur)); 1434 memmove(dst_ptr, ptr, numptrs * xfs_btree_ptr_len(cur)); 1435 } 1436 1437 /* 1438 * Log key values from the btree block. 1439 */ 1440 STATIC void 1441 xfs_btree_log_keys( 1442 struct xfs_btree_cur *cur, 1443 struct xfs_buf *bp, 1444 int first, 1445 int last) 1446 { 1447 1448 if (bp) { 1449 xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF); 1450 xfs_trans_log_buf(cur->bc_tp, bp, 1451 xfs_btree_key_offset(cur, first), 1452 xfs_btree_key_offset(cur, last + 1) - 1); 1453 } else { 1454 xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip, 1455 xfs_ilog_fbroot(cur->bc_private.b.whichfork)); 1456 } 1457 } 1458 1459 /* 1460 * Log record values from the btree block. 1461 */ 1462 void 1463 xfs_btree_log_recs( 1464 struct xfs_btree_cur *cur, 1465 struct xfs_buf *bp, 1466 int first, 1467 int last) 1468 { 1469 1470 xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF); 1471 xfs_trans_log_buf(cur->bc_tp, bp, 1472 xfs_btree_rec_offset(cur, first), 1473 xfs_btree_rec_offset(cur, last + 1) - 1); 1474 1475 } 1476 1477 /* 1478 * Log block pointer fields from a btree block (nonleaf). 1479 */ 1480 STATIC void 1481 xfs_btree_log_ptrs( 1482 struct xfs_btree_cur *cur, /* btree cursor */ 1483 struct xfs_buf *bp, /* buffer containing btree block */ 1484 int first, /* index of first pointer to log */ 1485 int last) /* index of last pointer to log */ 1486 { 1487 1488 if (bp) { 1489 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 1490 int level = xfs_btree_get_level(block); 1491 1492 xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF); 1493 xfs_trans_log_buf(cur->bc_tp, bp, 1494 xfs_btree_ptr_offset(cur, first, level), 1495 xfs_btree_ptr_offset(cur, last + 1, level) - 1); 1496 } else { 1497 xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip, 1498 xfs_ilog_fbroot(cur->bc_private.b.whichfork)); 1499 } 1500 1501 } 1502 1503 /* 1504 * Log fields from a btree block header. 1505 */ 1506 void 1507 xfs_btree_log_block( 1508 struct xfs_btree_cur *cur, /* btree cursor */ 1509 struct xfs_buf *bp, /* buffer containing btree block */ 1510 int fields) /* mask of fields: XFS_BB_... */ 1511 { 1512 int first; /* first byte offset logged */ 1513 int last; /* last byte offset logged */ 1514 static const short soffsets[] = { /* table of offsets (short) */ 1515 offsetof(struct xfs_btree_block, bb_magic), 1516 offsetof(struct xfs_btree_block, bb_level), 1517 offsetof(struct xfs_btree_block, bb_numrecs), 1518 offsetof(struct xfs_btree_block, bb_u.s.bb_leftsib), 1519 offsetof(struct xfs_btree_block, bb_u.s.bb_rightsib), 1520 offsetof(struct xfs_btree_block, bb_u.s.bb_blkno), 1521 offsetof(struct xfs_btree_block, bb_u.s.bb_lsn), 1522 offsetof(struct xfs_btree_block, bb_u.s.bb_uuid), 1523 offsetof(struct xfs_btree_block, bb_u.s.bb_owner), 1524 offsetof(struct xfs_btree_block, bb_u.s.bb_crc), 1525 XFS_BTREE_SBLOCK_CRC_LEN 1526 }; 1527 static const short loffsets[] = { /* table of offsets (long) */ 1528 offsetof(struct xfs_btree_block, bb_magic), 1529 offsetof(struct xfs_btree_block, bb_level), 1530 offsetof(struct xfs_btree_block, bb_numrecs), 1531 offsetof(struct xfs_btree_block, bb_u.l.bb_leftsib), 1532 offsetof(struct xfs_btree_block, bb_u.l.bb_rightsib), 1533 offsetof(struct xfs_btree_block, bb_u.l.bb_blkno), 1534 offsetof(struct xfs_btree_block, bb_u.l.bb_lsn), 1535 offsetof(struct xfs_btree_block, bb_u.l.bb_uuid), 1536 offsetof(struct xfs_btree_block, bb_u.l.bb_owner), 1537 offsetof(struct xfs_btree_block, bb_u.l.bb_crc), 1538 offsetof(struct xfs_btree_block, bb_u.l.bb_pad), 1539 XFS_BTREE_LBLOCK_CRC_LEN 1540 }; 1541 1542 if (bp) { 1543 int nbits; 1544 1545 if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) { 1546 /* 1547 * We don't log the CRC when updating a btree 1548 * block but instead recreate it during log 1549 * recovery. As the log buffers have checksums 1550 * of their own this is safe and avoids logging a crc 1551 * update in a lot of places. 1552 */ 1553 if (fields == XFS_BB_ALL_BITS) 1554 fields = XFS_BB_ALL_BITS_CRC; 1555 nbits = XFS_BB_NUM_BITS_CRC; 1556 } else { 1557 nbits = XFS_BB_NUM_BITS; 1558 } 1559 xfs_btree_offsets(fields, 1560 (cur->bc_flags & XFS_BTREE_LONG_PTRS) ? 1561 loffsets : soffsets, 1562 nbits, &first, &last); 1563 xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF); 1564 xfs_trans_log_buf(cur->bc_tp, bp, first, last); 1565 } else { 1566 xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip, 1567 xfs_ilog_fbroot(cur->bc_private.b.whichfork)); 1568 } 1569 } 1570 1571 /* 1572 * Increment cursor by one record at the level. 1573 * For nonzero levels the leaf-ward information is untouched. 1574 */ 1575 int /* error */ 1576 xfs_btree_increment( 1577 struct xfs_btree_cur *cur, 1578 int level, 1579 int *stat) /* success/failure */ 1580 { 1581 struct xfs_btree_block *block; 1582 union xfs_btree_ptr ptr; 1583 struct xfs_buf *bp; 1584 int error; /* error return value */ 1585 int lev; 1586 1587 ASSERT(level < cur->bc_nlevels); 1588 1589 /* Read-ahead to the right at this level. */ 1590 xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA); 1591 1592 /* Get a pointer to the btree block. */ 1593 block = xfs_btree_get_block(cur, level, &bp); 1594 1595 #ifdef DEBUG 1596 error = xfs_btree_check_block(cur, block, level, bp); 1597 if (error) 1598 goto error0; 1599 #endif 1600 1601 /* We're done if we remain in the block after the increment. */ 1602 if (++cur->bc_ptrs[level] <= xfs_btree_get_numrecs(block)) 1603 goto out1; 1604 1605 /* Fail if we just went off the right edge of the tree. */ 1606 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB); 1607 if (xfs_btree_ptr_is_null(cur, &ptr)) 1608 goto out0; 1609 1610 XFS_BTREE_STATS_INC(cur, increment); 1611 1612 /* 1613 * March up the tree incrementing pointers. 1614 * Stop when we don't go off the right edge of a block. 1615 */ 1616 for (lev = level + 1; lev < cur->bc_nlevels; lev++) { 1617 block = xfs_btree_get_block(cur, lev, &bp); 1618 1619 #ifdef DEBUG 1620 error = xfs_btree_check_block(cur, block, lev, bp); 1621 if (error) 1622 goto error0; 1623 #endif 1624 1625 if (++cur->bc_ptrs[lev] <= xfs_btree_get_numrecs(block)) 1626 break; 1627 1628 /* Read-ahead the right block for the next loop. */ 1629 xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA); 1630 } 1631 1632 /* 1633 * If we went off the root then we are either seriously 1634 * confused or have the tree root in an inode. 1635 */ 1636 if (lev == cur->bc_nlevels) { 1637 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) 1638 goto out0; 1639 ASSERT(0); 1640 error = -EFSCORRUPTED; 1641 goto error0; 1642 } 1643 ASSERT(lev < cur->bc_nlevels); 1644 1645 /* 1646 * Now walk back down the tree, fixing up the cursor's buffer 1647 * pointers and key numbers. 1648 */ 1649 for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) { 1650 union xfs_btree_ptr *ptrp; 1651 1652 ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block); 1653 --lev; 1654 error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp); 1655 if (error) 1656 goto error0; 1657 1658 xfs_btree_setbuf(cur, lev, bp); 1659 cur->bc_ptrs[lev] = 1; 1660 } 1661 out1: 1662 *stat = 1; 1663 return 0; 1664 1665 out0: 1666 *stat = 0; 1667 return 0; 1668 1669 error0: 1670 return error; 1671 } 1672 1673 /* 1674 * Decrement cursor by one record at the level. 1675 * For nonzero levels the leaf-ward information is untouched. 1676 */ 1677 int /* error */ 1678 xfs_btree_decrement( 1679 struct xfs_btree_cur *cur, 1680 int level, 1681 int *stat) /* success/failure */ 1682 { 1683 struct xfs_btree_block *block; 1684 xfs_buf_t *bp; 1685 int error; /* error return value */ 1686 int lev; 1687 union xfs_btree_ptr ptr; 1688 1689 ASSERT(level < cur->bc_nlevels); 1690 1691 /* Read-ahead to the left at this level. */ 1692 xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA); 1693 1694 /* We're done if we remain in the block after the decrement. */ 1695 if (--cur->bc_ptrs[level] > 0) 1696 goto out1; 1697 1698 /* Get a pointer to the btree block. */ 1699 block = xfs_btree_get_block(cur, level, &bp); 1700 1701 #ifdef DEBUG 1702 error = xfs_btree_check_block(cur, block, level, bp); 1703 if (error) 1704 goto error0; 1705 #endif 1706 1707 /* Fail if we just went off the left edge of the tree. */ 1708 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB); 1709 if (xfs_btree_ptr_is_null(cur, &ptr)) 1710 goto out0; 1711 1712 XFS_BTREE_STATS_INC(cur, decrement); 1713 1714 /* 1715 * March up the tree decrementing pointers. 1716 * Stop when we don't go off the left edge of a block. 1717 */ 1718 for (lev = level + 1; lev < cur->bc_nlevels; lev++) { 1719 if (--cur->bc_ptrs[lev] > 0) 1720 break; 1721 /* Read-ahead the left block for the next loop. */ 1722 xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA); 1723 } 1724 1725 /* 1726 * If we went off the root then we are seriously confused. 1727 * or the root of the tree is in an inode. 1728 */ 1729 if (lev == cur->bc_nlevels) { 1730 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) 1731 goto out0; 1732 ASSERT(0); 1733 error = -EFSCORRUPTED; 1734 goto error0; 1735 } 1736 ASSERT(lev < cur->bc_nlevels); 1737 1738 /* 1739 * Now walk back down the tree, fixing up the cursor's buffer 1740 * pointers and key numbers. 1741 */ 1742 for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) { 1743 union xfs_btree_ptr *ptrp; 1744 1745 ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block); 1746 --lev; 1747 error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp); 1748 if (error) 1749 goto error0; 1750 xfs_btree_setbuf(cur, lev, bp); 1751 cur->bc_ptrs[lev] = xfs_btree_get_numrecs(block); 1752 } 1753 out1: 1754 *stat = 1; 1755 return 0; 1756 1757 out0: 1758 *stat = 0; 1759 return 0; 1760 1761 error0: 1762 return error; 1763 } 1764 1765 int 1766 xfs_btree_lookup_get_block( 1767 struct xfs_btree_cur *cur, /* btree cursor */ 1768 int level, /* level in the btree */ 1769 union xfs_btree_ptr *pp, /* ptr to btree block */ 1770 struct xfs_btree_block **blkp) /* return btree block */ 1771 { 1772 struct xfs_buf *bp; /* buffer pointer for btree block */ 1773 xfs_daddr_t daddr; 1774 int error = 0; 1775 1776 /* special case the root block if in an inode */ 1777 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && 1778 (level == cur->bc_nlevels - 1)) { 1779 *blkp = xfs_btree_get_iroot(cur); 1780 return 0; 1781 } 1782 1783 /* 1784 * If the old buffer at this level for the disk address we are 1785 * looking for re-use it. 1786 * 1787 * Otherwise throw it away and get a new one. 1788 */ 1789 bp = cur->bc_bufs[level]; 1790 error = xfs_btree_ptr_to_daddr(cur, pp, &daddr); 1791 if (error) 1792 return error; 1793 if (bp && XFS_BUF_ADDR(bp) == daddr) { 1794 *blkp = XFS_BUF_TO_BLOCK(bp); 1795 return 0; 1796 } 1797 1798 error = xfs_btree_read_buf_block(cur, pp, 0, blkp, &bp); 1799 if (error) 1800 return error; 1801 1802 /* Check the inode owner since the verifiers don't. */ 1803 if (xfs_sb_version_hascrc(&cur->bc_mp->m_sb) && 1804 !(cur->bc_private.b.flags & XFS_BTCUR_BPRV_INVALID_OWNER) && 1805 (cur->bc_flags & XFS_BTREE_LONG_PTRS) && 1806 be64_to_cpu((*blkp)->bb_u.l.bb_owner) != 1807 cur->bc_private.b.ip->i_ino) 1808 goto out_bad; 1809 1810 /* Did we get the level we were looking for? */ 1811 if (be16_to_cpu((*blkp)->bb_level) != level) 1812 goto out_bad; 1813 1814 /* Check that internal nodes have at least one record. */ 1815 if (level != 0 && be16_to_cpu((*blkp)->bb_numrecs) == 0) 1816 goto out_bad; 1817 1818 xfs_btree_setbuf(cur, level, bp); 1819 return 0; 1820 1821 out_bad: 1822 *blkp = NULL; 1823 xfs_trans_brelse(cur->bc_tp, bp); 1824 return -EFSCORRUPTED; 1825 } 1826 1827 /* 1828 * Get current search key. For level 0 we don't actually have a key 1829 * structure so we make one up from the record. For all other levels 1830 * we just return the right key. 1831 */ 1832 STATIC union xfs_btree_key * 1833 xfs_lookup_get_search_key( 1834 struct xfs_btree_cur *cur, 1835 int level, 1836 int keyno, 1837 struct xfs_btree_block *block, 1838 union xfs_btree_key *kp) 1839 { 1840 if (level == 0) { 1841 cur->bc_ops->init_key_from_rec(kp, 1842 xfs_btree_rec_addr(cur, keyno, block)); 1843 return kp; 1844 } 1845 1846 return xfs_btree_key_addr(cur, keyno, block); 1847 } 1848 1849 /* 1850 * Lookup the record. The cursor is made to point to it, based on dir. 1851 * stat is set to 0 if can't find any such record, 1 for success. 1852 */ 1853 int /* error */ 1854 xfs_btree_lookup( 1855 struct xfs_btree_cur *cur, /* btree cursor */ 1856 xfs_lookup_t dir, /* <=, ==, or >= */ 1857 int *stat) /* success/failure */ 1858 { 1859 struct xfs_btree_block *block; /* current btree block */ 1860 int64_t diff; /* difference for the current key */ 1861 int error; /* error return value */ 1862 int keyno; /* current key number */ 1863 int level; /* level in the btree */ 1864 union xfs_btree_ptr *pp; /* ptr to btree block */ 1865 union xfs_btree_ptr ptr; /* ptr to btree block */ 1866 1867 XFS_BTREE_STATS_INC(cur, lookup); 1868 1869 /* No such thing as a zero-level tree. */ 1870 if (cur->bc_nlevels == 0) 1871 return -EFSCORRUPTED; 1872 1873 block = NULL; 1874 keyno = 0; 1875 1876 /* initialise start pointer from cursor */ 1877 cur->bc_ops->init_ptr_from_cur(cur, &ptr); 1878 pp = &ptr; 1879 1880 /* 1881 * Iterate over each level in the btree, starting at the root. 1882 * For each level above the leaves, find the key we need, based 1883 * on the lookup record, then follow the corresponding block 1884 * pointer down to the next level. 1885 */ 1886 for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) { 1887 /* Get the block we need to do the lookup on. */ 1888 error = xfs_btree_lookup_get_block(cur, level, pp, &block); 1889 if (error) 1890 goto error0; 1891 1892 if (diff == 0) { 1893 /* 1894 * If we already had a key match at a higher level, we 1895 * know we need to use the first entry in this block. 1896 */ 1897 keyno = 1; 1898 } else { 1899 /* Otherwise search this block. Do a binary search. */ 1900 1901 int high; /* high entry number */ 1902 int low; /* low entry number */ 1903 1904 /* Set low and high entry numbers, 1-based. */ 1905 low = 1; 1906 high = xfs_btree_get_numrecs(block); 1907 if (!high) { 1908 /* Block is empty, must be an empty leaf. */ 1909 if (level != 0 || cur->bc_nlevels != 1) { 1910 XFS_CORRUPTION_ERROR(__func__, 1911 XFS_ERRLEVEL_LOW, 1912 cur->bc_mp, block, 1913 sizeof(*block)); 1914 return -EFSCORRUPTED; 1915 } 1916 1917 cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE; 1918 *stat = 0; 1919 return 0; 1920 } 1921 1922 /* Binary search the block. */ 1923 while (low <= high) { 1924 union xfs_btree_key key; 1925 union xfs_btree_key *kp; 1926 1927 XFS_BTREE_STATS_INC(cur, compare); 1928 1929 /* keyno is average of low and high. */ 1930 keyno = (low + high) >> 1; 1931 1932 /* Get current search key */ 1933 kp = xfs_lookup_get_search_key(cur, level, 1934 keyno, block, &key); 1935 1936 /* 1937 * Compute difference to get next direction: 1938 * - less than, move right 1939 * - greater than, move left 1940 * - equal, we're done 1941 */ 1942 diff = cur->bc_ops->key_diff(cur, kp); 1943 if (diff < 0) 1944 low = keyno + 1; 1945 else if (diff > 0) 1946 high = keyno - 1; 1947 else 1948 break; 1949 } 1950 } 1951 1952 /* 1953 * If there are more levels, set up for the next level 1954 * by getting the block number and filling in the cursor. 1955 */ 1956 if (level > 0) { 1957 /* 1958 * If we moved left, need the previous key number, 1959 * unless there isn't one. 1960 */ 1961 if (diff > 0 && --keyno < 1) 1962 keyno = 1; 1963 pp = xfs_btree_ptr_addr(cur, keyno, block); 1964 1965 error = xfs_btree_debug_check_ptr(cur, pp, 0, level); 1966 if (error) 1967 goto error0; 1968 1969 cur->bc_ptrs[level] = keyno; 1970 } 1971 } 1972 1973 /* Done with the search. See if we need to adjust the results. */ 1974 if (dir != XFS_LOOKUP_LE && diff < 0) { 1975 keyno++; 1976 /* 1977 * If ge search and we went off the end of the block, but it's 1978 * not the last block, we're in the wrong block. 1979 */ 1980 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB); 1981 if (dir == XFS_LOOKUP_GE && 1982 keyno > xfs_btree_get_numrecs(block) && 1983 !xfs_btree_ptr_is_null(cur, &ptr)) { 1984 int i; 1985 1986 cur->bc_ptrs[0] = keyno; 1987 error = xfs_btree_increment(cur, 0, &i); 1988 if (error) 1989 goto error0; 1990 XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1); 1991 *stat = 1; 1992 return 0; 1993 } 1994 } else if (dir == XFS_LOOKUP_LE && diff > 0) 1995 keyno--; 1996 cur->bc_ptrs[0] = keyno; 1997 1998 /* Return if we succeeded or not. */ 1999 if (keyno == 0 || keyno > xfs_btree_get_numrecs(block)) 2000 *stat = 0; 2001 else if (dir != XFS_LOOKUP_EQ || diff == 0) 2002 *stat = 1; 2003 else 2004 *stat = 0; 2005 return 0; 2006 2007 error0: 2008 return error; 2009 } 2010 2011 /* Find the high key storage area from a regular key. */ 2012 union xfs_btree_key * 2013 xfs_btree_high_key_from_key( 2014 struct xfs_btree_cur *cur, 2015 union xfs_btree_key *key) 2016 { 2017 ASSERT(cur->bc_flags & XFS_BTREE_OVERLAPPING); 2018 return (union xfs_btree_key *)((char *)key + 2019 (cur->bc_ops->key_len / 2)); 2020 } 2021 2022 /* Determine the low (and high if overlapped) keys of a leaf block */ 2023 STATIC void 2024 xfs_btree_get_leaf_keys( 2025 struct xfs_btree_cur *cur, 2026 struct xfs_btree_block *block, 2027 union xfs_btree_key *key) 2028 { 2029 union xfs_btree_key max_hkey; 2030 union xfs_btree_key hkey; 2031 union xfs_btree_rec *rec; 2032 union xfs_btree_key *high; 2033 int n; 2034 2035 rec = xfs_btree_rec_addr(cur, 1, block); 2036 cur->bc_ops->init_key_from_rec(key, rec); 2037 2038 if (cur->bc_flags & XFS_BTREE_OVERLAPPING) { 2039 2040 cur->bc_ops->init_high_key_from_rec(&max_hkey, rec); 2041 for (n = 2; n <= xfs_btree_get_numrecs(block); n++) { 2042 rec = xfs_btree_rec_addr(cur, n, block); 2043 cur->bc_ops->init_high_key_from_rec(&hkey, rec); 2044 if (cur->bc_ops->diff_two_keys(cur, &hkey, &max_hkey) 2045 > 0) 2046 max_hkey = hkey; 2047 } 2048 2049 high = xfs_btree_high_key_from_key(cur, key); 2050 memcpy(high, &max_hkey, cur->bc_ops->key_len / 2); 2051 } 2052 } 2053 2054 /* Determine the low (and high if overlapped) keys of a node block */ 2055 STATIC void 2056 xfs_btree_get_node_keys( 2057 struct xfs_btree_cur *cur, 2058 struct xfs_btree_block *block, 2059 union xfs_btree_key *key) 2060 { 2061 union xfs_btree_key *hkey; 2062 union xfs_btree_key *max_hkey; 2063 union xfs_btree_key *high; 2064 int n; 2065 2066 if (cur->bc_flags & XFS_BTREE_OVERLAPPING) { 2067 memcpy(key, xfs_btree_key_addr(cur, 1, block), 2068 cur->bc_ops->key_len / 2); 2069 2070 max_hkey = xfs_btree_high_key_addr(cur, 1, block); 2071 for (n = 2; n <= xfs_btree_get_numrecs(block); n++) { 2072 hkey = xfs_btree_high_key_addr(cur, n, block); 2073 if (cur->bc_ops->diff_two_keys(cur, hkey, max_hkey) > 0) 2074 max_hkey = hkey; 2075 } 2076 2077 high = xfs_btree_high_key_from_key(cur, key); 2078 memcpy(high, max_hkey, cur->bc_ops->key_len / 2); 2079 } else { 2080 memcpy(key, xfs_btree_key_addr(cur, 1, block), 2081 cur->bc_ops->key_len); 2082 } 2083 } 2084 2085 /* Derive the keys for any btree block. */ 2086 void 2087 xfs_btree_get_keys( 2088 struct xfs_btree_cur *cur, 2089 struct xfs_btree_block *block, 2090 union xfs_btree_key *key) 2091 { 2092 if (be16_to_cpu(block->bb_level) == 0) 2093 xfs_btree_get_leaf_keys(cur, block, key); 2094 else 2095 xfs_btree_get_node_keys(cur, block, key); 2096 } 2097 2098 /* 2099 * Decide if we need to update the parent keys of a btree block. For 2100 * a standard btree this is only necessary if we're updating the first 2101 * record/key. For an overlapping btree, we must always update the 2102 * keys because the highest key can be in any of the records or keys 2103 * in the block. 2104 */ 2105 static inline bool 2106 xfs_btree_needs_key_update( 2107 struct xfs_btree_cur *cur, 2108 int ptr) 2109 { 2110 return (cur->bc_flags & XFS_BTREE_OVERLAPPING) || ptr == 1; 2111 } 2112 2113 /* 2114 * Update the low and high parent keys of the given level, progressing 2115 * towards the root. If force_all is false, stop if the keys for a given 2116 * level do not need updating. 2117 */ 2118 STATIC int 2119 __xfs_btree_updkeys( 2120 struct xfs_btree_cur *cur, 2121 int level, 2122 struct xfs_btree_block *block, 2123 struct xfs_buf *bp0, 2124 bool force_all) 2125 { 2126 union xfs_btree_key key; /* keys from current level */ 2127 union xfs_btree_key *lkey; /* keys from the next level up */ 2128 union xfs_btree_key *hkey; 2129 union xfs_btree_key *nlkey; /* keys from the next level up */ 2130 union xfs_btree_key *nhkey; 2131 struct xfs_buf *bp; 2132 int ptr; 2133 2134 ASSERT(cur->bc_flags & XFS_BTREE_OVERLAPPING); 2135 2136 /* Exit if there aren't any parent levels to update. */ 2137 if (level + 1 >= cur->bc_nlevels) 2138 return 0; 2139 2140 trace_xfs_btree_updkeys(cur, level, bp0); 2141 2142 lkey = &key; 2143 hkey = xfs_btree_high_key_from_key(cur, lkey); 2144 xfs_btree_get_keys(cur, block, lkey); 2145 for (level++; level < cur->bc_nlevels; level++) { 2146 #ifdef DEBUG 2147 int error; 2148 #endif 2149 block = xfs_btree_get_block(cur, level, &bp); 2150 trace_xfs_btree_updkeys(cur, level, bp); 2151 #ifdef DEBUG 2152 error = xfs_btree_check_block(cur, block, level, bp); 2153 if (error) 2154 return error; 2155 #endif 2156 ptr = cur->bc_ptrs[level]; 2157 nlkey = xfs_btree_key_addr(cur, ptr, block); 2158 nhkey = xfs_btree_high_key_addr(cur, ptr, block); 2159 if (!force_all && 2160 !(cur->bc_ops->diff_two_keys(cur, nlkey, lkey) != 0 || 2161 cur->bc_ops->diff_two_keys(cur, nhkey, hkey) != 0)) 2162 break; 2163 xfs_btree_copy_keys(cur, nlkey, lkey, 1); 2164 xfs_btree_log_keys(cur, bp, ptr, ptr); 2165 if (level + 1 >= cur->bc_nlevels) 2166 break; 2167 xfs_btree_get_node_keys(cur, block, lkey); 2168 } 2169 2170 return 0; 2171 } 2172 2173 /* Update all the keys from some level in cursor back to the root. */ 2174 STATIC int 2175 xfs_btree_updkeys_force( 2176 struct xfs_btree_cur *cur, 2177 int level) 2178 { 2179 struct xfs_buf *bp; 2180 struct xfs_btree_block *block; 2181 2182 block = xfs_btree_get_block(cur, level, &bp); 2183 return __xfs_btree_updkeys(cur, level, block, bp, true); 2184 } 2185 2186 /* 2187 * Update the parent keys of the given level, progressing towards the root. 2188 */ 2189 STATIC int 2190 xfs_btree_update_keys( 2191 struct xfs_btree_cur *cur, 2192 int level) 2193 { 2194 struct xfs_btree_block *block; 2195 struct xfs_buf *bp; 2196 union xfs_btree_key *kp; 2197 union xfs_btree_key key; 2198 int ptr; 2199 2200 ASSERT(level >= 0); 2201 2202 block = xfs_btree_get_block(cur, level, &bp); 2203 if (cur->bc_flags & XFS_BTREE_OVERLAPPING) 2204 return __xfs_btree_updkeys(cur, level, block, bp, false); 2205 2206 /* 2207 * Go up the tree from this level toward the root. 2208 * At each level, update the key value to the value input. 2209 * Stop when we reach a level where the cursor isn't pointing 2210 * at the first entry in the block. 2211 */ 2212 xfs_btree_get_keys(cur, block, &key); 2213 for (level++, ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) { 2214 #ifdef DEBUG 2215 int error; 2216 #endif 2217 block = xfs_btree_get_block(cur, level, &bp); 2218 #ifdef DEBUG 2219 error = xfs_btree_check_block(cur, block, level, bp); 2220 if (error) 2221 return error; 2222 #endif 2223 ptr = cur->bc_ptrs[level]; 2224 kp = xfs_btree_key_addr(cur, ptr, block); 2225 xfs_btree_copy_keys(cur, kp, &key, 1); 2226 xfs_btree_log_keys(cur, bp, ptr, ptr); 2227 } 2228 2229 return 0; 2230 } 2231 2232 /* 2233 * Update the record referred to by cur to the value in the 2234 * given record. This either works (return 0) or gets an 2235 * EFSCORRUPTED error. 2236 */ 2237 int 2238 xfs_btree_update( 2239 struct xfs_btree_cur *cur, 2240 union xfs_btree_rec *rec) 2241 { 2242 struct xfs_btree_block *block; 2243 struct xfs_buf *bp; 2244 int error; 2245 int ptr; 2246 union xfs_btree_rec *rp; 2247 2248 /* Pick up the current block. */ 2249 block = xfs_btree_get_block(cur, 0, &bp); 2250 2251 #ifdef DEBUG 2252 error = xfs_btree_check_block(cur, block, 0, bp); 2253 if (error) 2254 goto error0; 2255 #endif 2256 /* Get the address of the rec to be updated. */ 2257 ptr = cur->bc_ptrs[0]; 2258 rp = xfs_btree_rec_addr(cur, ptr, block); 2259 2260 /* Fill in the new contents and log them. */ 2261 xfs_btree_copy_recs(cur, rp, rec, 1); 2262 xfs_btree_log_recs(cur, bp, ptr, ptr); 2263 2264 /* 2265 * If we are tracking the last record in the tree and 2266 * we are at the far right edge of the tree, update it. 2267 */ 2268 if (xfs_btree_is_lastrec(cur, block, 0)) { 2269 cur->bc_ops->update_lastrec(cur, block, rec, 2270 ptr, LASTREC_UPDATE); 2271 } 2272 2273 /* Pass new key value up to our parent. */ 2274 if (xfs_btree_needs_key_update(cur, ptr)) { 2275 error = xfs_btree_update_keys(cur, 0); 2276 if (error) 2277 goto error0; 2278 } 2279 2280 return 0; 2281 2282 error0: 2283 return error; 2284 } 2285 2286 /* 2287 * Move 1 record left from cur/level if possible. 2288 * Update cur to reflect the new path. 2289 */ 2290 STATIC int /* error */ 2291 xfs_btree_lshift( 2292 struct xfs_btree_cur *cur, 2293 int level, 2294 int *stat) /* success/failure */ 2295 { 2296 struct xfs_buf *lbp; /* left buffer pointer */ 2297 struct xfs_btree_block *left; /* left btree block */ 2298 int lrecs; /* left record count */ 2299 struct xfs_buf *rbp; /* right buffer pointer */ 2300 struct xfs_btree_block *right; /* right btree block */ 2301 struct xfs_btree_cur *tcur; /* temporary btree cursor */ 2302 int rrecs; /* right record count */ 2303 union xfs_btree_ptr lptr; /* left btree pointer */ 2304 union xfs_btree_key *rkp = NULL; /* right btree key */ 2305 union xfs_btree_ptr *rpp = NULL; /* right address pointer */ 2306 union xfs_btree_rec *rrp = NULL; /* right record pointer */ 2307 int error; /* error return value */ 2308 int i; 2309 2310 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && 2311 level == cur->bc_nlevels - 1) 2312 goto out0; 2313 2314 /* Set up variables for this block as "right". */ 2315 right = xfs_btree_get_block(cur, level, &rbp); 2316 2317 #ifdef DEBUG 2318 error = xfs_btree_check_block(cur, right, level, rbp); 2319 if (error) 2320 goto error0; 2321 #endif 2322 2323 /* If we've got no left sibling then we can't shift an entry left. */ 2324 xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB); 2325 if (xfs_btree_ptr_is_null(cur, &lptr)) 2326 goto out0; 2327 2328 /* 2329 * If the cursor entry is the one that would be moved, don't 2330 * do it... it's too complicated. 2331 */ 2332 if (cur->bc_ptrs[level] <= 1) 2333 goto out0; 2334 2335 /* Set up the left neighbor as "left". */ 2336 error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp); 2337 if (error) 2338 goto error0; 2339 2340 /* If it's full, it can't take another entry. */ 2341 lrecs = xfs_btree_get_numrecs(left); 2342 if (lrecs == cur->bc_ops->get_maxrecs(cur, level)) 2343 goto out0; 2344 2345 rrecs = xfs_btree_get_numrecs(right); 2346 2347 /* 2348 * We add one entry to the left side and remove one for the right side. 2349 * Account for it here, the changes will be updated on disk and logged 2350 * later. 2351 */ 2352 lrecs++; 2353 rrecs--; 2354 2355 XFS_BTREE_STATS_INC(cur, lshift); 2356 XFS_BTREE_STATS_ADD(cur, moves, 1); 2357 2358 /* 2359 * If non-leaf, copy a key and a ptr to the left block. 2360 * Log the changes to the left block. 2361 */ 2362 if (level > 0) { 2363 /* It's a non-leaf. Move keys and pointers. */ 2364 union xfs_btree_key *lkp; /* left btree key */ 2365 union xfs_btree_ptr *lpp; /* left address pointer */ 2366 2367 lkp = xfs_btree_key_addr(cur, lrecs, left); 2368 rkp = xfs_btree_key_addr(cur, 1, right); 2369 2370 lpp = xfs_btree_ptr_addr(cur, lrecs, left); 2371 rpp = xfs_btree_ptr_addr(cur, 1, right); 2372 2373 error = xfs_btree_debug_check_ptr(cur, rpp, 0, level); 2374 if (error) 2375 goto error0; 2376 2377 xfs_btree_copy_keys(cur, lkp, rkp, 1); 2378 xfs_btree_copy_ptrs(cur, lpp, rpp, 1); 2379 2380 xfs_btree_log_keys(cur, lbp, lrecs, lrecs); 2381 xfs_btree_log_ptrs(cur, lbp, lrecs, lrecs); 2382 2383 ASSERT(cur->bc_ops->keys_inorder(cur, 2384 xfs_btree_key_addr(cur, lrecs - 1, left), lkp)); 2385 } else { 2386 /* It's a leaf. Move records. */ 2387 union xfs_btree_rec *lrp; /* left record pointer */ 2388 2389 lrp = xfs_btree_rec_addr(cur, lrecs, left); 2390 rrp = xfs_btree_rec_addr(cur, 1, right); 2391 2392 xfs_btree_copy_recs(cur, lrp, rrp, 1); 2393 xfs_btree_log_recs(cur, lbp, lrecs, lrecs); 2394 2395 ASSERT(cur->bc_ops->recs_inorder(cur, 2396 xfs_btree_rec_addr(cur, lrecs - 1, left), lrp)); 2397 } 2398 2399 xfs_btree_set_numrecs(left, lrecs); 2400 xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS); 2401 2402 xfs_btree_set_numrecs(right, rrecs); 2403 xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS); 2404 2405 /* 2406 * Slide the contents of right down one entry. 2407 */ 2408 XFS_BTREE_STATS_ADD(cur, moves, rrecs - 1); 2409 if (level > 0) { 2410 /* It's a nonleaf. operate on keys and ptrs */ 2411 int i; /* loop index */ 2412 2413 for (i = 0; i < rrecs; i++) { 2414 error = xfs_btree_debug_check_ptr(cur, rpp, i + 1, level); 2415 if (error) 2416 goto error0; 2417 } 2418 2419 xfs_btree_shift_keys(cur, 2420 xfs_btree_key_addr(cur, 2, right), 2421 -1, rrecs); 2422 xfs_btree_shift_ptrs(cur, 2423 xfs_btree_ptr_addr(cur, 2, right), 2424 -1, rrecs); 2425 2426 xfs_btree_log_keys(cur, rbp, 1, rrecs); 2427 xfs_btree_log_ptrs(cur, rbp, 1, rrecs); 2428 } else { 2429 /* It's a leaf. operate on records */ 2430 xfs_btree_shift_recs(cur, 2431 xfs_btree_rec_addr(cur, 2, right), 2432 -1, rrecs); 2433 xfs_btree_log_recs(cur, rbp, 1, rrecs); 2434 } 2435 2436 /* 2437 * Using a temporary cursor, update the parent key values of the 2438 * block on the left. 2439 */ 2440 if (cur->bc_flags & XFS_BTREE_OVERLAPPING) { 2441 error = xfs_btree_dup_cursor(cur, &tcur); 2442 if (error) 2443 goto error0; 2444 i = xfs_btree_firstrec(tcur, level); 2445 XFS_WANT_CORRUPTED_GOTO(tcur->bc_mp, i == 1, error0); 2446 2447 error = xfs_btree_decrement(tcur, level, &i); 2448 if (error) 2449 goto error1; 2450 2451 /* Update the parent high keys of the left block, if needed. */ 2452 error = xfs_btree_update_keys(tcur, level); 2453 if (error) 2454 goto error1; 2455 2456 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); 2457 } 2458 2459 /* Update the parent keys of the right block. */ 2460 error = xfs_btree_update_keys(cur, level); 2461 if (error) 2462 goto error0; 2463 2464 /* Slide the cursor value left one. */ 2465 cur->bc_ptrs[level]--; 2466 2467 *stat = 1; 2468 return 0; 2469 2470 out0: 2471 *stat = 0; 2472 return 0; 2473 2474 error0: 2475 return error; 2476 2477 error1: 2478 xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); 2479 return error; 2480 } 2481 2482 /* 2483 * Move 1 record right from cur/level if possible. 2484 * Update cur to reflect the new path. 2485 */ 2486 STATIC int /* error */ 2487 xfs_btree_rshift( 2488 struct xfs_btree_cur *cur, 2489 int level, 2490 int *stat) /* success/failure */ 2491 { 2492 struct xfs_buf *lbp; /* left buffer pointer */ 2493 struct xfs_btree_block *left; /* left btree block */ 2494 struct xfs_buf *rbp; /* right buffer pointer */ 2495 struct xfs_btree_block *right; /* right btree block */ 2496 struct xfs_btree_cur *tcur; /* temporary btree cursor */ 2497 union xfs_btree_ptr rptr; /* right block pointer */ 2498 union xfs_btree_key *rkp; /* right btree key */ 2499 int rrecs; /* right record count */ 2500 int lrecs; /* left record count */ 2501 int error; /* error return value */ 2502 int i; /* loop counter */ 2503 2504 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && 2505 (level == cur->bc_nlevels - 1)) 2506 goto out0; 2507 2508 /* Set up variables for this block as "left". */ 2509 left = xfs_btree_get_block(cur, level, &lbp); 2510 2511 #ifdef DEBUG 2512 error = xfs_btree_check_block(cur, left, level, lbp); 2513 if (error) 2514 goto error0; 2515 #endif 2516 2517 /* If we've got no right sibling then we can't shift an entry right. */ 2518 xfs_btree_get_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB); 2519 if (xfs_btree_ptr_is_null(cur, &rptr)) 2520 goto out0; 2521 2522 /* 2523 * If the cursor entry is the one that would be moved, don't 2524 * do it... it's too complicated. 2525 */ 2526 lrecs = xfs_btree_get_numrecs(left); 2527 if (cur->bc_ptrs[level] >= lrecs) 2528 goto out0; 2529 2530 /* Set up the right neighbor as "right". */ 2531 error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp); 2532 if (error) 2533 goto error0; 2534 2535 /* If it's full, it can't take another entry. */ 2536 rrecs = xfs_btree_get_numrecs(right); 2537 if (rrecs == cur->bc_ops->get_maxrecs(cur, level)) 2538 goto out0; 2539 2540 XFS_BTREE_STATS_INC(cur, rshift); 2541 XFS_BTREE_STATS_ADD(cur, moves, rrecs); 2542 2543 /* 2544 * Make a hole at the start of the right neighbor block, then 2545 * copy the last left block entry to the hole. 2546 */ 2547 if (level > 0) { 2548 /* It's a nonleaf. make a hole in the keys and ptrs */ 2549 union xfs_btree_key *lkp; 2550 union xfs_btree_ptr *lpp; 2551 union xfs_btree_ptr *rpp; 2552 2553 lkp = xfs_btree_key_addr(cur, lrecs, left); 2554 lpp = xfs_btree_ptr_addr(cur, lrecs, left); 2555 rkp = xfs_btree_key_addr(cur, 1, right); 2556 rpp = xfs_btree_ptr_addr(cur, 1, right); 2557 2558 for (i = rrecs - 1; i >= 0; i--) { 2559 error = xfs_btree_debug_check_ptr(cur, rpp, i, level); 2560 if (error) 2561 goto error0; 2562 } 2563 2564 xfs_btree_shift_keys(cur, rkp, 1, rrecs); 2565 xfs_btree_shift_ptrs(cur, rpp, 1, rrecs); 2566 2567 error = xfs_btree_debug_check_ptr(cur, lpp, 0, level); 2568 if (error) 2569 goto error0; 2570 2571 /* Now put the new data in, and log it. */ 2572 xfs_btree_copy_keys(cur, rkp, lkp, 1); 2573 xfs_btree_copy_ptrs(cur, rpp, lpp, 1); 2574 2575 xfs_btree_log_keys(cur, rbp, 1, rrecs + 1); 2576 xfs_btree_log_ptrs(cur, rbp, 1, rrecs + 1); 2577 2578 ASSERT(cur->bc_ops->keys_inorder(cur, rkp, 2579 xfs_btree_key_addr(cur, 2, right))); 2580 } else { 2581 /* It's a leaf. make a hole in the records */ 2582 union xfs_btree_rec *lrp; 2583 union xfs_btree_rec *rrp; 2584 2585 lrp = xfs_btree_rec_addr(cur, lrecs, left); 2586 rrp = xfs_btree_rec_addr(cur, 1, right); 2587 2588 xfs_btree_shift_recs(cur, rrp, 1, rrecs); 2589 2590 /* Now put the new data in, and log it. */ 2591 xfs_btree_copy_recs(cur, rrp, lrp, 1); 2592 xfs_btree_log_recs(cur, rbp, 1, rrecs + 1); 2593 } 2594 2595 /* 2596 * Decrement and log left's numrecs, bump and log right's numrecs. 2597 */ 2598 xfs_btree_set_numrecs(left, --lrecs); 2599 xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS); 2600 2601 xfs_btree_set_numrecs(right, ++rrecs); 2602 xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS); 2603 2604 /* 2605 * Using a temporary cursor, update the parent key values of the 2606 * block on the right. 2607 */ 2608 error = xfs_btree_dup_cursor(cur, &tcur); 2609 if (error) 2610 goto error0; 2611 i = xfs_btree_lastrec(tcur, level); 2612 XFS_WANT_CORRUPTED_GOTO(tcur->bc_mp, i == 1, error0); 2613 2614 error = xfs_btree_increment(tcur, level, &i); 2615 if (error) 2616 goto error1; 2617 2618 /* Update the parent high keys of the left block, if needed. */ 2619 if (cur->bc_flags & XFS_BTREE_OVERLAPPING) { 2620 error = xfs_btree_update_keys(cur, level); 2621 if (error) 2622 goto error1; 2623 } 2624 2625 /* Update the parent keys of the right block. */ 2626 error = xfs_btree_update_keys(tcur, level); 2627 if (error) 2628 goto error1; 2629 2630 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); 2631 2632 *stat = 1; 2633 return 0; 2634 2635 out0: 2636 *stat = 0; 2637 return 0; 2638 2639 error0: 2640 return error; 2641 2642 error1: 2643 xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); 2644 return error; 2645 } 2646 2647 /* 2648 * Split cur/level block in half. 2649 * Return new block number and the key to its first 2650 * record (to be inserted into parent). 2651 */ 2652 STATIC int /* error */ 2653 __xfs_btree_split( 2654 struct xfs_btree_cur *cur, 2655 int level, 2656 union xfs_btree_ptr *ptrp, 2657 union xfs_btree_key *key, 2658 struct xfs_btree_cur **curp, 2659 int *stat) /* success/failure */ 2660 { 2661 union xfs_btree_ptr lptr; /* left sibling block ptr */ 2662 struct xfs_buf *lbp; /* left buffer pointer */ 2663 struct xfs_btree_block *left; /* left btree block */ 2664 union xfs_btree_ptr rptr; /* right sibling block ptr */ 2665 struct xfs_buf *rbp; /* right buffer pointer */ 2666 struct xfs_btree_block *right; /* right btree block */ 2667 union xfs_btree_ptr rrptr; /* right-right sibling ptr */ 2668 struct xfs_buf *rrbp; /* right-right buffer pointer */ 2669 struct xfs_btree_block *rrblock; /* right-right btree block */ 2670 int lrecs; 2671 int rrecs; 2672 int src_index; 2673 int error; /* error return value */ 2674 int i; 2675 2676 XFS_BTREE_STATS_INC(cur, split); 2677 2678 /* Set up left block (current one). */ 2679 left = xfs_btree_get_block(cur, level, &lbp); 2680 2681 #ifdef DEBUG 2682 error = xfs_btree_check_block(cur, left, level, lbp); 2683 if (error) 2684 goto error0; 2685 #endif 2686 2687 xfs_btree_buf_to_ptr(cur, lbp, &lptr); 2688 2689 /* Allocate the new block. If we can't do it, we're toast. Give up. */ 2690 error = cur->bc_ops->alloc_block(cur, &lptr, &rptr, stat); 2691 if (error) 2692 goto error0; 2693 if (*stat == 0) 2694 goto out0; 2695 XFS_BTREE_STATS_INC(cur, alloc); 2696 2697 /* Set up the new block as "right". */ 2698 error = xfs_btree_get_buf_block(cur, &rptr, &right, &rbp); 2699 if (error) 2700 goto error0; 2701 2702 /* Fill in the btree header for the new right block. */ 2703 xfs_btree_init_block_cur(cur, rbp, xfs_btree_get_level(left), 0); 2704 2705 /* 2706 * Split the entries between the old and the new block evenly. 2707 * Make sure that if there's an odd number of entries now, that 2708 * each new block will have the same number of entries. 2709 */ 2710 lrecs = xfs_btree_get_numrecs(left); 2711 rrecs = lrecs / 2; 2712 if ((lrecs & 1) && cur->bc_ptrs[level] <= rrecs + 1) 2713 rrecs++; 2714 src_index = (lrecs - rrecs + 1); 2715 2716 XFS_BTREE_STATS_ADD(cur, moves, rrecs); 2717 2718 /* Adjust numrecs for the later get_*_keys() calls. */ 2719 lrecs -= rrecs; 2720 xfs_btree_set_numrecs(left, lrecs); 2721 xfs_btree_set_numrecs(right, xfs_btree_get_numrecs(right) + rrecs); 2722 2723 /* 2724 * Copy btree block entries from the left block over to the 2725 * new block, the right. Update the right block and log the 2726 * changes. 2727 */ 2728 if (level > 0) { 2729 /* It's a non-leaf. Move keys and pointers. */ 2730 union xfs_btree_key *lkp; /* left btree key */ 2731 union xfs_btree_ptr *lpp; /* left address pointer */ 2732 union xfs_btree_key *rkp; /* right btree key */ 2733 union xfs_btree_ptr *rpp; /* right address pointer */ 2734 2735 lkp = xfs_btree_key_addr(cur, src_index, left); 2736 lpp = xfs_btree_ptr_addr(cur, src_index, left); 2737 rkp = xfs_btree_key_addr(cur, 1, right); 2738 rpp = xfs_btree_ptr_addr(cur, 1, right); 2739 2740 for (i = src_index; i < rrecs; i++) { 2741 error = xfs_btree_debug_check_ptr(cur, lpp, i, level); 2742 if (error) 2743 goto error0; 2744 } 2745 2746 /* Copy the keys & pointers to the new block. */ 2747 xfs_btree_copy_keys(cur, rkp, lkp, rrecs); 2748 xfs_btree_copy_ptrs(cur, rpp, lpp, rrecs); 2749 2750 xfs_btree_log_keys(cur, rbp, 1, rrecs); 2751 xfs_btree_log_ptrs(cur, rbp, 1, rrecs); 2752 2753 /* Stash the keys of the new block for later insertion. */ 2754 xfs_btree_get_node_keys(cur, right, key); 2755 } else { 2756 /* It's a leaf. Move records. */ 2757 union xfs_btree_rec *lrp; /* left record pointer */ 2758 union xfs_btree_rec *rrp; /* right record pointer */ 2759 2760 lrp = xfs_btree_rec_addr(cur, src_index, left); 2761 rrp = xfs_btree_rec_addr(cur, 1, right); 2762 2763 /* Copy records to the new block. */ 2764 xfs_btree_copy_recs(cur, rrp, lrp, rrecs); 2765 xfs_btree_log_recs(cur, rbp, 1, rrecs); 2766 2767 /* Stash the keys of the new block for later insertion. */ 2768 xfs_btree_get_leaf_keys(cur, right, key); 2769 } 2770 2771 /* 2772 * Find the left block number by looking in the buffer. 2773 * Adjust sibling pointers. 2774 */ 2775 xfs_btree_get_sibling(cur, left, &rrptr, XFS_BB_RIGHTSIB); 2776 xfs_btree_set_sibling(cur, right, &rrptr, XFS_BB_RIGHTSIB); 2777 xfs_btree_set_sibling(cur, right, &lptr, XFS_BB_LEFTSIB); 2778 xfs_btree_set_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB); 2779 2780 xfs_btree_log_block(cur, rbp, XFS_BB_ALL_BITS); 2781 xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB); 2782 2783 /* 2784 * If there's a block to the new block's right, make that block 2785 * point back to right instead of to left. 2786 */ 2787 if (!xfs_btree_ptr_is_null(cur, &rrptr)) { 2788 error = xfs_btree_read_buf_block(cur, &rrptr, 2789 0, &rrblock, &rrbp); 2790 if (error) 2791 goto error0; 2792 xfs_btree_set_sibling(cur, rrblock, &rptr, XFS_BB_LEFTSIB); 2793 xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB); 2794 } 2795 2796 /* Update the parent high keys of the left block, if needed. */ 2797 if (cur->bc_flags & XFS_BTREE_OVERLAPPING) { 2798 error = xfs_btree_update_keys(cur, level); 2799 if (error) 2800 goto error0; 2801 } 2802 2803 /* 2804 * If the cursor is really in the right block, move it there. 2805 * If it's just pointing past the last entry in left, then we'll 2806 * insert there, so don't change anything in that case. 2807 */ 2808 if (cur->bc_ptrs[level] > lrecs + 1) { 2809 xfs_btree_setbuf(cur, level, rbp); 2810 cur->bc_ptrs[level] -= lrecs; 2811 } 2812 /* 2813 * If there are more levels, we'll need another cursor which refers 2814 * the right block, no matter where this cursor was. 2815 */ 2816 if (level + 1 < cur->bc_nlevels) { 2817 error = xfs_btree_dup_cursor(cur, curp); 2818 if (error) 2819 goto error0; 2820 (*curp)->bc_ptrs[level + 1]++; 2821 } 2822 *ptrp = rptr; 2823 *stat = 1; 2824 return 0; 2825 out0: 2826 *stat = 0; 2827 return 0; 2828 2829 error0: 2830 return error; 2831 } 2832 2833 struct xfs_btree_split_args { 2834 struct xfs_btree_cur *cur; 2835 int level; 2836 union xfs_btree_ptr *ptrp; 2837 union xfs_btree_key *key; 2838 struct xfs_btree_cur **curp; 2839 int *stat; /* success/failure */ 2840 int result; 2841 bool kswapd; /* allocation in kswapd context */ 2842 struct completion *done; 2843 struct work_struct work; 2844 }; 2845 2846 /* 2847 * Stack switching interfaces for allocation 2848 */ 2849 static void 2850 xfs_btree_split_worker( 2851 struct work_struct *work) 2852 { 2853 struct xfs_btree_split_args *args = container_of(work, 2854 struct xfs_btree_split_args, work); 2855 unsigned long pflags; 2856 unsigned long new_pflags = PF_MEMALLOC_NOFS; 2857 2858 /* 2859 * we are in a transaction context here, but may also be doing work 2860 * in kswapd context, and hence we may need to inherit that state 2861 * temporarily to ensure that we don't block waiting for memory reclaim 2862 * in any way. 2863 */ 2864 if (args->kswapd) 2865 new_pflags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD; 2866 2867 current_set_flags_nested(&pflags, new_pflags); 2868 2869 args->result = __xfs_btree_split(args->cur, args->level, args->ptrp, 2870 args->key, args->curp, args->stat); 2871 complete(args->done); 2872 2873 current_restore_flags_nested(&pflags, new_pflags); 2874 } 2875 2876 /* 2877 * BMBT split requests often come in with little stack to work on. Push 2878 * them off to a worker thread so there is lots of stack to use. For the other 2879 * btree types, just call directly to avoid the context switch overhead here. 2880 */ 2881 STATIC int /* error */ 2882 xfs_btree_split( 2883 struct xfs_btree_cur *cur, 2884 int level, 2885 union xfs_btree_ptr *ptrp, 2886 union xfs_btree_key *key, 2887 struct xfs_btree_cur **curp, 2888 int *stat) /* success/failure */ 2889 { 2890 struct xfs_btree_split_args args; 2891 DECLARE_COMPLETION_ONSTACK(done); 2892 2893 if (cur->bc_btnum != XFS_BTNUM_BMAP) 2894 return __xfs_btree_split(cur, level, ptrp, key, curp, stat); 2895 2896 args.cur = cur; 2897 args.level = level; 2898 args.ptrp = ptrp; 2899 args.key = key; 2900 args.curp = curp; 2901 args.stat = stat; 2902 args.done = &done; 2903 args.kswapd = current_is_kswapd(); 2904 INIT_WORK_ONSTACK(&args.work, xfs_btree_split_worker); 2905 queue_work(xfs_alloc_wq, &args.work); 2906 wait_for_completion(&done); 2907 destroy_work_on_stack(&args.work); 2908 return args.result; 2909 } 2910 2911 2912 /* 2913 * Copy the old inode root contents into a real block and make the 2914 * broot point to it. 2915 */ 2916 int /* error */ 2917 xfs_btree_new_iroot( 2918 struct xfs_btree_cur *cur, /* btree cursor */ 2919 int *logflags, /* logging flags for inode */ 2920 int *stat) /* return status - 0 fail */ 2921 { 2922 struct xfs_buf *cbp; /* buffer for cblock */ 2923 struct xfs_btree_block *block; /* btree block */ 2924 struct xfs_btree_block *cblock; /* child btree block */ 2925 union xfs_btree_key *ckp; /* child key pointer */ 2926 union xfs_btree_ptr *cpp; /* child ptr pointer */ 2927 union xfs_btree_key *kp; /* pointer to btree key */ 2928 union xfs_btree_ptr *pp; /* pointer to block addr */ 2929 union xfs_btree_ptr nptr; /* new block addr */ 2930 int level; /* btree level */ 2931 int error; /* error return code */ 2932 int i; /* loop counter */ 2933 2934 XFS_BTREE_STATS_INC(cur, newroot); 2935 2936 ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE); 2937 2938 level = cur->bc_nlevels - 1; 2939 2940 block = xfs_btree_get_iroot(cur); 2941 pp = xfs_btree_ptr_addr(cur, 1, block); 2942 2943 /* Allocate the new block. If we can't do it, we're toast. Give up. */ 2944 error = cur->bc_ops->alloc_block(cur, pp, &nptr, stat); 2945 if (error) 2946 goto error0; 2947 if (*stat == 0) 2948 return 0; 2949 2950 XFS_BTREE_STATS_INC(cur, alloc); 2951 2952 /* Copy the root into a real block. */ 2953 error = xfs_btree_get_buf_block(cur, &nptr, &cblock, &cbp); 2954 if (error) 2955 goto error0; 2956 2957 /* 2958 * we can't just memcpy() the root in for CRC enabled btree blocks. 2959 * In that case have to also ensure the blkno remains correct 2960 */ 2961 memcpy(cblock, block, xfs_btree_block_len(cur)); 2962 if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) { 2963 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) 2964 cblock->bb_u.l.bb_blkno = cpu_to_be64(cbp->b_bn); 2965 else 2966 cblock->bb_u.s.bb_blkno = cpu_to_be64(cbp->b_bn); 2967 } 2968 2969 be16_add_cpu(&block->bb_level, 1); 2970 xfs_btree_set_numrecs(block, 1); 2971 cur->bc_nlevels++; 2972 cur->bc_ptrs[level + 1] = 1; 2973 2974 kp = xfs_btree_key_addr(cur, 1, block); 2975 ckp = xfs_btree_key_addr(cur, 1, cblock); 2976 xfs_btree_copy_keys(cur, ckp, kp, xfs_btree_get_numrecs(cblock)); 2977 2978 cpp = xfs_btree_ptr_addr(cur, 1, cblock); 2979 for (i = 0; i < be16_to_cpu(cblock->bb_numrecs); i++) { 2980 error = xfs_btree_debug_check_ptr(cur, pp, i, level); 2981 if (error) 2982 goto error0; 2983 } 2984 2985 xfs_btree_copy_ptrs(cur, cpp, pp, xfs_btree_get_numrecs(cblock)); 2986 2987 error = xfs_btree_debug_check_ptr(cur, &nptr, 0, level); 2988 if (error) 2989 goto error0; 2990 2991 xfs_btree_copy_ptrs(cur, pp, &nptr, 1); 2992 2993 xfs_iroot_realloc(cur->bc_private.b.ip, 2994 1 - xfs_btree_get_numrecs(cblock), 2995 cur->bc_private.b.whichfork); 2996 2997 xfs_btree_setbuf(cur, level, cbp); 2998 2999 /* 3000 * Do all this logging at the end so that 3001 * the root is at the right level. 3002 */ 3003 xfs_btree_log_block(cur, cbp, XFS_BB_ALL_BITS); 3004 xfs_btree_log_keys(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs)); 3005 xfs_btree_log_ptrs(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs)); 3006 3007 *logflags |= 3008 XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_private.b.whichfork); 3009 *stat = 1; 3010 return 0; 3011 error0: 3012 return error; 3013 } 3014 3015 /* 3016 * Allocate a new root block, fill it in. 3017 */ 3018 STATIC int /* error */ 3019 xfs_btree_new_root( 3020 struct xfs_btree_cur *cur, /* btree cursor */ 3021 int *stat) /* success/failure */ 3022 { 3023 struct xfs_btree_block *block; /* one half of the old root block */ 3024 struct xfs_buf *bp; /* buffer containing block */ 3025 int error; /* error return value */ 3026 struct xfs_buf *lbp; /* left buffer pointer */ 3027 struct xfs_btree_block *left; /* left btree block */ 3028 struct xfs_buf *nbp; /* new (root) buffer */ 3029 struct xfs_btree_block *new; /* new (root) btree block */ 3030 int nptr; /* new value for key index, 1 or 2 */ 3031 struct xfs_buf *rbp; /* right buffer pointer */ 3032 struct xfs_btree_block *right; /* right btree block */ 3033 union xfs_btree_ptr rptr; 3034 union xfs_btree_ptr lptr; 3035 3036 XFS_BTREE_STATS_INC(cur, newroot); 3037 3038 /* initialise our start point from the cursor */ 3039 cur->bc_ops->init_ptr_from_cur(cur, &rptr); 3040 3041 /* Allocate the new block. If we can't do it, we're toast. Give up. */ 3042 error = cur->bc_ops->alloc_block(cur, &rptr, &lptr, stat); 3043 if (error) 3044 goto error0; 3045 if (*stat == 0) 3046 goto out0; 3047 XFS_BTREE_STATS_INC(cur, alloc); 3048 3049 /* Set up the new block. */ 3050 error = xfs_btree_get_buf_block(cur, &lptr, &new, &nbp); 3051 if (error) 3052 goto error0; 3053 3054 /* Set the root in the holding structure increasing the level by 1. */ 3055 cur->bc_ops->set_root(cur, &lptr, 1); 3056 3057 /* 3058 * At the previous root level there are now two blocks: the old root, 3059 * and the new block generated when it was split. We don't know which 3060 * one the cursor is pointing at, so we set up variables "left" and 3061 * "right" for each case. 3062 */ 3063 block = xfs_btree_get_block(cur, cur->bc_nlevels - 1, &bp); 3064 3065 #ifdef DEBUG 3066 error = xfs_btree_check_block(cur, block, cur->bc_nlevels - 1, bp); 3067 if (error) 3068 goto error0; 3069 #endif 3070 3071 xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB); 3072 if (!xfs_btree_ptr_is_null(cur, &rptr)) { 3073 /* Our block is left, pick up the right block. */ 3074 lbp = bp; 3075 xfs_btree_buf_to_ptr(cur, lbp, &lptr); 3076 left = block; 3077 error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp); 3078 if (error) 3079 goto error0; 3080 bp = rbp; 3081 nptr = 1; 3082 } else { 3083 /* Our block is right, pick up the left block. */ 3084 rbp = bp; 3085 xfs_btree_buf_to_ptr(cur, rbp, &rptr); 3086 right = block; 3087 xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB); 3088 error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp); 3089 if (error) 3090 goto error0; 3091 bp = lbp; 3092 nptr = 2; 3093 } 3094 3095 /* Fill in the new block's btree header and log it. */ 3096 xfs_btree_init_block_cur(cur, nbp, cur->bc_nlevels, 2); 3097 xfs_btree_log_block(cur, nbp, XFS_BB_ALL_BITS); 3098 ASSERT(!xfs_btree_ptr_is_null(cur, &lptr) && 3099 !xfs_btree_ptr_is_null(cur, &rptr)); 3100 3101 /* Fill in the key data in the new root. */ 3102 if (xfs_btree_get_level(left) > 0) { 3103 /* 3104 * Get the keys for the left block's keys and put them directly 3105 * in the parent block. Do the same for the right block. 3106 */ 3107 xfs_btree_get_node_keys(cur, left, 3108 xfs_btree_key_addr(cur, 1, new)); 3109 xfs_btree_get_node_keys(cur, right, 3110 xfs_btree_key_addr(cur, 2, new)); 3111 } else { 3112 /* 3113 * Get the keys for the left block's records and put them 3114 * directly in the parent block. Do the same for the right 3115 * block. 3116 */ 3117 xfs_btree_get_leaf_keys(cur, left, 3118 xfs_btree_key_addr(cur, 1, new)); 3119 xfs_btree_get_leaf_keys(cur, right, 3120 xfs_btree_key_addr(cur, 2, new)); 3121 } 3122 xfs_btree_log_keys(cur, nbp, 1, 2); 3123 3124 /* Fill in the pointer data in the new root. */ 3125 xfs_btree_copy_ptrs(cur, 3126 xfs_btree_ptr_addr(cur, 1, new), &lptr, 1); 3127 xfs_btree_copy_ptrs(cur, 3128 xfs_btree_ptr_addr(cur, 2, new), &rptr, 1); 3129 xfs_btree_log_ptrs(cur, nbp, 1, 2); 3130 3131 /* Fix up the cursor. */ 3132 xfs_btree_setbuf(cur, cur->bc_nlevels, nbp); 3133 cur->bc_ptrs[cur->bc_nlevels] = nptr; 3134 cur->bc_nlevels++; 3135 *stat = 1; 3136 return 0; 3137 error0: 3138 return error; 3139 out0: 3140 *stat = 0; 3141 return 0; 3142 } 3143 3144 STATIC int 3145 xfs_btree_make_block_unfull( 3146 struct xfs_btree_cur *cur, /* btree cursor */ 3147 int level, /* btree level */ 3148 int numrecs,/* # of recs in block */ 3149 int *oindex,/* old tree index */ 3150 int *index, /* new tree index */ 3151 union xfs_btree_ptr *nptr, /* new btree ptr */ 3152 struct xfs_btree_cur **ncur, /* new btree cursor */ 3153 union xfs_btree_key *key, /* key of new block */ 3154 int *stat) 3155 { 3156 int error = 0; 3157 3158 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && 3159 level == cur->bc_nlevels - 1) { 3160 struct xfs_inode *ip = cur->bc_private.b.ip; 3161 3162 if (numrecs < cur->bc_ops->get_dmaxrecs(cur, level)) { 3163 /* A root block that can be made bigger. */ 3164 xfs_iroot_realloc(ip, 1, cur->bc_private.b.whichfork); 3165 *stat = 1; 3166 } else { 3167 /* A root block that needs replacing */ 3168 int logflags = 0; 3169 3170 error = xfs_btree_new_iroot(cur, &logflags, stat); 3171 if (error || *stat == 0) 3172 return error; 3173 3174 xfs_trans_log_inode(cur->bc_tp, ip, logflags); 3175 } 3176 3177 return 0; 3178 } 3179 3180 /* First, try shifting an entry to the right neighbor. */ 3181 error = xfs_btree_rshift(cur, level, stat); 3182 if (error || *stat) 3183 return error; 3184 3185 /* Next, try shifting an entry to the left neighbor. */ 3186 error = xfs_btree_lshift(cur, level, stat); 3187 if (error) 3188 return error; 3189 3190 if (*stat) { 3191 *oindex = *index = cur->bc_ptrs[level]; 3192 return 0; 3193 } 3194 3195 /* 3196 * Next, try splitting the current block in half. 3197 * 3198 * If this works we have to re-set our variables because we 3199 * could be in a different block now. 3200 */ 3201 error = xfs_btree_split(cur, level, nptr, key, ncur, stat); 3202 if (error || *stat == 0) 3203 return error; 3204 3205 3206 *index = cur->bc_ptrs[level]; 3207 return 0; 3208 } 3209 3210 /* 3211 * Insert one record/level. Return information to the caller 3212 * allowing the next level up to proceed if necessary. 3213 */ 3214 STATIC int 3215 xfs_btree_insrec( 3216 struct xfs_btree_cur *cur, /* btree cursor */ 3217 int level, /* level to insert record at */ 3218 union xfs_btree_ptr *ptrp, /* i/o: block number inserted */ 3219 union xfs_btree_rec *rec, /* record to insert */ 3220 union xfs_btree_key *key, /* i/o: block key for ptrp */ 3221 struct xfs_btree_cur **curp, /* output: new cursor replacing cur */ 3222 int *stat) /* success/failure */ 3223 { 3224 struct xfs_btree_block *block; /* btree block */ 3225 struct xfs_buf *bp; /* buffer for block */ 3226 union xfs_btree_ptr nptr; /* new block ptr */ 3227 struct xfs_btree_cur *ncur; /* new btree cursor */ 3228 union xfs_btree_key nkey; /* new block key */ 3229 union xfs_btree_key *lkey; 3230 int optr; /* old key/record index */ 3231 int ptr; /* key/record index */ 3232 int numrecs;/* number of records */ 3233 int error; /* error return value */ 3234 int i; 3235 xfs_daddr_t old_bn; 3236 3237 ncur = NULL; 3238 lkey = &nkey; 3239 3240 /* 3241 * If we have an external root pointer, and we've made it to the 3242 * root level, allocate a new root block and we're done. 3243 */ 3244 if (!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && 3245 (level >= cur->bc_nlevels)) { 3246 error = xfs_btree_new_root(cur, stat); 3247 xfs_btree_set_ptr_null(cur, ptrp); 3248 3249 return error; 3250 } 3251 3252 /* If we're off the left edge, return failure. */ 3253 ptr = cur->bc_ptrs[level]; 3254 if (ptr == 0) { 3255 *stat = 0; 3256 return 0; 3257 } 3258 3259 optr = ptr; 3260 3261 XFS_BTREE_STATS_INC(cur, insrec); 3262 3263 /* Get pointers to the btree buffer and block. */ 3264 block = xfs_btree_get_block(cur, level, &bp); 3265 old_bn = bp ? bp->b_bn : XFS_BUF_DADDR_NULL; 3266 numrecs = xfs_btree_get_numrecs(block); 3267 3268 #ifdef DEBUG 3269 error = xfs_btree_check_block(cur, block, level, bp); 3270 if (error) 3271 goto error0; 3272 3273 /* Check that the new entry is being inserted in the right place. */ 3274 if (ptr <= numrecs) { 3275 if (level == 0) { 3276 ASSERT(cur->bc_ops->recs_inorder(cur, rec, 3277 xfs_btree_rec_addr(cur, ptr, block))); 3278 } else { 3279 ASSERT(cur->bc_ops->keys_inorder(cur, key, 3280 xfs_btree_key_addr(cur, ptr, block))); 3281 } 3282 } 3283 #endif 3284 3285 /* 3286 * If the block is full, we can't insert the new entry until we 3287 * make the block un-full. 3288 */ 3289 xfs_btree_set_ptr_null(cur, &nptr); 3290 if (numrecs == cur->bc_ops->get_maxrecs(cur, level)) { 3291 error = xfs_btree_make_block_unfull(cur, level, numrecs, 3292 &optr, &ptr, &nptr, &ncur, lkey, stat); 3293 if (error || *stat == 0) 3294 goto error0; 3295 } 3296 3297 /* 3298 * The current block may have changed if the block was 3299 * previously full and we have just made space in it. 3300 */ 3301 block = xfs_btree_get_block(cur, level, &bp); 3302 numrecs = xfs_btree_get_numrecs(block); 3303 3304 #ifdef DEBUG 3305 error = xfs_btree_check_block(cur, block, level, bp); 3306 if (error) 3307 return error; 3308 #endif 3309 3310 /* 3311 * At this point we know there's room for our new entry in the block 3312 * we're pointing at. 3313 */ 3314 XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr + 1); 3315 3316 if (level > 0) { 3317 /* It's a nonleaf. make a hole in the keys and ptrs */ 3318 union xfs_btree_key *kp; 3319 union xfs_btree_ptr *pp; 3320 3321 kp = xfs_btree_key_addr(cur, ptr, block); 3322 pp = xfs_btree_ptr_addr(cur, ptr, block); 3323 3324 for (i = numrecs - ptr; i >= 0; i--) { 3325 error = xfs_btree_debug_check_ptr(cur, pp, i, level); 3326 if (error) 3327 return error; 3328 } 3329 3330 xfs_btree_shift_keys(cur, kp, 1, numrecs - ptr + 1); 3331 xfs_btree_shift_ptrs(cur, pp, 1, numrecs - ptr + 1); 3332 3333 error = xfs_btree_debug_check_ptr(cur, ptrp, 0, level); 3334 if (error) 3335 goto error0; 3336 3337 /* Now put the new data in, bump numrecs and log it. */ 3338 xfs_btree_copy_keys(cur, kp, key, 1); 3339 xfs_btree_copy_ptrs(cur, pp, ptrp, 1); 3340 numrecs++; 3341 xfs_btree_set_numrecs(block, numrecs); 3342 xfs_btree_log_ptrs(cur, bp, ptr, numrecs); 3343 xfs_btree_log_keys(cur, bp, ptr, numrecs); 3344 #ifdef DEBUG 3345 if (ptr < numrecs) { 3346 ASSERT(cur->bc_ops->keys_inorder(cur, kp, 3347 xfs_btree_key_addr(cur, ptr + 1, block))); 3348 } 3349 #endif 3350 } else { 3351 /* It's a leaf. make a hole in the records */ 3352 union xfs_btree_rec *rp; 3353 3354 rp = xfs_btree_rec_addr(cur, ptr, block); 3355 3356 xfs_btree_shift_recs(cur, rp, 1, numrecs - ptr + 1); 3357 3358 /* Now put the new data in, bump numrecs and log it. */ 3359 xfs_btree_copy_recs(cur, rp, rec, 1); 3360 xfs_btree_set_numrecs(block, ++numrecs); 3361 xfs_btree_log_recs(cur, bp, ptr, numrecs); 3362 #ifdef DEBUG 3363 if (ptr < numrecs) { 3364 ASSERT(cur->bc_ops->recs_inorder(cur, rp, 3365 xfs_btree_rec_addr(cur, ptr + 1, block))); 3366 } 3367 #endif 3368 } 3369 3370 /* Log the new number of records in the btree header. */ 3371 xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS); 3372 3373 /* 3374 * If we just inserted into a new tree block, we have to 3375 * recalculate nkey here because nkey is out of date. 3376 * 3377 * Otherwise we're just updating an existing block (having shoved 3378 * some records into the new tree block), so use the regular key 3379 * update mechanism. 3380 */ 3381 if (bp && bp->b_bn != old_bn) { 3382 xfs_btree_get_keys(cur, block, lkey); 3383 } else if (xfs_btree_needs_key_update(cur, optr)) { 3384 error = xfs_btree_update_keys(cur, level); 3385 if (error) 3386 goto error0; 3387 } 3388 3389 /* 3390 * If we are tracking the last record in the tree and 3391 * we are at the far right edge of the tree, update it. 3392 */ 3393 if (xfs_btree_is_lastrec(cur, block, level)) { 3394 cur->bc_ops->update_lastrec(cur, block, rec, 3395 ptr, LASTREC_INSREC); 3396 } 3397 3398 /* 3399 * Return the new block number, if any. 3400 * If there is one, give back a record value and a cursor too. 3401 */ 3402 *ptrp = nptr; 3403 if (!xfs_btree_ptr_is_null(cur, &nptr)) { 3404 xfs_btree_copy_keys(cur, key, lkey, 1); 3405 *curp = ncur; 3406 } 3407 3408 *stat = 1; 3409 return 0; 3410 3411 error0: 3412 return error; 3413 } 3414 3415 /* 3416 * Insert the record at the point referenced by cur. 3417 * 3418 * A multi-level split of the tree on insert will invalidate the original 3419 * cursor. All callers of this function should assume that the cursor is 3420 * no longer valid and revalidate it. 3421 */ 3422 int 3423 xfs_btree_insert( 3424 struct xfs_btree_cur *cur, 3425 int *stat) 3426 { 3427 int error; /* error return value */ 3428 int i; /* result value, 0 for failure */ 3429 int level; /* current level number in btree */ 3430 union xfs_btree_ptr nptr; /* new block number (split result) */ 3431 struct xfs_btree_cur *ncur; /* new cursor (split result) */ 3432 struct xfs_btree_cur *pcur; /* previous level's cursor */ 3433 union xfs_btree_key bkey; /* key of block to insert */ 3434 union xfs_btree_key *key; 3435 union xfs_btree_rec rec; /* record to insert */ 3436 3437 level = 0; 3438 ncur = NULL; 3439 pcur = cur; 3440 key = &bkey; 3441 3442 xfs_btree_set_ptr_null(cur, &nptr); 3443 3444 /* Make a key out of the record data to be inserted, and save it. */ 3445 cur->bc_ops->init_rec_from_cur(cur, &rec); 3446 cur->bc_ops->init_key_from_rec(key, &rec); 3447 3448 /* 3449 * Loop going up the tree, starting at the leaf level. 3450 * Stop when we don't get a split block, that must mean that 3451 * the insert is finished with this level. 3452 */ 3453 do { 3454 /* 3455 * Insert nrec/nptr into this level of the tree. 3456 * Note if we fail, nptr will be null. 3457 */ 3458 error = xfs_btree_insrec(pcur, level, &nptr, &rec, key, 3459 &ncur, &i); 3460 if (error) { 3461 if (pcur != cur) 3462 xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR); 3463 goto error0; 3464 } 3465 3466 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); 3467 level++; 3468 3469 /* 3470 * See if the cursor we just used is trash. 3471 * Can't trash the caller's cursor, but otherwise we should 3472 * if ncur is a new cursor or we're about to be done. 3473 */ 3474 if (pcur != cur && 3475 (ncur || xfs_btree_ptr_is_null(cur, &nptr))) { 3476 /* Save the state from the cursor before we trash it */ 3477 if (cur->bc_ops->update_cursor) 3478 cur->bc_ops->update_cursor(pcur, cur); 3479 cur->bc_nlevels = pcur->bc_nlevels; 3480 xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR); 3481 } 3482 /* If we got a new cursor, switch to it. */ 3483 if (ncur) { 3484 pcur = ncur; 3485 ncur = NULL; 3486 } 3487 } while (!xfs_btree_ptr_is_null(cur, &nptr)); 3488 3489 *stat = i; 3490 return 0; 3491 error0: 3492 return error; 3493 } 3494 3495 /* 3496 * Try to merge a non-leaf block back into the inode root. 3497 * 3498 * Note: the killroot names comes from the fact that we're effectively 3499 * killing the old root block. But because we can't just delete the 3500 * inode we have to copy the single block it was pointing to into the 3501 * inode. 3502 */ 3503 STATIC int 3504 xfs_btree_kill_iroot( 3505 struct xfs_btree_cur *cur) 3506 { 3507 int whichfork = cur->bc_private.b.whichfork; 3508 struct xfs_inode *ip = cur->bc_private.b.ip; 3509 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); 3510 struct xfs_btree_block *block; 3511 struct xfs_btree_block *cblock; 3512 union xfs_btree_key *kp; 3513 union xfs_btree_key *ckp; 3514 union xfs_btree_ptr *pp; 3515 union xfs_btree_ptr *cpp; 3516 struct xfs_buf *cbp; 3517 int level; 3518 int index; 3519 int numrecs; 3520 int error; 3521 #ifdef DEBUG 3522 union xfs_btree_ptr ptr; 3523 #endif 3524 int i; 3525 3526 ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE); 3527 ASSERT(cur->bc_nlevels > 1); 3528 3529 /* 3530 * Don't deal with the root block needs to be a leaf case. 3531 * We're just going to turn the thing back into extents anyway. 3532 */ 3533 level = cur->bc_nlevels - 1; 3534 if (level == 1) 3535 goto out0; 3536 3537 /* 3538 * Give up if the root has multiple children. 3539 */ 3540 block = xfs_btree_get_iroot(cur); 3541 if (xfs_btree_get_numrecs(block) != 1) 3542 goto out0; 3543 3544 cblock = xfs_btree_get_block(cur, level - 1, &cbp); 3545 numrecs = xfs_btree_get_numrecs(cblock); 3546 3547 /* 3548 * Only do this if the next level will fit. 3549 * Then the data must be copied up to the inode, 3550 * instead of freeing the root you free the next level. 3551 */ 3552 if (numrecs > cur->bc_ops->get_dmaxrecs(cur, level)) 3553 goto out0; 3554 3555 XFS_BTREE_STATS_INC(cur, killroot); 3556 3557 #ifdef DEBUG 3558 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB); 3559 ASSERT(xfs_btree_ptr_is_null(cur, &ptr)); 3560 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB); 3561 ASSERT(xfs_btree_ptr_is_null(cur, &ptr)); 3562 #endif 3563 3564 index = numrecs - cur->bc_ops->get_maxrecs(cur, level); 3565 if (index) { 3566 xfs_iroot_realloc(cur->bc_private.b.ip, index, 3567 cur->bc_private.b.whichfork); 3568 block = ifp->if_broot; 3569 } 3570 3571 be16_add_cpu(&block->bb_numrecs, index); 3572 ASSERT(block->bb_numrecs == cblock->bb_numrecs); 3573 3574 kp = xfs_btree_key_addr(cur, 1, block); 3575 ckp = xfs_btree_key_addr(cur, 1, cblock); 3576 xfs_btree_copy_keys(cur, kp, ckp, numrecs); 3577 3578 pp = xfs_btree_ptr_addr(cur, 1, block); 3579 cpp = xfs_btree_ptr_addr(cur, 1, cblock); 3580 3581 for (i = 0; i < numrecs; i++) { 3582 error = xfs_btree_debug_check_ptr(cur, cpp, i, level - 1); 3583 if (error) 3584 return error; 3585 } 3586 3587 xfs_btree_copy_ptrs(cur, pp, cpp, numrecs); 3588 3589 error = xfs_btree_free_block(cur, cbp); 3590 if (error) 3591 return error; 3592 3593 cur->bc_bufs[level - 1] = NULL; 3594 be16_add_cpu(&block->bb_level, -1); 3595 xfs_trans_log_inode(cur->bc_tp, ip, 3596 XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_private.b.whichfork)); 3597 cur->bc_nlevels--; 3598 out0: 3599 return 0; 3600 } 3601 3602 /* 3603 * Kill the current root node, and replace it with it's only child node. 3604 */ 3605 STATIC int 3606 xfs_btree_kill_root( 3607 struct xfs_btree_cur *cur, 3608 struct xfs_buf *bp, 3609 int level, 3610 union xfs_btree_ptr *newroot) 3611 { 3612 int error; 3613 3614 XFS_BTREE_STATS_INC(cur, killroot); 3615 3616 /* 3617 * Update the root pointer, decreasing the level by 1 and then 3618 * free the old root. 3619 */ 3620 cur->bc_ops->set_root(cur, newroot, -1); 3621 3622 error = xfs_btree_free_block(cur, bp); 3623 if (error) 3624 return error; 3625 3626 cur->bc_bufs[level] = NULL; 3627 cur->bc_ra[level] = 0; 3628 cur->bc_nlevels--; 3629 3630 return 0; 3631 } 3632 3633 STATIC int 3634 xfs_btree_dec_cursor( 3635 struct xfs_btree_cur *cur, 3636 int level, 3637 int *stat) 3638 { 3639 int error; 3640 int i; 3641 3642 if (level > 0) { 3643 error = xfs_btree_decrement(cur, level, &i); 3644 if (error) 3645 return error; 3646 } 3647 3648 *stat = 1; 3649 return 0; 3650 } 3651 3652 /* 3653 * Single level of the btree record deletion routine. 3654 * Delete record pointed to by cur/level. 3655 * Remove the record from its block then rebalance the tree. 3656 * Return 0 for error, 1 for done, 2 to go on to the next level. 3657 */ 3658 STATIC int /* error */ 3659 xfs_btree_delrec( 3660 struct xfs_btree_cur *cur, /* btree cursor */ 3661 int level, /* level removing record from */ 3662 int *stat) /* fail/done/go-on */ 3663 { 3664 struct xfs_btree_block *block; /* btree block */ 3665 union xfs_btree_ptr cptr; /* current block ptr */ 3666 struct xfs_buf *bp; /* buffer for block */ 3667 int error; /* error return value */ 3668 int i; /* loop counter */ 3669 union xfs_btree_ptr lptr; /* left sibling block ptr */ 3670 struct xfs_buf *lbp; /* left buffer pointer */ 3671 struct xfs_btree_block *left; /* left btree block */ 3672 int lrecs = 0; /* left record count */ 3673 int ptr; /* key/record index */ 3674 union xfs_btree_ptr rptr; /* right sibling block ptr */ 3675 struct xfs_buf *rbp; /* right buffer pointer */ 3676 struct xfs_btree_block *right; /* right btree block */ 3677 struct xfs_btree_block *rrblock; /* right-right btree block */ 3678 struct xfs_buf *rrbp; /* right-right buffer pointer */ 3679 int rrecs = 0; /* right record count */ 3680 struct xfs_btree_cur *tcur; /* temporary btree cursor */ 3681 int numrecs; /* temporary numrec count */ 3682 3683 tcur = NULL; 3684 3685 /* Get the index of the entry being deleted, check for nothing there. */ 3686 ptr = cur->bc_ptrs[level]; 3687 if (ptr == 0) { 3688 *stat = 0; 3689 return 0; 3690 } 3691 3692 /* Get the buffer & block containing the record or key/ptr. */ 3693 block = xfs_btree_get_block(cur, level, &bp); 3694 numrecs = xfs_btree_get_numrecs(block); 3695 3696 #ifdef DEBUG 3697 error = xfs_btree_check_block(cur, block, level, bp); 3698 if (error) 3699 goto error0; 3700 #endif 3701 3702 /* Fail if we're off the end of the block. */ 3703 if (ptr > numrecs) { 3704 *stat = 0; 3705 return 0; 3706 } 3707 3708 XFS_BTREE_STATS_INC(cur, delrec); 3709 XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr); 3710 3711 /* Excise the entries being deleted. */ 3712 if (level > 0) { 3713 /* It's a nonleaf. operate on keys and ptrs */ 3714 union xfs_btree_key *lkp; 3715 union xfs_btree_ptr *lpp; 3716 3717 lkp = xfs_btree_key_addr(cur, ptr + 1, block); 3718 lpp = xfs_btree_ptr_addr(cur, ptr + 1, block); 3719 3720 for (i = 0; i < numrecs - ptr; i++) { 3721 error = xfs_btree_debug_check_ptr(cur, lpp, i, level); 3722 if (error) 3723 goto error0; 3724 } 3725 3726 if (ptr < numrecs) { 3727 xfs_btree_shift_keys(cur, lkp, -1, numrecs - ptr); 3728 xfs_btree_shift_ptrs(cur, lpp, -1, numrecs - ptr); 3729 xfs_btree_log_keys(cur, bp, ptr, numrecs - 1); 3730 xfs_btree_log_ptrs(cur, bp, ptr, numrecs - 1); 3731 } 3732 } else { 3733 /* It's a leaf. operate on records */ 3734 if (ptr < numrecs) { 3735 xfs_btree_shift_recs(cur, 3736 xfs_btree_rec_addr(cur, ptr + 1, block), 3737 -1, numrecs - ptr); 3738 xfs_btree_log_recs(cur, bp, ptr, numrecs - 1); 3739 } 3740 } 3741 3742 /* 3743 * Decrement and log the number of entries in the block. 3744 */ 3745 xfs_btree_set_numrecs(block, --numrecs); 3746 xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS); 3747 3748 /* 3749 * If we are tracking the last record in the tree and 3750 * we are at the far right edge of the tree, update it. 3751 */ 3752 if (xfs_btree_is_lastrec(cur, block, level)) { 3753 cur->bc_ops->update_lastrec(cur, block, NULL, 3754 ptr, LASTREC_DELREC); 3755 } 3756 3757 /* 3758 * We're at the root level. First, shrink the root block in-memory. 3759 * Try to get rid of the next level down. If we can't then there's 3760 * nothing left to do. 3761 */ 3762 if (level == cur->bc_nlevels - 1) { 3763 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) { 3764 xfs_iroot_realloc(cur->bc_private.b.ip, -1, 3765 cur->bc_private.b.whichfork); 3766 3767 error = xfs_btree_kill_iroot(cur); 3768 if (error) 3769 goto error0; 3770 3771 error = xfs_btree_dec_cursor(cur, level, stat); 3772 if (error) 3773 goto error0; 3774 *stat = 1; 3775 return 0; 3776 } 3777 3778 /* 3779 * If this is the root level, and there's only one entry left, 3780 * and it's NOT the leaf level, then we can get rid of this 3781 * level. 3782 */ 3783 if (numrecs == 1 && level > 0) { 3784 union xfs_btree_ptr *pp; 3785 /* 3786 * pp is still set to the first pointer in the block. 3787 * Make it the new root of the btree. 3788 */ 3789 pp = xfs_btree_ptr_addr(cur, 1, block); 3790 error = xfs_btree_kill_root(cur, bp, level, pp); 3791 if (error) 3792 goto error0; 3793 } else if (level > 0) { 3794 error = xfs_btree_dec_cursor(cur, level, stat); 3795 if (error) 3796 goto error0; 3797 } 3798 *stat = 1; 3799 return 0; 3800 } 3801 3802 /* 3803 * If we deleted the leftmost entry in the block, update the 3804 * key values above us in the tree. 3805 */ 3806 if (xfs_btree_needs_key_update(cur, ptr)) { 3807 error = xfs_btree_update_keys(cur, level); 3808 if (error) 3809 goto error0; 3810 } 3811 3812 /* 3813 * If the number of records remaining in the block is at least 3814 * the minimum, we're done. 3815 */ 3816 if (numrecs >= cur->bc_ops->get_minrecs(cur, level)) { 3817 error = xfs_btree_dec_cursor(cur, level, stat); 3818 if (error) 3819 goto error0; 3820 return 0; 3821 } 3822 3823 /* 3824 * Otherwise, we have to move some records around to keep the 3825 * tree balanced. Look at the left and right sibling blocks to 3826 * see if we can re-balance by moving only one record. 3827 */ 3828 xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB); 3829 xfs_btree_get_sibling(cur, block, &lptr, XFS_BB_LEFTSIB); 3830 3831 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) { 3832 /* 3833 * One child of root, need to get a chance to copy its contents 3834 * into the root and delete it. Can't go up to next level, 3835 * there's nothing to delete there. 3836 */ 3837 if (xfs_btree_ptr_is_null(cur, &rptr) && 3838 xfs_btree_ptr_is_null(cur, &lptr) && 3839 level == cur->bc_nlevels - 2) { 3840 error = xfs_btree_kill_iroot(cur); 3841 if (!error) 3842 error = xfs_btree_dec_cursor(cur, level, stat); 3843 if (error) 3844 goto error0; 3845 return 0; 3846 } 3847 } 3848 3849 ASSERT(!xfs_btree_ptr_is_null(cur, &rptr) || 3850 !xfs_btree_ptr_is_null(cur, &lptr)); 3851 3852 /* 3853 * Duplicate the cursor so our btree manipulations here won't 3854 * disrupt the next level up. 3855 */ 3856 error = xfs_btree_dup_cursor(cur, &tcur); 3857 if (error) 3858 goto error0; 3859 3860 /* 3861 * If there's a right sibling, see if it's ok to shift an entry 3862 * out of it. 3863 */ 3864 if (!xfs_btree_ptr_is_null(cur, &rptr)) { 3865 /* 3866 * Move the temp cursor to the last entry in the next block. 3867 * Actually any entry but the first would suffice. 3868 */ 3869 i = xfs_btree_lastrec(tcur, level); 3870 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); 3871 3872 error = xfs_btree_increment(tcur, level, &i); 3873 if (error) 3874 goto error0; 3875 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); 3876 3877 i = xfs_btree_lastrec(tcur, level); 3878 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); 3879 3880 /* Grab a pointer to the block. */ 3881 right = xfs_btree_get_block(tcur, level, &rbp); 3882 #ifdef DEBUG 3883 error = xfs_btree_check_block(tcur, right, level, rbp); 3884 if (error) 3885 goto error0; 3886 #endif 3887 /* Grab the current block number, for future use. */ 3888 xfs_btree_get_sibling(tcur, right, &cptr, XFS_BB_LEFTSIB); 3889 3890 /* 3891 * If right block is full enough so that removing one entry 3892 * won't make it too empty, and left-shifting an entry out 3893 * of right to us works, we're done. 3894 */ 3895 if (xfs_btree_get_numrecs(right) - 1 >= 3896 cur->bc_ops->get_minrecs(tcur, level)) { 3897 error = xfs_btree_lshift(tcur, level, &i); 3898 if (error) 3899 goto error0; 3900 if (i) { 3901 ASSERT(xfs_btree_get_numrecs(block) >= 3902 cur->bc_ops->get_minrecs(tcur, level)); 3903 3904 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); 3905 tcur = NULL; 3906 3907 error = xfs_btree_dec_cursor(cur, level, stat); 3908 if (error) 3909 goto error0; 3910 return 0; 3911 } 3912 } 3913 3914 /* 3915 * Otherwise, grab the number of records in right for 3916 * future reference, and fix up the temp cursor to point 3917 * to our block again (last record). 3918 */ 3919 rrecs = xfs_btree_get_numrecs(right); 3920 if (!xfs_btree_ptr_is_null(cur, &lptr)) { 3921 i = xfs_btree_firstrec(tcur, level); 3922 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); 3923 3924 error = xfs_btree_decrement(tcur, level, &i); 3925 if (error) 3926 goto error0; 3927 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); 3928 } 3929 } 3930 3931 /* 3932 * If there's a left sibling, see if it's ok to shift an entry 3933 * out of it. 3934 */ 3935 if (!xfs_btree_ptr_is_null(cur, &lptr)) { 3936 /* 3937 * Move the temp cursor to the first entry in the 3938 * previous block. 3939 */ 3940 i = xfs_btree_firstrec(tcur, level); 3941 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); 3942 3943 error = xfs_btree_decrement(tcur, level, &i); 3944 if (error) 3945 goto error0; 3946 i = xfs_btree_firstrec(tcur, level); 3947 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); 3948 3949 /* Grab a pointer to the block. */ 3950 left = xfs_btree_get_block(tcur, level, &lbp); 3951 #ifdef DEBUG 3952 error = xfs_btree_check_block(cur, left, level, lbp); 3953 if (error) 3954 goto error0; 3955 #endif 3956 /* Grab the current block number, for future use. */ 3957 xfs_btree_get_sibling(tcur, left, &cptr, XFS_BB_RIGHTSIB); 3958 3959 /* 3960 * If left block is full enough so that removing one entry 3961 * won't make it too empty, and right-shifting an entry out 3962 * of left to us works, we're done. 3963 */ 3964 if (xfs_btree_get_numrecs(left) - 1 >= 3965 cur->bc_ops->get_minrecs(tcur, level)) { 3966 error = xfs_btree_rshift(tcur, level, &i); 3967 if (error) 3968 goto error0; 3969 if (i) { 3970 ASSERT(xfs_btree_get_numrecs(block) >= 3971 cur->bc_ops->get_minrecs(tcur, level)); 3972 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); 3973 tcur = NULL; 3974 if (level == 0) 3975 cur->bc_ptrs[0]++; 3976 3977 *stat = 1; 3978 return 0; 3979 } 3980 } 3981 3982 /* 3983 * Otherwise, grab the number of records in right for 3984 * future reference. 3985 */ 3986 lrecs = xfs_btree_get_numrecs(left); 3987 } 3988 3989 /* Delete the temp cursor, we're done with it. */ 3990 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); 3991 tcur = NULL; 3992 3993 /* If here, we need to do a join to keep the tree balanced. */ 3994 ASSERT(!xfs_btree_ptr_is_null(cur, &cptr)); 3995 3996 if (!xfs_btree_ptr_is_null(cur, &lptr) && 3997 lrecs + xfs_btree_get_numrecs(block) <= 3998 cur->bc_ops->get_maxrecs(cur, level)) { 3999 /* 4000 * Set "right" to be the starting block, 4001 * "left" to be the left neighbor. 4002 */ 4003 rptr = cptr; 4004 right = block; 4005 rbp = bp; 4006 error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp); 4007 if (error) 4008 goto error0; 4009 4010 /* 4011 * If that won't work, see if we can join with the right neighbor block. 4012 */ 4013 } else if (!xfs_btree_ptr_is_null(cur, &rptr) && 4014 rrecs + xfs_btree_get_numrecs(block) <= 4015 cur->bc_ops->get_maxrecs(cur, level)) { 4016 /* 4017 * Set "left" to be the starting block, 4018 * "right" to be the right neighbor. 4019 */ 4020 lptr = cptr; 4021 left = block; 4022 lbp = bp; 4023 error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp); 4024 if (error) 4025 goto error0; 4026 4027 /* 4028 * Otherwise, we can't fix the imbalance. 4029 * Just return. This is probably a logic error, but it's not fatal. 4030 */ 4031 } else { 4032 error = xfs_btree_dec_cursor(cur, level, stat); 4033 if (error) 4034 goto error0; 4035 return 0; 4036 } 4037 4038 rrecs = xfs_btree_get_numrecs(right); 4039 lrecs = xfs_btree_get_numrecs(left); 4040 4041 /* 4042 * We're now going to join "left" and "right" by moving all the stuff 4043 * in "right" to "left" and deleting "right". 4044 */ 4045 XFS_BTREE_STATS_ADD(cur, moves, rrecs); 4046 if (level > 0) { 4047 /* It's a non-leaf. Move keys and pointers. */ 4048 union xfs_btree_key *lkp; /* left btree key */ 4049 union xfs_btree_ptr *lpp; /* left address pointer */ 4050 union xfs_btree_key *rkp; /* right btree key */ 4051 union xfs_btree_ptr *rpp; /* right address pointer */ 4052 4053 lkp = xfs_btree_key_addr(cur, lrecs + 1, left); 4054 lpp = xfs_btree_ptr_addr(cur, lrecs + 1, left); 4055 rkp = xfs_btree_key_addr(cur, 1, right); 4056 rpp = xfs_btree_ptr_addr(cur, 1, right); 4057 4058 for (i = 1; i < rrecs; i++) { 4059 error = xfs_btree_debug_check_ptr(cur, rpp, i, level); 4060 if (error) 4061 goto error0; 4062 } 4063 4064 xfs_btree_copy_keys(cur, lkp, rkp, rrecs); 4065 xfs_btree_copy_ptrs(cur, lpp, rpp, rrecs); 4066 4067 xfs_btree_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs); 4068 xfs_btree_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs); 4069 } else { 4070 /* It's a leaf. Move records. */ 4071 union xfs_btree_rec *lrp; /* left record pointer */ 4072 union xfs_btree_rec *rrp; /* right record pointer */ 4073 4074 lrp = xfs_btree_rec_addr(cur, lrecs + 1, left); 4075 rrp = xfs_btree_rec_addr(cur, 1, right); 4076 4077 xfs_btree_copy_recs(cur, lrp, rrp, rrecs); 4078 xfs_btree_log_recs(cur, lbp, lrecs + 1, lrecs + rrecs); 4079 } 4080 4081 XFS_BTREE_STATS_INC(cur, join); 4082 4083 /* 4084 * Fix up the number of records and right block pointer in the 4085 * surviving block, and log it. 4086 */ 4087 xfs_btree_set_numrecs(left, lrecs + rrecs); 4088 xfs_btree_get_sibling(cur, right, &cptr, XFS_BB_RIGHTSIB), 4089 xfs_btree_set_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB); 4090 xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB); 4091 4092 /* If there is a right sibling, point it to the remaining block. */ 4093 xfs_btree_get_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB); 4094 if (!xfs_btree_ptr_is_null(cur, &cptr)) { 4095 error = xfs_btree_read_buf_block(cur, &cptr, 0, &rrblock, &rrbp); 4096 if (error) 4097 goto error0; 4098 xfs_btree_set_sibling(cur, rrblock, &lptr, XFS_BB_LEFTSIB); 4099 xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB); 4100 } 4101 4102 /* Free the deleted block. */ 4103 error = xfs_btree_free_block(cur, rbp); 4104 if (error) 4105 goto error0; 4106 4107 /* 4108 * If we joined with the left neighbor, set the buffer in the 4109 * cursor to the left block, and fix up the index. 4110 */ 4111 if (bp != lbp) { 4112 cur->bc_bufs[level] = lbp; 4113 cur->bc_ptrs[level] += lrecs; 4114 cur->bc_ra[level] = 0; 4115 } 4116 /* 4117 * If we joined with the right neighbor and there's a level above 4118 * us, increment the cursor at that level. 4119 */ 4120 else if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) || 4121 (level + 1 < cur->bc_nlevels)) { 4122 error = xfs_btree_increment(cur, level + 1, &i); 4123 if (error) 4124 goto error0; 4125 } 4126 4127 /* 4128 * Readjust the ptr at this level if it's not a leaf, since it's 4129 * still pointing at the deletion point, which makes the cursor 4130 * inconsistent. If this makes the ptr 0, the caller fixes it up. 4131 * We can't use decrement because it would change the next level up. 4132 */ 4133 if (level > 0) 4134 cur->bc_ptrs[level]--; 4135 4136 /* 4137 * We combined blocks, so we have to update the parent keys if the 4138 * btree supports overlapped intervals. However, bc_ptrs[level + 1] 4139 * points to the old block so that the caller knows which record to 4140 * delete. Therefore, the caller must be savvy enough to call updkeys 4141 * for us if we return stat == 2. The other exit points from this 4142 * function don't require deletions further up the tree, so they can 4143 * call updkeys directly. 4144 */ 4145 4146 /* Return value means the next level up has something to do. */ 4147 *stat = 2; 4148 return 0; 4149 4150 error0: 4151 if (tcur) 4152 xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); 4153 return error; 4154 } 4155 4156 /* 4157 * Delete the record pointed to by cur. 4158 * The cursor refers to the place where the record was (could be inserted) 4159 * when the operation returns. 4160 */ 4161 int /* error */ 4162 xfs_btree_delete( 4163 struct xfs_btree_cur *cur, 4164 int *stat) /* success/failure */ 4165 { 4166 int error; /* error return value */ 4167 int level; 4168 int i; 4169 bool joined = false; 4170 4171 /* 4172 * Go up the tree, starting at leaf level. 4173 * 4174 * If 2 is returned then a join was done; go to the next level. 4175 * Otherwise we are done. 4176 */ 4177 for (level = 0, i = 2; i == 2; level++) { 4178 error = xfs_btree_delrec(cur, level, &i); 4179 if (error) 4180 goto error0; 4181 if (i == 2) 4182 joined = true; 4183 } 4184 4185 /* 4186 * If we combined blocks as part of deleting the record, delrec won't 4187 * have updated the parent high keys so we have to do that here. 4188 */ 4189 if (joined && (cur->bc_flags & XFS_BTREE_OVERLAPPING)) { 4190 error = xfs_btree_updkeys_force(cur, 0); 4191 if (error) 4192 goto error0; 4193 } 4194 4195 if (i == 0) { 4196 for (level = 1; level < cur->bc_nlevels; level++) { 4197 if (cur->bc_ptrs[level] == 0) { 4198 error = xfs_btree_decrement(cur, level, &i); 4199 if (error) 4200 goto error0; 4201 break; 4202 } 4203 } 4204 } 4205 4206 *stat = i; 4207 return 0; 4208 error0: 4209 return error; 4210 } 4211 4212 /* 4213 * Get the data from the pointed-to record. 4214 */ 4215 int /* error */ 4216 xfs_btree_get_rec( 4217 struct xfs_btree_cur *cur, /* btree cursor */ 4218 union xfs_btree_rec **recp, /* output: btree record */ 4219 int *stat) /* output: success/failure */ 4220 { 4221 struct xfs_btree_block *block; /* btree block */ 4222 struct xfs_buf *bp; /* buffer pointer */ 4223 int ptr; /* record number */ 4224 #ifdef DEBUG 4225 int error; /* error return value */ 4226 #endif 4227 4228 ptr = cur->bc_ptrs[0]; 4229 block = xfs_btree_get_block(cur, 0, &bp); 4230 4231 #ifdef DEBUG 4232 error = xfs_btree_check_block(cur, block, 0, bp); 4233 if (error) 4234 return error; 4235 #endif 4236 4237 /* 4238 * Off the right end or left end, return failure. 4239 */ 4240 if (ptr > xfs_btree_get_numrecs(block) || ptr <= 0) { 4241 *stat = 0; 4242 return 0; 4243 } 4244 4245 /* 4246 * Point to the record and extract its data. 4247 */ 4248 *recp = xfs_btree_rec_addr(cur, ptr, block); 4249 *stat = 1; 4250 return 0; 4251 } 4252 4253 /* Visit a block in a btree. */ 4254 STATIC int 4255 xfs_btree_visit_block( 4256 struct xfs_btree_cur *cur, 4257 int level, 4258 xfs_btree_visit_blocks_fn fn, 4259 void *data) 4260 { 4261 struct xfs_btree_block *block; 4262 struct xfs_buf *bp; 4263 union xfs_btree_ptr rptr; 4264 int error; 4265 4266 /* do right sibling readahead */ 4267 xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA); 4268 block = xfs_btree_get_block(cur, level, &bp); 4269 4270 /* process the block */ 4271 error = fn(cur, level, data); 4272 if (error) 4273 return error; 4274 4275 /* now read rh sibling block for next iteration */ 4276 xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB); 4277 if (xfs_btree_ptr_is_null(cur, &rptr)) 4278 return -ENOENT; 4279 4280 return xfs_btree_lookup_get_block(cur, level, &rptr, &block); 4281 } 4282 4283 4284 /* Visit every block in a btree. */ 4285 int 4286 xfs_btree_visit_blocks( 4287 struct xfs_btree_cur *cur, 4288 xfs_btree_visit_blocks_fn fn, 4289 void *data) 4290 { 4291 union xfs_btree_ptr lptr; 4292 int level; 4293 struct xfs_btree_block *block = NULL; 4294 int error = 0; 4295 4296 cur->bc_ops->init_ptr_from_cur(cur, &lptr); 4297 4298 /* for each level */ 4299 for (level = cur->bc_nlevels - 1; level >= 0; level--) { 4300 /* grab the left hand block */ 4301 error = xfs_btree_lookup_get_block(cur, level, &lptr, &block); 4302 if (error) 4303 return error; 4304 4305 /* readahead the left most block for the next level down */ 4306 if (level > 0) { 4307 union xfs_btree_ptr *ptr; 4308 4309 ptr = xfs_btree_ptr_addr(cur, 1, block); 4310 xfs_btree_readahead_ptr(cur, ptr, 1); 4311 4312 /* save for the next iteration of the loop */ 4313 xfs_btree_copy_ptrs(cur, &lptr, ptr, 1); 4314 } 4315 4316 /* for each buffer in the level */ 4317 do { 4318 error = xfs_btree_visit_block(cur, level, fn, data); 4319 } while (!error); 4320 4321 if (error != -ENOENT) 4322 return error; 4323 } 4324 4325 return 0; 4326 } 4327 4328 /* 4329 * Change the owner of a btree. 4330 * 4331 * The mechanism we use here is ordered buffer logging. Because we don't know 4332 * how many buffers were are going to need to modify, we don't really want to 4333 * have to make transaction reservations for the worst case of every buffer in a 4334 * full size btree as that may be more space that we can fit in the log.... 4335 * 4336 * We do the btree walk in the most optimal manner possible - we have sibling 4337 * pointers so we can just walk all the blocks on each level from left to right 4338 * in a single pass, and then move to the next level and do the same. We can 4339 * also do readahead on the sibling pointers to get IO moving more quickly, 4340 * though for slow disks this is unlikely to make much difference to performance 4341 * as the amount of CPU work we have to do before moving to the next block is 4342 * relatively small. 4343 * 4344 * For each btree block that we load, modify the owner appropriately, set the 4345 * buffer as an ordered buffer and log it appropriately. We need to ensure that 4346 * we mark the region we change dirty so that if the buffer is relogged in 4347 * a subsequent transaction the changes we make here as an ordered buffer are 4348 * correctly relogged in that transaction. If we are in recovery context, then 4349 * just queue the modified buffer as delayed write buffer so the transaction 4350 * recovery completion writes the changes to disk. 4351 */ 4352 struct xfs_btree_block_change_owner_info { 4353 uint64_t new_owner; 4354 struct list_head *buffer_list; 4355 }; 4356 4357 static int 4358 xfs_btree_block_change_owner( 4359 struct xfs_btree_cur *cur, 4360 int level, 4361 void *data) 4362 { 4363 struct xfs_btree_block_change_owner_info *bbcoi = data; 4364 struct xfs_btree_block *block; 4365 struct xfs_buf *bp; 4366 4367 /* modify the owner */ 4368 block = xfs_btree_get_block(cur, level, &bp); 4369 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { 4370 if (block->bb_u.l.bb_owner == cpu_to_be64(bbcoi->new_owner)) 4371 return 0; 4372 block->bb_u.l.bb_owner = cpu_to_be64(bbcoi->new_owner); 4373 } else { 4374 if (block->bb_u.s.bb_owner == cpu_to_be32(bbcoi->new_owner)) 4375 return 0; 4376 block->bb_u.s.bb_owner = cpu_to_be32(bbcoi->new_owner); 4377 } 4378 4379 /* 4380 * If the block is a root block hosted in an inode, we might not have a 4381 * buffer pointer here and we shouldn't attempt to log the change as the 4382 * information is already held in the inode and discarded when the root 4383 * block is formatted into the on-disk inode fork. We still change it, 4384 * though, so everything is consistent in memory. 4385 */ 4386 if (!bp) { 4387 ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE); 4388 ASSERT(level == cur->bc_nlevels - 1); 4389 return 0; 4390 } 4391 4392 if (cur->bc_tp) { 4393 if (!xfs_trans_ordered_buf(cur->bc_tp, bp)) { 4394 xfs_btree_log_block(cur, bp, XFS_BB_OWNER); 4395 return -EAGAIN; 4396 } 4397 } else { 4398 xfs_buf_delwri_queue(bp, bbcoi->buffer_list); 4399 } 4400 4401 return 0; 4402 } 4403 4404 int 4405 xfs_btree_change_owner( 4406 struct xfs_btree_cur *cur, 4407 uint64_t new_owner, 4408 struct list_head *buffer_list) 4409 { 4410 struct xfs_btree_block_change_owner_info bbcoi; 4411 4412 bbcoi.new_owner = new_owner; 4413 bbcoi.buffer_list = buffer_list; 4414 4415 return xfs_btree_visit_blocks(cur, xfs_btree_block_change_owner, 4416 &bbcoi); 4417 } 4418 4419 /* Verify the v5 fields of a long-format btree block. */ 4420 xfs_failaddr_t 4421 xfs_btree_lblock_v5hdr_verify( 4422 struct xfs_buf *bp, 4423 uint64_t owner) 4424 { 4425 struct xfs_mount *mp = bp->b_mount; 4426 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 4427 4428 if (!xfs_sb_version_hascrc(&mp->m_sb)) 4429 return __this_address; 4430 if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid)) 4431 return __this_address; 4432 if (block->bb_u.l.bb_blkno != cpu_to_be64(bp->b_bn)) 4433 return __this_address; 4434 if (owner != XFS_RMAP_OWN_UNKNOWN && 4435 be64_to_cpu(block->bb_u.l.bb_owner) != owner) 4436 return __this_address; 4437 return NULL; 4438 } 4439 4440 /* Verify a long-format btree block. */ 4441 xfs_failaddr_t 4442 xfs_btree_lblock_verify( 4443 struct xfs_buf *bp, 4444 unsigned int max_recs) 4445 { 4446 struct xfs_mount *mp = bp->b_mount; 4447 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 4448 4449 /* numrecs verification */ 4450 if (be16_to_cpu(block->bb_numrecs) > max_recs) 4451 return __this_address; 4452 4453 /* sibling pointer verification */ 4454 if (block->bb_u.l.bb_leftsib != cpu_to_be64(NULLFSBLOCK) && 4455 !xfs_verify_fsbno(mp, be64_to_cpu(block->bb_u.l.bb_leftsib))) 4456 return __this_address; 4457 if (block->bb_u.l.bb_rightsib != cpu_to_be64(NULLFSBLOCK) && 4458 !xfs_verify_fsbno(mp, be64_to_cpu(block->bb_u.l.bb_rightsib))) 4459 return __this_address; 4460 4461 return NULL; 4462 } 4463 4464 /** 4465 * xfs_btree_sblock_v5hdr_verify() -- verify the v5 fields of a short-format 4466 * btree block 4467 * 4468 * @bp: buffer containing the btree block 4469 * @max_recs: pointer to the m_*_mxr max records field in the xfs mount 4470 * @pag_max_level: pointer to the per-ag max level field 4471 */ 4472 xfs_failaddr_t 4473 xfs_btree_sblock_v5hdr_verify( 4474 struct xfs_buf *bp) 4475 { 4476 struct xfs_mount *mp = bp->b_mount; 4477 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 4478 struct xfs_perag *pag = bp->b_pag; 4479 4480 if (!xfs_sb_version_hascrc(&mp->m_sb)) 4481 return __this_address; 4482 if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid)) 4483 return __this_address; 4484 if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn)) 4485 return __this_address; 4486 if (pag && be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno) 4487 return __this_address; 4488 return NULL; 4489 } 4490 4491 /** 4492 * xfs_btree_sblock_verify() -- verify a short-format btree block 4493 * 4494 * @bp: buffer containing the btree block 4495 * @max_recs: maximum records allowed in this btree node 4496 */ 4497 xfs_failaddr_t 4498 xfs_btree_sblock_verify( 4499 struct xfs_buf *bp, 4500 unsigned int max_recs) 4501 { 4502 struct xfs_mount *mp = bp->b_mount; 4503 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 4504 xfs_agblock_t agno; 4505 4506 /* numrecs verification */ 4507 if (be16_to_cpu(block->bb_numrecs) > max_recs) 4508 return __this_address; 4509 4510 /* sibling pointer verification */ 4511 agno = xfs_daddr_to_agno(mp, XFS_BUF_ADDR(bp)); 4512 if (block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK) && 4513 !xfs_verify_agbno(mp, agno, be32_to_cpu(block->bb_u.s.bb_leftsib))) 4514 return __this_address; 4515 if (block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK) && 4516 !xfs_verify_agbno(mp, agno, be32_to_cpu(block->bb_u.s.bb_rightsib))) 4517 return __this_address; 4518 4519 return NULL; 4520 } 4521 4522 /* 4523 * Calculate the number of btree levels needed to store a given number of 4524 * records in a short-format btree. 4525 */ 4526 uint 4527 xfs_btree_compute_maxlevels( 4528 uint *limits, 4529 unsigned long len) 4530 { 4531 uint level; 4532 unsigned long maxblocks; 4533 4534 maxblocks = (len + limits[0] - 1) / limits[0]; 4535 for (level = 1; maxblocks > 1; level++) 4536 maxblocks = (maxblocks + limits[1] - 1) / limits[1]; 4537 return level; 4538 } 4539 4540 /* 4541 * Query a regular btree for all records overlapping a given interval. 4542 * Start with a LE lookup of the key of low_rec and return all records 4543 * until we find a record with a key greater than the key of high_rec. 4544 */ 4545 STATIC int 4546 xfs_btree_simple_query_range( 4547 struct xfs_btree_cur *cur, 4548 union xfs_btree_key *low_key, 4549 union xfs_btree_key *high_key, 4550 xfs_btree_query_range_fn fn, 4551 void *priv) 4552 { 4553 union xfs_btree_rec *recp; 4554 union xfs_btree_key rec_key; 4555 int64_t diff; 4556 int stat; 4557 bool firstrec = true; 4558 int error; 4559 4560 ASSERT(cur->bc_ops->init_high_key_from_rec); 4561 ASSERT(cur->bc_ops->diff_two_keys); 4562 4563 /* 4564 * Find the leftmost record. The btree cursor must be set 4565 * to the low record used to generate low_key. 4566 */ 4567 stat = 0; 4568 error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, &stat); 4569 if (error) 4570 goto out; 4571 4572 /* Nothing? See if there's anything to the right. */ 4573 if (!stat) { 4574 error = xfs_btree_increment(cur, 0, &stat); 4575 if (error) 4576 goto out; 4577 } 4578 4579 while (stat) { 4580 /* Find the record. */ 4581 error = xfs_btree_get_rec(cur, &recp, &stat); 4582 if (error || !stat) 4583 break; 4584 4585 /* Skip if high_key(rec) < low_key. */ 4586 if (firstrec) { 4587 cur->bc_ops->init_high_key_from_rec(&rec_key, recp); 4588 firstrec = false; 4589 diff = cur->bc_ops->diff_two_keys(cur, low_key, 4590 &rec_key); 4591 if (diff > 0) 4592 goto advloop; 4593 } 4594 4595 /* Stop if high_key < low_key(rec). */ 4596 cur->bc_ops->init_key_from_rec(&rec_key, recp); 4597 diff = cur->bc_ops->diff_two_keys(cur, &rec_key, high_key); 4598 if (diff > 0) 4599 break; 4600 4601 /* Callback */ 4602 error = fn(cur, recp, priv); 4603 if (error < 0 || error == XFS_BTREE_QUERY_RANGE_ABORT) 4604 break; 4605 4606 advloop: 4607 /* Move on to the next record. */ 4608 error = xfs_btree_increment(cur, 0, &stat); 4609 if (error) 4610 break; 4611 } 4612 4613 out: 4614 return error; 4615 } 4616 4617 /* 4618 * Query an overlapped interval btree for all records overlapping a given 4619 * interval. This function roughly follows the algorithm given in 4620 * "Interval Trees" of _Introduction to Algorithms_, which is section 4621 * 14.3 in the 2nd and 3rd editions. 4622 * 4623 * First, generate keys for the low and high records passed in. 4624 * 4625 * For any leaf node, generate the high and low keys for the record. 4626 * If the record keys overlap with the query low/high keys, pass the 4627 * record to the function iterator. 4628 * 4629 * For any internal node, compare the low and high keys of each 4630 * pointer against the query low/high keys. If there's an overlap, 4631 * follow the pointer. 4632 * 4633 * As an optimization, we stop scanning a block when we find a low key 4634 * that is greater than the query's high key. 4635 */ 4636 STATIC int 4637 xfs_btree_overlapped_query_range( 4638 struct xfs_btree_cur *cur, 4639 union xfs_btree_key *low_key, 4640 union xfs_btree_key *high_key, 4641 xfs_btree_query_range_fn fn, 4642 void *priv) 4643 { 4644 union xfs_btree_ptr ptr; 4645 union xfs_btree_ptr *pp; 4646 union xfs_btree_key rec_key; 4647 union xfs_btree_key rec_hkey; 4648 union xfs_btree_key *lkp; 4649 union xfs_btree_key *hkp; 4650 union xfs_btree_rec *recp; 4651 struct xfs_btree_block *block; 4652 int64_t ldiff; 4653 int64_t hdiff; 4654 int level; 4655 struct xfs_buf *bp; 4656 int i; 4657 int error; 4658 4659 /* Load the root of the btree. */ 4660 level = cur->bc_nlevels - 1; 4661 cur->bc_ops->init_ptr_from_cur(cur, &ptr); 4662 error = xfs_btree_lookup_get_block(cur, level, &ptr, &block); 4663 if (error) 4664 return error; 4665 xfs_btree_get_block(cur, level, &bp); 4666 trace_xfs_btree_overlapped_query_range(cur, level, bp); 4667 #ifdef DEBUG 4668 error = xfs_btree_check_block(cur, block, level, bp); 4669 if (error) 4670 goto out; 4671 #endif 4672 cur->bc_ptrs[level] = 1; 4673 4674 while (level < cur->bc_nlevels) { 4675 block = xfs_btree_get_block(cur, level, &bp); 4676 4677 /* End of node, pop back towards the root. */ 4678 if (cur->bc_ptrs[level] > be16_to_cpu(block->bb_numrecs)) { 4679 pop_up: 4680 if (level < cur->bc_nlevels - 1) 4681 cur->bc_ptrs[level + 1]++; 4682 level++; 4683 continue; 4684 } 4685 4686 if (level == 0) { 4687 /* Handle a leaf node. */ 4688 recp = xfs_btree_rec_addr(cur, cur->bc_ptrs[0], block); 4689 4690 cur->bc_ops->init_high_key_from_rec(&rec_hkey, recp); 4691 ldiff = cur->bc_ops->diff_two_keys(cur, &rec_hkey, 4692 low_key); 4693 4694 cur->bc_ops->init_key_from_rec(&rec_key, recp); 4695 hdiff = cur->bc_ops->diff_two_keys(cur, high_key, 4696 &rec_key); 4697 4698 /* 4699 * If (record's high key >= query's low key) and 4700 * (query's high key >= record's low key), then 4701 * this record overlaps the query range; callback. 4702 */ 4703 if (ldiff >= 0 && hdiff >= 0) { 4704 error = fn(cur, recp, priv); 4705 if (error < 0 || 4706 error == XFS_BTREE_QUERY_RANGE_ABORT) 4707 break; 4708 } else if (hdiff < 0) { 4709 /* Record is larger than high key; pop. */ 4710 goto pop_up; 4711 } 4712 cur->bc_ptrs[level]++; 4713 continue; 4714 } 4715 4716 /* Handle an internal node. */ 4717 lkp = xfs_btree_key_addr(cur, cur->bc_ptrs[level], block); 4718 hkp = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level], block); 4719 pp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[level], block); 4720 4721 ldiff = cur->bc_ops->diff_two_keys(cur, hkp, low_key); 4722 hdiff = cur->bc_ops->diff_two_keys(cur, high_key, lkp); 4723 4724 /* 4725 * If (pointer's high key >= query's low key) and 4726 * (query's high key >= pointer's low key), then 4727 * this record overlaps the query range; follow pointer. 4728 */ 4729 if (ldiff >= 0 && hdiff >= 0) { 4730 level--; 4731 error = xfs_btree_lookup_get_block(cur, level, pp, 4732 &block); 4733 if (error) 4734 goto out; 4735 xfs_btree_get_block(cur, level, &bp); 4736 trace_xfs_btree_overlapped_query_range(cur, level, bp); 4737 #ifdef DEBUG 4738 error = xfs_btree_check_block(cur, block, level, bp); 4739 if (error) 4740 goto out; 4741 #endif 4742 cur->bc_ptrs[level] = 1; 4743 continue; 4744 } else if (hdiff < 0) { 4745 /* The low key is larger than the upper range; pop. */ 4746 goto pop_up; 4747 } 4748 cur->bc_ptrs[level]++; 4749 } 4750 4751 out: 4752 /* 4753 * If we don't end this function with the cursor pointing at a record 4754 * block, a subsequent non-error cursor deletion will not release 4755 * node-level buffers, causing a buffer leak. This is quite possible 4756 * with a zero-results range query, so release the buffers if we 4757 * failed to return any results. 4758 */ 4759 if (cur->bc_bufs[0] == NULL) { 4760 for (i = 0; i < cur->bc_nlevels; i++) { 4761 if (cur->bc_bufs[i]) { 4762 xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[i]); 4763 cur->bc_bufs[i] = NULL; 4764 cur->bc_ptrs[i] = 0; 4765 cur->bc_ra[i] = 0; 4766 } 4767 } 4768 } 4769 4770 return error; 4771 } 4772 4773 /* 4774 * Query a btree for all records overlapping a given interval of keys. The 4775 * supplied function will be called with each record found; return one of the 4776 * XFS_BTREE_QUERY_RANGE_{CONTINUE,ABORT} values or the usual negative error 4777 * code. This function returns XFS_BTREE_QUERY_RANGE_ABORT, zero, or a 4778 * negative error code. 4779 */ 4780 int 4781 xfs_btree_query_range( 4782 struct xfs_btree_cur *cur, 4783 union xfs_btree_irec *low_rec, 4784 union xfs_btree_irec *high_rec, 4785 xfs_btree_query_range_fn fn, 4786 void *priv) 4787 { 4788 union xfs_btree_rec rec; 4789 union xfs_btree_key low_key; 4790 union xfs_btree_key high_key; 4791 4792 /* Find the keys of both ends of the interval. */ 4793 cur->bc_rec = *high_rec; 4794 cur->bc_ops->init_rec_from_cur(cur, &rec); 4795 cur->bc_ops->init_key_from_rec(&high_key, &rec); 4796 4797 cur->bc_rec = *low_rec; 4798 cur->bc_ops->init_rec_from_cur(cur, &rec); 4799 cur->bc_ops->init_key_from_rec(&low_key, &rec); 4800 4801 /* Enforce low key < high key. */ 4802 if (cur->bc_ops->diff_two_keys(cur, &low_key, &high_key) > 0) 4803 return -EINVAL; 4804 4805 if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING)) 4806 return xfs_btree_simple_query_range(cur, &low_key, 4807 &high_key, fn, priv); 4808 return xfs_btree_overlapped_query_range(cur, &low_key, &high_key, 4809 fn, priv); 4810 } 4811 4812 /* Query a btree for all records. */ 4813 int 4814 xfs_btree_query_all( 4815 struct xfs_btree_cur *cur, 4816 xfs_btree_query_range_fn fn, 4817 void *priv) 4818 { 4819 union xfs_btree_key low_key; 4820 union xfs_btree_key high_key; 4821 4822 memset(&cur->bc_rec, 0, sizeof(cur->bc_rec)); 4823 memset(&low_key, 0, sizeof(low_key)); 4824 memset(&high_key, 0xFF, sizeof(high_key)); 4825 4826 return xfs_btree_simple_query_range(cur, &low_key, &high_key, fn, priv); 4827 } 4828 4829 /* 4830 * Calculate the number of blocks needed to store a given number of records 4831 * in a short-format (per-AG metadata) btree. 4832 */ 4833 unsigned long long 4834 xfs_btree_calc_size( 4835 uint *limits, 4836 unsigned long long len) 4837 { 4838 int level; 4839 int maxrecs; 4840 unsigned long long rval; 4841 4842 maxrecs = limits[0]; 4843 for (level = 0, rval = 0; len > 1; level++) { 4844 len += maxrecs - 1; 4845 do_div(len, maxrecs); 4846 maxrecs = limits[1]; 4847 rval += len; 4848 } 4849 return rval; 4850 } 4851 4852 static int 4853 xfs_btree_count_blocks_helper( 4854 struct xfs_btree_cur *cur, 4855 int level, 4856 void *data) 4857 { 4858 xfs_extlen_t *blocks = data; 4859 (*blocks)++; 4860 4861 return 0; 4862 } 4863 4864 /* Count the blocks in a btree and return the result in *blocks. */ 4865 int 4866 xfs_btree_count_blocks( 4867 struct xfs_btree_cur *cur, 4868 xfs_extlen_t *blocks) 4869 { 4870 *blocks = 0; 4871 return xfs_btree_visit_blocks(cur, xfs_btree_count_blocks_helper, 4872 blocks); 4873 } 4874 4875 /* Compare two btree pointers. */ 4876 int64_t 4877 xfs_btree_diff_two_ptrs( 4878 struct xfs_btree_cur *cur, 4879 const union xfs_btree_ptr *a, 4880 const union xfs_btree_ptr *b) 4881 { 4882 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) 4883 return (int64_t)be64_to_cpu(a->l) - be64_to_cpu(b->l); 4884 return (int64_t)be32_to_cpu(a->s) - be32_to_cpu(b->s); 4885 } 4886 4887 /* If there's an extent, we're done. */ 4888 STATIC int 4889 xfs_btree_has_record_helper( 4890 struct xfs_btree_cur *cur, 4891 union xfs_btree_rec *rec, 4892 void *priv) 4893 { 4894 return XFS_BTREE_QUERY_RANGE_ABORT; 4895 } 4896 4897 /* Is there a record covering a given range of keys? */ 4898 int 4899 xfs_btree_has_record( 4900 struct xfs_btree_cur *cur, 4901 union xfs_btree_irec *low, 4902 union xfs_btree_irec *high, 4903 bool *exists) 4904 { 4905 int error; 4906 4907 error = xfs_btree_query_range(cur, low, high, 4908 &xfs_btree_has_record_helper, NULL); 4909 if (error == XFS_BTREE_QUERY_RANGE_ABORT) { 4910 *exists = true; 4911 return 0; 4912 } 4913 *exists = false; 4914 return error; 4915 } 4916 4917 /* Are there more records in this btree? */ 4918 bool 4919 xfs_btree_has_more_records( 4920 struct xfs_btree_cur *cur) 4921 { 4922 struct xfs_btree_block *block; 4923 struct xfs_buf *bp; 4924 4925 block = xfs_btree_get_block(cur, 0, &bp); 4926 4927 /* There are still records in this block. */ 4928 if (cur->bc_ptrs[0] < xfs_btree_get_numrecs(block)) 4929 return true; 4930 4931 /* There are more record blocks. */ 4932 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) 4933 return block->bb_u.l.bb_rightsib != cpu_to_be64(NULLFSBLOCK); 4934 else 4935 return block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK); 4936 } 4937