// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_btree.h"
#include "xfs_errortag.h"
#include "xfs_error.h"
#include "xfs_trace.h"
#include "xfs_alloc.h"
#include "xfs_log.h"

/*
 * Cursor allocation zone.
 */
kmem_zone_t	*xfs_btree_cur_zone;

/*
 * Btree magic numbers.
 *
 * Indexed by [crc][btnum]: row 0 holds the non-CRC (v4) magics, row 1 the
 * CRC-enabled (v5) magics.  A zero entry means that btree type only exists
 * in the other format (e.g. rmap/refcount require CRC-enabled filesystems).
 */
static const uint32_t xfs_magics[2][XFS_BTNUM_MAX] = {
	{ XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, 0, XFS_BMAP_MAGIC, XFS_IBT_MAGIC,
	  XFS_FIBT_MAGIC, 0 },
	{ XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC, XFS_RMAP_CRC_MAGIC,
	  XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC, XFS_FIBT_CRC_MAGIC,
	  XFS_REFC_CRC_MAGIC }
};

/*
 * Return the on-disk magic value for the given btree type; @crc selects
 * the CRC-enabled variant.
 */
uint32_t
xfs_btree_magic(
	int			crc,
	xfs_btnum_t		btnum)
{
	uint32_t		magic = xfs_magics[crc][btnum];

	/* Ensure we asked for crc for crc-only magics. */
	ASSERT(magic != 0);
	return magic;
}

/*
 * Check a long btree block header.  Return the address of the failing check,
 * or NULL if everything is ok.
 */
xfs_failaddr_t
__xfs_btree_check_lblock(
	struct xfs_btree_cur	*cur,
	struct xfs_btree_block	*block,
	int			level,
	struct xfs_buf		*bp)
{
	struct xfs_mount	*mp = cur->bc_mp;
	xfs_btnum_t		btnum = cur->bc_btnum;
	int			crc = xfs_sb_version_hascrc(&mp->m_sb);

	if (crc) {
		/*
		 * v5 self-describing metadata: check uuid, the block's own
		 * daddr, and that the padding field is zeroed.
		 */
		if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid))
			return __this_address;
		if (block->bb_u.l.bb_blkno !=
		    cpu_to_be64(bp ? bp->b_bn : XFS_BUF_DADDR_NULL))
			return __this_address;
		if (block->bb_u.l.bb_pad != cpu_to_be32(0))
			return __this_address;
	}

	if (be32_to_cpu(block->bb_magic) != xfs_btree_magic(crc, btnum))
		return __this_address;
	if (be16_to_cpu(block->bb_level) != level)
		return __this_address;
	if (be16_to_cpu(block->bb_numrecs) >
	    cur->bc_ops->get_maxrecs(cur, level))
		return __this_address;
	/* Sibling pointers are either NULLFSBLOCK or must verify as fsbnos. */
	if (block->bb_u.l.bb_leftsib != cpu_to_be64(NULLFSBLOCK) &&
	    !xfs_btree_check_lptr(cur, be64_to_cpu(block->bb_u.l.bb_leftsib),
			level + 1))
		return __this_address;
	if (block->bb_u.l.bb_rightsib != cpu_to_be64(NULLFSBLOCK) &&
	    !xfs_btree_check_lptr(cur, be64_to_cpu(block->bb_u.l.bb_rightsib),
			level + 1))
		return __this_address;

	return NULL;
}

/*
 * Check a long btree block header.  Converts a verifier failure address
 * (or an injected XFS_ERRTAG_BTREE_CHECK_LBLOCK error) into -EFSCORRUPTED.
 */
static int
xfs_btree_check_lblock(
	struct xfs_btree_cur	*cur,
	struct xfs_btree_block	*block,
	int			level,
	struct xfs_buf		*bp)
{
	struct xfs_mount	*mp = cur->bc_mp;
	xfs_failaddr_t		fa;

	fa = __xfs_btree_check_lblock(cur, block, level, bp);
	if (XFS_IS_CORRUPT(mp, fa != NULL) ||
	    XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BTREE_CHECK_LBLOCK)) {
		if (bp)
			trace_xfs_btree_corrupt(bp, _RET_IP_);
		return -EFSCORRUPTED;
	}
	return 0;
}

/*
 * Check a short btree block header.  Return the address of the failing check,
 * or NULL if everything is ok.
 */
xfs_failaddr_t
__xfs_btree_check_sblock(
	struct xfs_btree_cur	*cur,
	struct xfs_btree_block	*block,
	int			level,
	struct xfs_buf		*bp)
{
	struct xfs_mount	*mp = cur->bc_mp;
	xfs_btnum_t		btnum = cur->bc_btnum;
	int			crc = xfs_sb_version_hascrc(&mp->m_sb);

	if (crc) {
		/* v5 self-describing metadata (short form has no pad field). */
		if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
			return __this_address;
		if (block->bb_u.s.bb_blkno !=
		    cpu_to_be64(bp ? bp->b_bn : XFS_BUF_DADDR_NULL))
			return __this_address;
	}

	if (be32_to_cpu(block->bb_magic) != xfs_btree_magic(crc, btnum))
		return __this_address;
	if (be16_to_cpu(block->bb_level) != level)
		return __this_address;
	if (be16_to_cpu(block->bb_numrecs) >
	    cur->bc_ops->get_maxrecs(cur, level))
		return __this_address;
	/* Sibling pointers are either NULLAGBLOCK or must verify as agbnos. */
	if (block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK) &&
	    !xfs_btree_check_sptr(cur, be32_to_cpu(block->bb_u.s.bb_leftsib),
			level + 1))
		return __this_address;
	if (block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK) &&
	    !xfs_btree_check_sptr(cur, be32_to_cpu(block->bb_u.s.bb_rightsib),
			level + 1))
		return __this_address;

	return NULL;
}

/* Check a short btree block header. */
STATIC int
xfs_btree_check_sblock(
	struct xfs_btree_cur	*cur,
	struct xfs_btree_block	*block,
	int			level,
	struct xfs_buf		*bp)
{
	struct xfs_mount	*mp = cur->bc_mp;
	xfs_failaddr_t		fa;

	fa = __xfs_btree_check_sblock(cur, block, level, bp);
	if (XFS_IS_CORRUPT(mp, fa != NULL) ||
	    XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BTREE_CHECK_SBLOCK)) {
		if (bp)
			trace_xfs_btree_corrupt(bp, _RET_IP_);
		return -EFSCORRUPTED;
	}
	return 0;
}

/*
 * Debug routine: check that block header is ok.
 */
int
xfs_btree_check_block(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	struct xfs_btree_block	*block,	/* generic btree block pointer */
	int			level,	/* level of the btree block */
	struct xfs_buf		*bp)	/* buffer containing block, if any */
{
	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
		return xfs_btree_check_lblock(cur, block, level, bp);
	else
		return xfs_btree_check_sblock(cur, block, level, bp);
}

/* Check that this long pointer is valid and points within the fs.
 */
bool
xfs_btree_check_lptr(
	struct xfs_btree_cur	*cur,
	xfs_fsblock_t		fsbno,
	int			level)
{
	/* Level 0 blocks never contain pointers, so level must be >= 1. */
	if (level <= 0)
		return false;
	return xfs_verify_fsbno(cur->bc_mp, fsbno);
}

/* Check that this short pointer is valid and points within the AG. */
bool
xfs_btree_check_sptr(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		agbno,
	int			level)
{
	/* Level 0 blocks never contain pointers, so level must be >= 1. */
	if (level <= 0)
		return false;
	return xfs_verify_agbno(cur->bc_mp, cur->bc_private.a.agno, agbno);
}

/*
 * Check that a given (indexed) btree pointer at a certain level of a
 * btree is valid and doesn't point past where it should.
 * Returns 0 if the pointer is good, -EFSCORRUPTED (after logging an
 * error message) otherwise.
 */
static int
xfs_btree_check_ptr(
	struct xfs_btree_cur	*cur,
	union xfs_btree_ptr	*ptr,
	int			index,
	int			level)
{
	if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
		if (xfs_btree_check_lptr(cur, be64_to_cpu((&ptr->l)[index]),
				level))
			return 0;
		/* Long-pointer btrees are rooted in inodes (e.g. bmap). */
		xfs_err(cur->bc_mp,
"Inode %llu fork %d: Corrupt btree %d pointer at level %d index %d.",
				cur->bc_private.b.ip->i_ino,
				cur->bc_private.b.whichfork, cur->bc_btnum,
				level, index);
	} else {
		if (xfs_btree_check_sptr(cur, be32_to_cpu((&ptr->s)[index]),
				level))
			return 0;
		xfs_err(cur->bc_mp,
"AG %u: Corrupt btree %d pointer at level %d index %d.",
				cur->bc_private.a.agno, cur->bc_btnum,
				level, index);
	}

	return -EFSCORRUPTED;
}

/* Pointer checks are compiled out of non-DEBUG kernels. */
#ifdef DEBUG
# define xfs_btree_debug_check_ptr	xfs_btree_check_ptr
#else
# define xfs_btree_debug_check_ptr(...)	(0)
#endif

/*
 * Calculate CRC on the whole btree block and stuff it into the
 * long-form btree header.
 *
 * Prior to calculating the CRC, pull the LSN out of the buffer log item and
 * put it into the buffer so recovery knows what the last modification was
 * that made it to disk.
266 */ 267 void 268 xfs_btree_lblock_calc_crc( 269 struct xfs_buf *bp) 270 { 271 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 272 struct xfs_buf_log_item *bip = bp->b_log_item; 273 274 if (!xfs_sb_version_hascrc(&bp->b_mount->m_sb)) 275 return; 276 if (bip) 277 block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn); 278 xfs_buf_update_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF); 279 } 280 281 bool 282 xfs_btree_lblock_verify_crc( 283 struct xfs_buf *bp) 284 { 285 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 286 struct xfs_mount *mp = bp->b_mount; 287 288 if (xfs_sb_version_hascrc(&mp->m_sb)) { 289 if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.l.bb_lsn))) 290 return false; 291 return xfs_buf_verify_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF); 292 } 293 294 return true; 295 } 296 297 /* 298 * Calculate CRC on the whole btree block and stuff it into the 299 * short-form btree header. 300 * 301 * Prior to calculting the CRC, pull the LSN out of the buffer log item and put 302 * it into the buffer so recovery knows what the last modification was that made 303 * it to disk. 
 */
void
xfs_btree_sblock_calc_crc(
	struct xfs_buf		*bp)
{
	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
	struct xfs_buf_log_item	*bip = bp->b_log_item;

	/* CRCs only exist on v5 filesystems. */
	if (!xfs_sb_version_hascrc(&bp->b_mount->m_sb))
		return;
	if (bip)
		block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
	xfs_buf_update_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
}

bool
xfs_btree_sblock_verify_crc(
	struct xfs_buf		*bp)
{
	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
	struct xfs_mount	*mp = bp->b_mount;

	if (xfs_sb_version_hascrc(&mp->m_sb)) {
		if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.s.bb_lsn)))
			return false;
		return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
	}

	return true;
}

/*
 * Free a btree block via the per-btree free_block method, then invalidate
 * the buffer in the transaction so the freed block is not written back.
 */
static int
xfs_btree_free_block(
	struct xfs_btree_cur	*cur,
	struct xfs_buf		*bp)
{
	int			error;

	error = cur->bc_ops->free_block(cur, bp);
	if (!error) {
		xfs_trans_binval(cur->bc_tp, bp);
		XFS_BTREE_STATS_INC(cur, free);
	}
	return error;
}

/*
 * Delete the btree cursor.
 */
void
xfs_btree_del_cursor(
	xfs_btree_cur_t	*cur,		/* btree cursor */
	int		error)		/* del because of error */
{
	int		i;		/* btree level */

	/*
	 * Clear the buffer pointers, and release the buffers.
	 * If we're doing this in the face of an error, we
	 * need to make sure to inspect all of the entries
	 * in the bc_bufs array for buffers to be unlocked.
	 * This is because some of the btree code works from
	 * level n down to 0, and if we get an error along
	 * the way we won't have initialized all the entries
	 * down to 0.
	 */
	for (i = 0; i < cur->bc_nlevels; i++) {
		if (cur->bc_bufs[i])
			xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[i]);
		else if (!error)
			break;
	}
	/*
	 * Can't free a bmap cursor without having dealt with the
	 * allocated indirect blocks' accounting.
	 */
	ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP ||
	       cur->bc_private.b.allocated == 0);
	/*
	 * Free the cursor.
	 */
	kmem_cache_free(xfs_btree_cur_zone, cur);
}

/*
 * Duplicate the btree cursor.
 * Allocate a new one, copy the record, re-get the buffers.
 */
int					/* error */
xfs_btree_dup_cursor(
	xfs_btree_cur_t	*cur,		/* input cursor */
	xfs_btree_cur_t	**ncur)		/* output cursor */
{
	xfs_buf_t	*bp;		/* btree block's buffer pointer */
	int		error;		/* error return value */
	int		i;		/* level number of btree block */
	xfs_mount_t	*mp;		/* mount structure for filesystem */
	xfs_btree_cur_t	*new;		/* new cursor value */
	xfs_trans_t	*tp;		/* transaction pointer, can be NULL */

	tp = cur->bc_tp;
	mp = cur->bc_mp;

	/*
	 * Allocate a new cursor like the old one.
	 */
	new = cur->bc_ops->dup_cursor(cur);

	/*
	 * Copy the record currently in the cursor.
	 */
	new->bc_rec = cur->bc_rec;

	/*
	 * For each level current, re-get the buffer and copy the ptr value.
	 */
	for (i = 0; i < new->bc_nlevels; i++) {
		new->bc_ptrs[i] = cur->bc_ptrs[i];
		new->bc_ra[i] = cur->bc_ra[i];
		bp = cur->bc_bufs[i];
		if (bp) {
			/* Re-read so the new cursor holds its own reference. */
			error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
						   XFS_BUF_ADDR(bp), mp->m_bsize,
						   0, &bp,
						   cur->bc_ops->buf_ops);
			if (error) {
				/* Tear down the partial duplicate. */
				xfs_btree_del_cursor(new, error);
				*ncur = NULL;
				return error;
			}
		}
		new->bc_bufs[i] = bp;
	}
	*ncur = new;
	return 0;
}

/*
 * XFS btree block layout and addressing:
 *
 * There are two types of blocks in the btree: leaf and non-leaf blocks.
 *
 * A leaf block starts with a header, followed by records containing
 * the values. A non-leaf block also starts with the same header, and
 * then first contains lookup keys followed by an equal number of pointers
 * to the btree blocks at the previous level.
450 * 451 * +--------+-------+-------+-------+-------+-------+-------+ 452 * Leaf: | header | rec 1 | rec 2 | rec 3 | rec 4 | rec 5 | rec N | 453 * +--------+-------+-------+-------+-------+-------+-------+ 454 * 455 * +--------+-------+-------+-------+-------+-------+-------+ 456 * Non-Leaf: | header | key 1 | key 2 | key N | ptr 1 | ptr 2 | ptr N | 457 * +--------+-------+-------+-------+-------+-------+-------+ 458 * 459 * The header is called struct xfs_btree_block for reasons better left unknown 460 * and comes in different versions for short (32bit) and long (64bit) block 461 * pointers. The record and key structures are defined by the btree instances 462 * and opaque to the btree core. The block pointers are simple disk endian 463 * integers, available in a short (32bit) and long (64bit) variant. 464 * 465 * The helpers below calculate the offset of a given record, key or pointer 466 * into a btree block (xfs_btree_*_offset) or return a pointer to the given 467 * record, key or pointer (xfs_btree_*_addr). Note that all addressing 468 * inside the btree block is done using indices starting at one, not zero! 469 * 470 * If XFS_BTREE_OVERLAPPING is set, then this btree supports keys containing 471 * overlapping intervals. In such a tree, records are still sorted lowest to 472 * highest and indexed by the smallest key value that refers to the record. 473 * However, nodes are different: each pointer has two associated keys -- one 474 * indexing the lowest key available in the block(s) below (the same behavior 475 * as the key in a regular btree) and another indexing the highest key 476 * available in the block(s) below. Because records are /not/ sorted by the 477 * highest key, all leaf block updates require us to compute the highest key 478 * that matches any record in the leaf and to recursively update the high keys 479 * in the nodes going further up in the tree, if necessary. 
Nodes look like
 * this:
 *
 *		+--------+-----+-----+-----+-----+-----+-------+-------+-----+
 * Non-Leaf:	| header | lo1 | hi1 | lo2 | hi2 | ... | ptr 1 | ptr 2 | ... |
 *		+--------+-----+-----+-----+-----+-----+-------+-------+-----+
 *
 * To perform an interval query on an overlapped tree, perform the usual
 * depth-first search and use the low and high keys to decide if we can skip
 * that particular node.  If a leaf node is reached, return the records that
 * intersect the interval.  Note that an interval query may return numerous
 * entries.  For a non-overlapped tree, simply search for the record associated
 * with the lowest key and iterate forward until a non-matching record is
 * found.  Section 14.3 ("Interval Trees") of _Introduction to Algorithms_ by
 * Cormen, Leiserson, Rivest, and Stein (2nd or 3rd ed. only) discusses this in
 * more detail.
 *
 * Why do we care about overlapping intervals?  Let's say you have a bunch of
 * reverse mapping records on a reflink filesystem:
 *
 * 1: +- file A startblock B offset C length D -----------+
 * 2:      +- file E startblock F offset G length H --------------+
 * 3:      +- file I startblock F offset J length K --+
 * 4:                                                        +- file L... --+
 *
 * Now say we want to map block (B+D) into file A at offset (C+D).  Ideally,
 * we'd simply increment the length of record 1.  But how do we find the record
 * that ends at (B+D-1) (i.e. record 1)?  A LE lookup of (B+D-1) would return
 * record 3 because the keys are ordered first by startblock.  An interval
 * query would return records 1 and 2 because they both overlap (B+D-1), and
 * from that we can pick out record 1 as the appropriate left neighbor.
 *
 * In the non-overlapped case you can do a LE lookup and decrement the cursor
 * because a record's interval must end before the next record.
 */

/*
 * Return size of the btree block header for this btree instance.
517 */ 518 static inline size_t xfs_btree_block_len(struct xfs_btree_cur *cur) 519 { 520 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { 521 if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) 522 return XFS_BTREE_LBLOCK_CRC_LEN; 523 return XFS_BTREE_LBLOCK_LEN; 524 } 525 if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) 526 return XFS_BTREE_SBLOCK_CRC_LEN; 527 return XFS_BTREE_SBLOCK_LEN; 528 } 529 530 /* 531 * Return size of btree block pointers for this btree instance. 532 */ 533 static inline size_t xfs_btree_ptr_len(struct xfs_btree_cur *cur) 534 { 535 return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ? 536 sizeof(__be64) : sizeof(__be32); 537 } 538 539 /* 540 * Calculate offset of the n-th record in a btree block. 541 */ 542 STATIC size_t 543 xfs_btree_rec_offset( 544 struct xfs_btree_cur *cur, 545 int n) 546 { 547 return xfs_btree_block_len(cur) + 548 (n - 1) * cur->bc_ops->rec_len; 549 } 550 551 /* 552 * Calculate offset of the n-th key in a btree block. 553 */ 554 STATIC size_t 555 xfs_btree_key_offset( 556 struct xfs_btree_cur *cur, 557 int n) 558 { 559 return xfs_btree_block_len(cur) + 560 (n - 1) * cur->bc_ops->key_len; 561 } 562 563 /* 564 * Calculate offset of the n-th high key in a btree block. 565 */ 566 STATIC size_t 567 xfs_btree_high_key_offset( 568 struct xfs_btree_cur *cur, 569 int n) 570 { 571 return xfs_btree_block_len(cur) + 572 (n - 1) * cur->bc_ops->key_len + (cur->bc_ops->key_len / 2); 573 } 574 575 /* 576 * Calculate offset of the n-th block pointer in a btree block. 577 */ 578 STATIC size_t 579 xfs_btree_ptr_offset( 580 struct xfs_btree_cur *cur, 581 int n, 582 int level) 583 { 584 return xfs_btree_block_len(cur) + 585 cur->bc_ops->get_maxrecs(cur, level) * cur->bc_ops->key_len + 586 (n - 1) * xfs_btree_ptr_len(cur); 587 } 588 589 /* 590 * Return a pointer to the n-th record in the btree block. 
591 */ 592 union xfs_btree_rec * 593 xfs_btree_rec_addr( 594 struct xfs_btree_cur *cur, 595 int n, 596 struct xfs_btree_block *block) 597 { 598 return (union xfs_btree_rec *) 599 ((char *)block + xfs_btree_rec_offset(cur, n)); 600 } 601 602 /* 603 * Return a pointer to the n-th key in the btree block. 604 */ 605 union xfs_btree_key * 606 xfs_btree_key_addr( 607 struct xfs_btree_cur *cur, 608 int n, 609 struct xfs_btree_block *block) 610 { 611 return (union xfs_btree_key *) 612 ((char *)block + xfs_btree_key_offset(cur, n)); 613 } 614 615 /* 616 * Return a pointer to the n-th high key in the btree block. 617 */ 618 union xfs_btree_key * 619 xfs_btree_high_key_addr( 620 struct xfs_btree_cur *cur, 621 int n, 622 struct xfs_btree_block *block) 623 { 624 return (union xfs_btree_key *) 625 ((char *)block + xfs_btree_high_key_offset(cur, n)); 626 } 627 628 /* 629 * Return a pointer to the n-th block pointer in the btree block. 630 */ 631 union xfs_btree_ptr * 632 xfs_btree_ptr_addr( 633 struct xfs_btree_cur *cur, 634 int n, 635 struct xfs_btree_block *block) 636 { 637 int level = xfs_btree_get_level(block); 638 639 ASSERT(block->bb_level != 0); 640 641 return (union xfs_btree_ptr *) 642 ((char *)block + xfs_btree_ptr_offset(cur, n, level)); 643 } 644 645 /* 646 * Get the root block which is stored in the inode. 647 * 648 * For now this btree implementation assumes the btree root is always 649 * stored in the if_broot field of an inode fork. 650 */ 651 STATIC struct xfs_btree_block * 652 xfs_btree_get_iroot( 653 struct xfs_btree_cur *cur) 654 { 655 struct xfs_ifork *ifp; 656 657 ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, cur->bc_private.b.whichfork); 658 return (struct xfs_btree_block *)ifp->if_broot; 659 } 660 661 /* 662 * Retrieve the block pointer from the cursor at the given level. 663 * This may be an inode btree root or from a buffer. 
 */
struct xfs_btree_block *		/* generic btree block pointer */
xfs_btree_get_block(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	int			level,	/* level in btree */
	struct xfs_buf		**bpp)	/* buffer containing the block */
{
	/* The root of an inode-rooted btree lives in the fork, not a buffer. */
	if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
	    (level == cur->bc_nlevels - 1)) {
		*bpp = NULL;
		return xfs_btree_get_iroot(cur);
	}

	*bpp = cur->bc_bufs[level];
	return XFS_BUF_TO_BLOCK(*bpp);
}

/*
 * Get a buffer for the block, return it with no data read.
 * Long-form addressing.
 */
xfs_buf_t *				/* buffer for fsbno */
xfs_btree_get_bufl(
	xfs_mount_t	*mp,		/* file system mount point */
	xfs_trans_t	*tp,		/* transaction pointer */
	xfs_fsblock_t	fsbno)		/* file system block number */
{
	xfs_daddr_t		d;	/* real disk block address */

	ASSERT(fsbno != NULLFSBLOCK);
	d = XFS_FSB_TO_DADDR(mp, fsbno);
	return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, 0);
}

/*
 * Get a buffer for the block, return it with no data read.
 * Short-form addressing.
 */
xfs_buf_t *				/* buffer for agno/agbno */
xfs_btree_get_bufs(
	xfs_mount_t	*mp,		/* file system mount point */
	xfs_trans_t	*tp,		/* transaction pointer */
	xfs_agnumber_t	agno,		/* allocation group number */
	xfs_agblock_t	agbno)		/* allocation group block number */
{
	xfs_daddr_t		d;	/* real disk block address */

	ASSERT(agno != NULLAGNUMBER);
	ASSERT(agbno != NULLAGBLOCK);
	d = XFS_AGB_TO_DADDR(mp, agno, agbno);
	return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, 0);
}

/*
 * Change the cursor to point to the first record at the given level.
 * Other levels are unaffected.
720 */ 721 STATIC int /* success=1, failure=0 */ 722 xfs_btree_firstrec( 723 xfs_btree_cur_t *cur, /* btree cursor */ 724 int level) /* level to change */ 725 { 726 struct xfs_btree_block *block; /* generic btree block pointer */ 727 xfs_buf_t *bp; /* buffer containing block */ 728 729 /* 730 * Get the block pointer for this level. 731 */ 732 block = xfs_btree_get_block(cur, level, &bp); 733 if (xfs_btree_check_block(cur, block, level, bp)) 734 return 0; 735 /* 736 * It's empty, there is no such record. 737 */ 738 if (!block->bb_numrecs) 739 return 0; 740 /* 741 * Set the ptr value to 1, that's the first record/key. 742 */ 743 cur->bc_ptrs[level] = 1; 744 return 1; 745 } 746 747 /* 748 * Change the cursor to point to the last record in the current block 749 * at the given level. Other levels are unaffected. 750 */ 751 STATIC int /* success=1, failure=0 */ 752 xfs_btree_lastrec( 753 xfs_btree_cur_t *cur, /* btree cursor */ 754 int level) /* level to change */ 755 { 756 struct xfs_btree_block *block; /* generic btree block pointer */ 757 xfs_buf_t *bp; /* buffer containing block */ 758 759 /* 760 * Get the block pointer for this level. 761 */ 762 block = xfs_btree_get_block(cur, level, &bp); 763 if (xfs_btree_check_block(cur, block, level, bp)) 764 return 0; 765 /* 766 * It's empty, there is no such record. 767 */ 768 if (!block->bb_numrecs) 769 return 0; 770 /* 771 * Set the ptr value to numrecs, that's the last record/key. 772 */ 773 cur->bc_ptrs[level] = be16_to_cpu(block->bb_numrecs); 774 return 1; 775 } 776 777 /* 778 * Compute first and last byte offsets for the fields given. 779 * Interprets the offsets table, which contains struct field offsets. 
 */
void
xfs_btree_offsets(
	int64_t		fields,		/* bitmask of fields */
	const short	*offsets,	/* table of field offsets */
	int		nbits,		/* number of bits to inspect */
	int		*first,		/* output: first byte offset */
	int		*last)		/* output: last byte offset */
{
	int		i;		/* current bit number */
	int64_t		imask;		/* mask for current bit number */

	ASSERT(fields != 0);
	/*
	 * Find the lowest bit, so the first byte offset.
	 */
	for (i = 0, imask = 1LL; ; i++, imask <<= 1) {
		if (imask & fields) {
			*first = offsets[i];
			break;
		}
	}
	/*
	 * Find the highest bit, so the last byte offset.
	 * Note: offsets[] must contain nbits + 1 entries, because the last
	 * byte of field i is derived from the start of field i + 1.
	 */
	for (i = nbits - 1, imask = 1LL << i; ; i--, imask >>= 1) {
		if (imask & fields) {
			*last = offsets[i + 1] - 1;
			break;
		}
	}
}

/*
 * Get a buffer for the block, return it read in.
 * Long-form addressing.
 */
int
xfs_btree_read_bufl(
	struct xfs_mount	*mp,		/* file system mount point */
	struct xfs_trans	*tp,		/* transaction pointer */
	xfs_fsblock_t		fsbno,		/* file system block number */
	struct xfs_buf		**bpp,		/* buffer for fsbno */
	int			refval,		/* ref count value for buffer */
	const struct xfs_buf_ops *ops)
{
	struct xfs_buf		*bp;		/* return value */
	xfs_daddr_t		d;		/* real disk block address */
	int			error;

	/* Reject out-of-range block numbers before touching the disk. */
	if (!xfs_verify_fsbno(mp, fsbno))
		return -EFSCORRUPTED;
	d = XFS_FSB_TO_DADDR(mp, fsbno);
	error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
				   mp->m_bsize, 0, &bp, ops);
	if (error)
		return error;
	if (bp)
		xfs_buf_set_ref(bp, refval);
	*bpp = bp;
	return 0;
}

/*
 * Read-ahead the block, don't wait for it, don't return a buffer.
 * Long-form addressing.
 */
/* ARGSUSED */
void
xfs_btree_reada_bufl(
	struct xfs_mount	*mp,		/* file system mount point */
	xfs_fsblock_t		fsbno,		/* file system block number */
	xfs_extlen_t		count,		/* count of filesystem blocks */
	const struct xfs_buf_ops *ops)
{
	xfs_daddr_t		d;

	ASSERT(fsbno != NULLFSBLOCK);
	d = XFS_FSB_TO_DADDR(mp, fsbno);
	xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops);
}

/*
 * Read-ahead the block, don't wait for it, don't return a buffer.
 * Short-form addressing.
 */
/* ARGSUSED */
void
xfs_btree_reada_bufs(
	struct xfs_mount	*mp,		/* file system mount point */
	xfs_agnumber_t		agno,		/* allocation group number */
	xfs_agblock_t		agbno,		/* allocation group block number */
	xfs_extlen_t		count,		/* count of filesystem blocks */
	const struct xfs_buf_ops *ops)
{
	xfs_daddr_t		d;

	ASSERT(agno != NULLAGNUMBER);
	ASSERT(agbno != NULLAGBLOCK);
	d = XFS_AGB_TO_DADDR(mp, agno, agbno);
	xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops);
}

/*
 * Start readahead for this block's siblings, as selected by lr.
 * Returns the number of readaheads issued.
 */
STATIC int
xfs_btree_readahead_lblock(
	struct xfs_btree_cur	*cur,
	int			lr,
	struct xfs_btree_block	*block)
{
	int			rval = 0;
	xfs_fsblock_t		left = be64_to_cpu(block->bb_u.l.bb_leftsib);
	xfs_fsblock_t		right = be64_to_cpu(block->bb_u.l.bb_rightsib);

	if ((lr & XFS_BTCUR_LEFTRA) && left != NULLFSBLOCK) {
		xfs_btree_reada_bufl(cur->bc_mp, left, 1,
				     cur->bc_ops->buf_ops);
		rval++;
	}

	if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLFSBLOCK) {
		xfs_btree_reada_bufl(cur->bc_mp, right, 1,
				     cur->bc_ops->buf_ops);
		rval++;
	}

	return rval;
}

/* Short-form version of the above. */
STATIC int
xfs_btree_readahead_sblock(
	struct xfs_btree_cur	*cur,
	int			lr,
	struct xfs_btree_block *block)
{
	int			rval = 0;
	xfs_agblock_t		left = be32_to_cpu(block->bb_u.s.bb_leftsib);
	xfs_agblock_t		right = be32_to_cpu(block->bb_u.s.bb_rightsib);


	if ((lr & XFS_BTCUR_LEFTRA) && left != NULLAGBLOCK) {
		xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
				     left, 1, cur->bc_ops->buf_ops);
		rval++;
	}

	if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLAGBLOCK) {
		xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
				     right, 1, cur->bc_ops->buf_ops);
		rval++;
	}

	return rval;
}

/*
 * Read-ahead btree blocks, at the given level.
 * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA.
 */
STATIC int
xfs_btree_readahead(
	struct xfs_btree_cur	*cur,		/* btree cursor */
	int			lev,		/* level in btree */
	int			lr)		/* left/right bits */
{
	struct xfs_btree_block	*block;

	/*
	 * No readahead needed if we are at the root level and the
	 * btree root is stored in the inode.
	 */
	if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
	    (lev == cur->bc_nlevels - 1))
		return 0;

	/* Skip if every requested direction has already been read ahead. */
	if ((cur->bc_ra[lev] | lr) == cur->bc_ra[lev])
		return 0;

	cur->bc_ra[lev] |= lr;
	block = XFS_BUF_TO_BLOCK(cur->bc_bufs[lev]);

	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
		return xfs_btree_readahead_lblock(cur, lr, block);
	return xfs_btree_readahead_sblock(cur, lr, block);
}

/*
 * Convert a btree pointer (long or short form) to a disk address.
 * Returns -EFSCORRUPTED if the pointer fails validation.
 */
STATIC int
xfs_btree_ptr_to_daddr(
	struct xfs_btree_cur	*cur,
	union xfs_btree_ptr	*ptr,
	xfs_daddr_t		*daddr)
{
	xfs_fsblock_t		fsbno;
	xfs_agblock_t		agbno;
	int			error;

	error = xfs_btree_check_ptr(cur, ptr, 0, 1);
	if (error)
		return error;

	if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
		fsbno = be64_to_cpu(ptr->l);
		*daddr = XFS_FSB_TO_DADDR(cur->bc_mp, fsbno);
	} else {
		agbno = be32_to_cpu(ptr->s);
		*daddr = XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno,
					  agbno);
	}

	return 0;
}

/*
 * Readahead @count btree blocks at the given @ptr location.
 *
 * We don't need to care about long or short form btrees here as we have a
 * method of converting the ptr directly to a daddr available to us.
 */
STATIC void
xfs_btree_readahead_ptr(
	struct xfs_btree_cur	*cur,
	union xfs_btree_ptr	*ptr,
	xfs_extlen_t		count)
{
	xfs_daddr_t		daddr;

	/* Silently skip readahead if the pointer doesn't validate. */
	if (xfs_btree_ptr_to_daddr(cur, ptr, &daddr))
		return;
	xfs_buf_readahead(cur->bc_mp->m_ddev_targp, daddr,
			  cur->bc_mp->m_bsize * count, cur->bc_ops->buf_ops);
}

/*
 * Set the buffer for level "lev" in the cursor to bp, releasing
 * any previous buffer.
 */
STATIC void
xfs_btree_setbuf(
	xfs_btree_cur_t		*cur,	/* btree cursor */
	int			lev,	/* level in btree */
	xfs_buf_t		*bp)	/* new buffer to set */
{
	struct xfs_btree_block	*b;	/* btree block */

	if (cur->bc_bufs[lev])
		xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[lev]);
	cur->bc_bufs[lev] = bp;
	cur->bc_ra[lev] = 0;

	/*
	 * Pre-mark readahead done for directions that have no sibling,
	 * so xfs_btree_readahead() won't bother trying them.
	 */
	b = XFS_BUF_TO_BLOCK(bp);
	if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
		if (b->bb_u.l.bb_leftsib == cpu_to_be64(NULLFSBLOCK))
			cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
		if (b->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK))
			cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
	} else {
		if (b->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK))
			cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
		if (b->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK))
			cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
	}
}

/* Return true if ptr is the appropriate null sentinel for this btree. */
bool
xfs_btree_ptr_is_null(
	struct xfs_btree_cur	*cur,
	union xfs_btree_ptr	*ptr)
{
	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
		return ptr->l == cpu_to_be64(NULLFSBLOCK);
	else
		return ptr->s == cpu_to_be32(NULLAGBLOCK);
}

STATIC void
xfs_btree_set_ptr_null(
	struct xfs_btree_cur	*cur,
	union xfs_btree_ptr	*ptr)
{
	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
		ptr->l = cpu_to_be64(NULLFSBLOCK);
	else
		ptr->s = cpu_to_be32(NULLAGBLOCK);
}

/*
 * Get/set/init sibling pointers
 */
void
xfs_btree_get_sibling(
	struct xfs_btree_cur	*cur,
	struct xfs_btree_block	*block,
	union xfs_btree_ptr	*ptr,
	int			lr)
{
	ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);

	if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
		if (lr == XFS_BB_RIGHTSIB)
			ptr->l = block->bb_u.l.bb_rightsib;
		else
			ptr->l = block->bb_u.l.bb_leftsib;
	} else {
		if (lr == XFS_BB_RIGHTSIB)
			ptr->s = block->bb_u.s.bb_rightsib;
		else
			ptr->s = block->bb_u.s.bb_leftsib;
	}
}

STATIC void
xfs_btree_set_sibling(
	struct xfs_btree_cur	*cur,
	struct xfs_btree_block	*block,
	union xfs_btree_ptr	*ptr,
	int			lr)
{
	ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);

	if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
		if (lr == XFS_BB_RIGHTSIB)
			block->bb_u.l.bb_rightsib = ptr->l;
		else
			block->bb_u.l.bb_leftsib = ptr->l;
	} else {
		if (lr == XFS_BB_RIGHTSIB)
			block->bb_u.s.bb_rightsib = ptr->s;
		else
			block->bb_u.s.bb_leftsib = ptr->s;
	}
}

/*
 * Initialize an on-disk btree block header: magic, level, numrecs, null
 * sibling pointers, and (on CRC filesystems) the self-describing metadata
 * fields.  @flags selects long vs short form via XFS_BTREE_LONG_PTRS.
 */
void
xfs_btree_init_block_int(
	struct xfs_mount	*mp,
	struct xfs_btree_block	*buf,
	xfs_daddr_t		blkno,
	xfs_btnum_t		btnum,
	__u16			level,
	__u16			numrecs,
	__u64			owner,
	unsigned int		flags)
{
	int			crc = xfs_sb_version_hascrc(&mp->m_sb);
	__u32			magic = xfs_btree_magic(crc, btnum);

	buf->bb_magic = cpu_to_be32(magic);
	buf->bb_level = cpu_to_be16(level);
	buf->bb_numrecs = cpu_to_be16(numrecs);

	if (flags & XFS_BTREE_LONG_PTRS) {
		buf->bb_u.l.bb_leftsib = cpu_to_be64(NULLFSBLOCK);
		buf->bb_u.l.bb_rightsib = cpu_to_be64(NULLFSBLOCK);
		if (crc) {
			buf->bb_u.l.bb_blkno = cpu_to_be64(blkno);
			buf->bb_u.l.bb_owner = cpu_to_be64(owner);
			uuid_copy(&buf->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid);
			buf->bb_u.l.bb_pad = 0;
			buf->bb_u.l.bb_lsn = 0;
		}
	} else {
		/* owner is a 32 bit value on short blocks */
		__u32 __owner = (__u32)owner;

		buf->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK);
		buf->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK);
		if (crc) {
			buf->bb_u.s.bb_blkno = cpu_to_be64(blkno);
			buf->bb_u.s.bb_owner = cpu_to_be32(__owner);
			uuid_copy(&buf->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid);
			buf->bb_u.s.bb_lsn = 0;
		}
	}
}

void
xfs_btree_init_block(
	struct xfs_mount *mp,
	struct xfs_buf	*bp,
	xfs_btnum_t	btnum,
	__u16		level,
	__u16		numrecs,
	__u64		owner)
{
	xfs_btree_init_block_int(mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn,
				 btnum, level, numrecs, owner, 0);
}

STATIC void
xfs_btree_init_block_cur(
	struct xfs_btree_cur	*cur,
	struct xfs_buf		*bp,
	int			level,
	int			numrecs)
{
	__u64			owner;

	/*
	 * we can pull the owner from the cursor right now as the different
	 * owners align directly with the pointer size of the btree. This may
	 * change in future, but is safe for current users of the generic btree
	 * code.
	 */
	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
		owner = cur->bc_private.b.ip->i_ino;
	else
		owner = cur->bc_private.a.agno;

	xfs_btree_init_block_int(cur->bc_mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn,
				 cur->bc_btnum, level, numrecs,
				 owner, cur->bc_flags);
}

/*
 * Return true if ptr is the last record in the btree and
 * we need to track updates to this record.  The decision
 * will be further refined in the update_lastrec method.
1196 */ 1197 STATIC int 1198 xfs_btree_is_lastrec( 1199 struct xfs_btree_cur *cur, 1200 struct xfs_btree_block *block, 1201 int level) 1202 { 1203 union xfs_btree_ptr ptr; 1204 1205 if (level > 0) 1206 return 0; 1207 if (!(cur->bc_flags & XFS_BTREE_LASTREC_UPDATE)) 1208 return 0; 1209 1210 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB); 1211 if (!xfs_btree_ptr_is_null(cur, &ptr)) 1212 return 0; 1213 return 1; 1214 } 1215 1216 STATIC void 1217 xfs_btree_buf_to_ptr( 1218 struct xfs_btree_cur *cur, 1219 struct xfs_buf *bp, 1220 union xfs_btree_ptr *ptr) 1221 { 1222 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) 1223 ptr->l = cpu_to_be64(XFS_DADDR_TO_FSB(cur->bc_mp, 1224 XFS_BUF_ADDR(bp))); 1225 else { 1226 ptr->s = cpu_to_be32(xfs_daddr_to_agbno(cur->bc_mp, 1227 XFS_BUF_ADDR(bp))); 1228 } 1229 } 1230 1231 STATIC void 1232 xfs_btree_set_refs( 1233 struct xfs_btree_cur *cur, 1234 struct xfs_buf *bp) 1235 { 1236 switch (cur->bc_btnum) { 1237 case XFS_BTNUM_BNO: 1238 case XFS_BTNUM_CNT: 1239 xfs_buf_set_ref(bp, XFS_ALLOC_BTREE_REF); 1240 break; 1241 case XFS_BTNUM_INO: 1242 case XFS_BTNUM_FINO: 1243 xfs_buf_set_ref(bp, XFS_INO_BTREE_REF); 1244 break; 1245 case XFS_BTNUM_BMAP: 1246 xfs_buf_set_ref(bp, XFS_BMAP_BTREE_REF); 1247 break; 1248 case XFS_BTNUM_RMAP: 1249 xfs_buf_set_ref(bp, XFS_RMAP_BTREE_REF); 1250 break; 1251 case XFS_BTNUM_REFC: 1252 xfs_buf_set_ref(bp, XFS_REFC_BTREE_REF); 1253 break; 1254 default: 1255 ASSERT(0); 1256 } 1257 } 1258 1259 STATIC int 1260 xfs_btree_get_buf_block( 1261 struct xfs_btree_cur *cur, 1262 union xfs_btree_ptr *ptr, 1263 struct xfs_btree_block **block, 1264 struct xfs_buf **bpp) 1265 { 1266 struct xfs_mount *mp = cur->bc_mp; 1267 xfs_daddr_t d; 1268 int error; 1269 1270 error = xfs_btree_ptr_to_daddr(cur, ptr, &d); 1271 if (error) 1272 return error; 1273 *bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d, 1274 mp->m_bsize, 0); 1275 1276 if (!*bpp) 1277 return -ENOMEM; 1278 1279 (*bpp)->b_ops = cur->bc_ops->buf_ops; 1280 
*block = XFS_BUF_TO_BLOCK(*bpp); 1281 return 0; 1282 } 1283 1284 /* 1285 * Read in the buffer at the given ptr and return the buffer and 1286 * the block pointer within the buffer. 1287 */ 1288 STATIC int 1289 xfs_btree_read_buf_block( 1290 struct xfs_btree_cur *cur, 1291 union xfs_btree_ptr *ptr, 1292 int flags, 1293 struct xfs_btree_block **block, 1294 struct xfs_buf **bpp) 1295 { 1296 struct xfs_mount *mp = cur->bc_mp; 1297 xfs_daddr_t d; 1298 int error; 1299 1300 /* need to sort out how callers deal with failures first */ 1301 ASSERT(!(flags & XBF_TRYLOCK)); 1302 1303 error = xfs_btree_ptr_to_daddr(cur, ptr, &d); 1304 if (error) 1305 return error; 1306 error = xfs_trans_read_buf(mp, cur->bc_tp, mp->m_ddev_targp, d, 1307 mp->m_bsize, flags, bpp, 1308 cur->bc_ops->buf_ops); 1309 if (error) 1310 return error; 1311 1312 xfs_btree_set_refs(cur, *bpp); 1313 *block = XFS_BUF_TO_BLOCK(*bpp); 1314 return 0; 1315 } 1316 1317 /* 1318 * Copy keys from one btree block to another. 1319 */ 1320 STATIC void 1321 xfs_btree_copy_keys( 1322 struct xfs_btree_cur *cur, 1323 union xfs_btree_key *dst_key, 1324 union xfs_btree_key *src_key, 1325 int numkeys) 1326 { 1327 ASSERT(numkeys >= 0); 1328 memcpy(dst_key, src_key, numkeys * cur->bc_ops->key_len); 1329 } 1330 1331 /* 1332 * Copy records from one btree block to another. 1333 */ 1334 STATIC void 1335 xfs_btree_copy_recs( 1336 struct xfs_btree_cur *cur, 1337 union xfs_btree_rec *dst_rec, 1338 union xfs_btree_rec *src_rec, 1339 int numrecs) 1340 { 1341 ASSERT(numrecs >= 0); 1342 memcpy(dst_rec, src_rec, numrecs * cur->bc_ops->rec_len); 1343 } 1344 1345 /* 1346 * Copy block pointers from one btree block to another. 
1347 */ 1348 STATIC void 1349 xfs_btree_copy_ptrs( 1350 struct xfs_btree_cur *cur, 1351 union xfs_btree_ptr *dst_ptr, 1352 union xfs_btree_ptr *src_ptr, 1353 int numptrs) 1354 { 1355 ASSERT(numptrs >= 0); 1356 memcpy(dst_ptr, src_ptr, numptrs * xfs_btree_ptr_len(cur)); 1357 } 1358 1359 /* 1360 * Shift keys one index left/right inside a single btree block. 1361 */ 1362 STATIC void 1363 xfs_btree_shift_keys( 1364 struct xfs_btree_cur *cur, 1365 union xfs_btree_key *key, 1366 int dir, 1367 int numkeys) 1368 { 1369 char *dst_key; 1370 1371 ASSERT(numkeys >= 0); 1372 ASSERT(dir == 1 || dir == -1); 1373 1374 dst_key = (char *)key + (dir * cur->bc_ops->key_len); 1375 memmove(dst_key, key, numkeys * cur->bc_ops->key_len); 1376 } 1377 1378 /* 1379 * Shift records one index left/right inside a single btree block. 1380 */ 1381 STATIC void 1382 xfs_btree_shift_recs( 1383 struct xfs_btree_cur *cur, 1384 union xfs_btree_rec *rec, 1385 int dir, 1386 int numrecs) 1387 { 1388 char *dst_rec; 1389 1390 ASSERT(numrecs >= 0); 1391 ASSERT(dir == 1 || dir == -1); 1392 1393 dst_rec = (char *)rec + (dir * cur->bc_ops->rec_len); 1394 memmove(dst_rec, rec, numrecs * cur->bc_ops->rec_len); 1395 } 1396 1397 /* 1398 * Shift block pointers one index left/right inside a single btree block. 1399 */ 1400 STATIC void 1401 xfs_btree_shift_ptrs( 1402 struct xfs_btree_cur *cur, 1403 union xfs_btree_ptr *ptr, 1404 int dir, 1405 int numptrs) 1406 { 1407 char *dst_ptr; 1408 1409 ASSERT(numptrs >= 0); 1410 ASSERT(dir == 1 || dir == -1); 1411 1412 dst_ptr = (char *)ptr + (dir * xfs_btree_ptr_len(cur)); 1413 memmove(dst_ptr, ptr, numptrs * xfs_btree_ptr_len(cur)); 1414 } 1415 1416 /* 1417 * Log key values from the btree block. 
1418 */ 1419 STATIC void 1420 xfs_btree_log_keys( 1421 struct xfs_btree_cur *cur, 1422 struct xfs_buf *bp, 1423 int first, 1424 int last) 1425 { 1426 1427 if (bp) { 1428 xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF); 1429 xfs_trans_log_buf(cur->bc_tp, bp, 1430 xfs_btree_key_offset(cur, first), 1431 xfs_btree_key_offset(cur, last + 1) - 1); 1432 } else { 1433 xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip, 1434 xfs_ilog_fbroot(cur->bc_private.b.whichfork)); 1435 } 1436 } 1437 1438 /* 1439 * Log record values from the btree block. 1440 */ 1441 void 1442 xfs_btree_log_recs( 1443 struct xfs_btree_cur *cur, 1444 struct xfs_buf *bp, 1445 int first, 1446 int last) 1447 { 1448 1449 xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF); 1450 xfs_trans_log_buf(cur->bc_tp, bp, 1451 xfs_btree_rec_offset(cur, first), 1452 xfs_btree_rec_offset(cur, last + 1) - 1); 1453 1454 } 1455 1456 /* 1457 * Log block pointer fields from a btree block (nonleaf). 1458 */ 1459 STATIC void 1460 xfs_btree_log_ptrs( 1461 struct xfs_btree_cur *cur, /* btree cursor */ 1462 struct xfs_buf *bp, /* buffer containing btree block */ 1463 int first, /* index of first pointer to log */ 1464 int last) /* index of last pointer to log */ 1465 { 1466 1467 if (bp) { 1468 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 1469 int level = xfs_btree_get_level(block); 1470 1471 xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF); 1472 xfs_trans_log_buf(cur->bc_tp, bp, 1473 xfs_btree_ptr_offset(cur, first, level), 1474 xfs_btree_ptr_offset(cur, last + 1, level) - 1); 1475 } else { 1476 xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip, 1477 xfs_ilog_fbroot(cur->bc_private.b.whichfork)); 1478 } 1479 1480 } 1481 1482 /* 1483 * Log fields from a btree block header. 1484 */ 1485 void 1486 xfs_btree_log_block( 1487 struct xfs_btree_cur *cur, /* btree cursor */ 1488 struct xfs_buf *bp, /* buffer containing btree block */ 1489 int fields) /* mask of fields: XFS_BB_... 
*/ 1490 { 1491 int first; /* first byte offset logged */ 1492 int last; /* last byte offset logged */ 1493 static const short soffsets[] = { /* table of offsets (short) */ 1494 offsetof(struct xfs_btree_block, bb_magic), 1495 offsetof(struct xfs_btree_block, bb_level), 1496 offsetof(struct xfs_btree_block, bb_numrecs), 1497 offsetof(struct xfs_btree_block, bb_u.s.bb_leftsib), 1498 offsetof(struct xfs_btree_block, bb_u.s.bb_rightsib), 1499 offsetof(struct xfs_btree_block, bb_u.s.bb_blkno), 1500 offsetof(struct xfs_btree_block, bb_u.s.bb_lsn), 1501 offsetof(struct xfs_btree_block, bb_u.s.bb_uuid), 1502 offsetof(struct xfs_btree_block, bb_u.s.bb_owner), 1503 offsetof(struct xfs_btree_block, bb_u.s.bb_crc), 1504 XFS_BTREE_SBLOCK_CRC_LEN 1505 }; 1506 static const short loffsets[] = { /* table of offsets (long) */ 1507 offsetof(struct xfs_btree_block, bb_magic), 1508 offsetof(struct xfs_btree_block, bb_level), 1509 offsetof(struct xfs_btree_block, bb_numrecs), 1510 offsetof(struct xfs_btree_block, bb_u.l.bb_leftsib), 1511 offsetof(struct xfs_btree_block, bb_u.l.bb_rightsib), 1512 offsetof(struct xfs_btree_block, bb_u.l.bb_blkno), 1513 offsetof(struct xfs_btree_block, bb_u.l.bb_lsn), 1514 offsetof(struct xfs_btree_block, bb_u.l.bb_uuid), 1515 offsetof(struct xfs_btree_block, bb_u.l.bb_owner), 1516 offsetof(struct xfs_btree_block, bb_u.l.bb_crc), 1517 offsetof(struct xfs_btree_block, bb_u.l.bb_pad), 1518 XFS_BTREE_LBLOCK_CRC_LEN 1519 }; 1520 1521 if (bp) { 1522 int nbits; 1523 1524 if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) { 1525 /* 1526 * We don't log the CRC when updating a btree 1527 * block but instead recreate it during log 1528 * recovery. As the log buffers have checksums 1529 * of their own this is safe and avoids logging a crc 1530 * update in a lot of places. 
1531 */ 1532 if (fields == XFS_BB_ALL_BITS) 1533 fields = XFS_BB_ALL_BITS_CRC; 1534 nbits = XFS_BB_NUM_BITS_CRC; 1535 } else { 1536 nbits = XFS_BB_NUM_BITS; 1537 } 1538 xfs_btree_offsets(fields, 1539 (cur->bc_flags & XFS_BTREE_LONG_PTRS) ? 1540 loffsets : soffsets, 1541 nbits, &first, &last); 1542 xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF); 1543 xfs_trans_log_buf(cur->bc_tp, bp, first, last); 1544 } else { 1545 xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip, 1546 xfs_ilog_fbroot(cur->bc_private.b.whichfork)); 1547 } 1548 } 1549 1550 /* 1551 * Increment cursor by one record at the level. 1552 * For nonzero levels the leaf-ward information is untouched. 1553 */ 1554 int /* error */ 1555 xfs_btree_increment( 1556 struct xfs_btree_cur *cur, 1557 int level, 1558 int *stat) /* success/failure */ 1559 { 1560 struct xfs_btree_block *block; 1561 union xfs_btree_ptr ptr; 1562 struct xfs_buf *bp; 1563 int error; /* error return value */ 1564 int lev; 1565 1566 ASSERT(level < cur->bc_nlevels); 1567 1568 /* Read-ahead to the right at this level. */ 1569 xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA); 1570 1571 /* Get a pointer to the btree block. */ 1572 block = xfs_btree_get_block(cur, level, &bp); 1573 1574 #ifdef DEBUG 1575 error = xfs_btree_check_block(cur, block, level, bp); 1576 if (error) 1577 goto error0; 1578 #endif 1579 1580 /* We're done if we remain in the block after the increment. */ 1581 if (++cur->bc_ptrs[level] <= xfs_btree_get_numrecs(block)) 1582 goto out1; 1583 1584 /* Fail if we just went off the right edge of the tree. */ 1585 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB); 1586 if (xfs_btree_ptr_is_null(cur, &ptr)) 1587 goto out0; 1588 1589 XFS_BTREE_STATS_INC(cur, increment); 1590 1591 /* 1592 * March up the tree incrementing pointers. 1593 * Stop when we don't go off the right edge of a block. 
1594 */ 1595 for (lev = level + 1; lev < cur->bc_nlevels; lev++) { 1596 block = xfs_btree_get_block(cur, lev, &bp); 1597 1598 #ifdef DEBUG 1599 error = xfs_btree_check_block(cur, block, lev, bp); 1600 if (error) 1601 goto error0; 1602 #endif 1603 1604 if (++cur->bc_ptrs[lev] <= xfs_btree_get_numrecs(block)) 1605 break; 1606 1607 /* Read-ahead the right block for the next loop. */ 1608 xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA); 1609 } 1610 1611 /* 1612 * If we went off the root then we are either seriously 1613 * confused or have the tree root in an inode. 1614 */ 1615 if (lev == cur->bc_nlevels) { 1616 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) 1617 goto out0; 1618 ASSERT(0); 1619 error = -EFSCORRUPTED; 1620 goto error0; 1621 } 1622 ASSERT(lev < cur->bc_nlevels); 1623 1624 /* 1625 * Now walk back down the tree, fixing up the cursor's buffer 1626 * pointers and key numbers. 1627 */ 1628 for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) { 1629 union xfs_btree_ptr *ptrp; 1630 1631 ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block); 1632 --lev; 1633 error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp); 1634 if (error) 1635 goto error0; 1636 1637 xfs_btree_setbuf(cur, lev, bp); 1638 cur->bc_ptrs[lev] = 1; 1639 } 1640 out1: 1641 *stat = 1; 1642 return 0; 1643 1644 out0: 1645 *stat = 0; 1646 return 0; 1647 1648 error0: 1649 return error; 1650 } 1651 1652 /* 1653 * Decrement cursor by one record at the level. 1654 * For nonzero levels the leaf-ward information is untouched. 1655 */ 1656 int /* error */ 1657 xfs_btree_decrement( 1658 struct xfs_btree_cur *cur, 1659 int level, 1660 int *stat) /* success/failure */ 1661 { 1662 struct xfs_btree_block *block; 1663 xfs_buf_t *bp; 1664 int error; /* error return value */ 1665 int lev; 1666 union xfs_btree_ptr ptr; 1667 1668 ASSERT(level < cur->bc_nlevels); 1669 1670 /* Read-ahead to the left at this level. 
*/ 1671 xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA); 1672 1673 /* We're done if we remain in the block after the decrement. */ 1674 if (--cur->bc_ptrs[level] > 0) 1675 goto out1; 1676 1677 /* Get a pointer to the btree block. */ 1678 block = xfs_btree_get_block(cur, level, &bp); 1679 1680 #ifdef DEBUG 1681 error = xfs_btree_check_block(cur, block, level, bp); 1682 if (error) 1683 goto error0; 1684 #endif 1685 1686 /* Fail if we just went off the left edge of the tree. */ 1687 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB); 1688 if (xfs_btree_ptr_is_null(cur, &ptr)) 1689 goto out0; 1690 1691 XFS_BTREE_STATS_INC(cur, decrement); 1692 1693 /* 1694 * March up the tree decrementing pointers. 1695 * Stop when we don't go off the left edge of a block. 1696 */ 1697 for (lev = level + 1; lev < cur->bc_nlevels; lev++) { 1698 if (--cur->bc_ptrs[lev] > 0) 1699 break; 1700 /* Read-ahead the left block for the next loop. */ 1701 xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA); 1702 } 1703 1704 /* 1705 * If we went off the root then we are seriously confused. 1706 * or the root of the tree is in an inode. 1707 */ 1708 if (lev == cur->bc_nlevels) { 1709 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) 1710 goto out0; 1711 ASSERT(0); 1712 error = -EFSCORRUPTED; 1713 goto error0; 1714 } 1715 ASSERT(lev < cur->bc_nlevels); 1716 1717 /* 1718 * Now walk back down the tree, fixing up the cursor's buffer 1719 * pointers and key numbers. 
1720 */ 1721 for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) { 1722 union xfs_btree_ptr *ptrp; 1723 1724 ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block); 1725 --lev; 1726 error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp); 1727 if (error) 1728 goto error0; 1729 xfs_btree_setbuf(cur, lev, bp); 1730 cur->bc_ptrs[lev] = xfs_btree_get_numrecs(block); 1731 } 1732 out1: 1733 *stat = 1; 1734 return 0; 1735 1736 out0: 1737 *stat = 0; 1738 return 0; 1739 1740 error0: 1741 return error; 1742 } 1743 1744 int 1745 xfs_btree_lookup_get_block( 1746 struct xfs_btree_cur *cur, /* btree cursor */ 1747 int level, /* level in the btree */ 1748 union xfs_btree_ptr *pp, /* ptr to btree block */ 1749 struct xfs_btree_block **blkp) /* return btree block */ 1750 { 1751 struct xfs_buf *bp; /* buffer pointer for btree block */ 1752 xfs_daddr_t daddr; 1753 int error = 0; 1754 1755 /* special case the root block if in an inode */ 1756 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && 1757 (level == cur->bc_nlevels - 1)) { 1758 *blkp = xfs_btree_get_iroot(cur); 1759 return 0; 1760 } 1761 1762 /* 1763 * If the old buffer at this level for the disk address we are 1764 * looking for re-use it. 1765 * 1766 * Otherwise throw it away and get a new one. 1767 */ 1768 bp = cur->bc_bufs[level]; 1769 error = xfs_btree_ptr_to_daddr(cur, pp, &daddr); 1770 if (error) 1771 return error; 1772 if (bp && XFS_BUF_ADDR(bp) == daddr) { 1773 *blkp = XFS_BUF_TO_BLOCK(bp); 1774 return 0; 1775 } 1776 1777 error = xfs_btree_read_buf_block(cur, pp, 0, blkp, &bp); 1778 if (error) 1779 return error; 1780 1781 /* Check the inode owner since the verifiers don't. */ 1782 if (xfs_sb_version_hascrc(&cur->bc_mp->m_sb) && 1783 !(cur->bc_private.b.flags & XFS_BTCUR_BPRV_INVALID_OWNER) && 1784 (cur->bc_flags & XFS_BTREE_LONG_PTRS) && 1785 be64_to_cpu((*blkp)->bb_u.l.bb_owner) != 1786 cur->bc_private.b.ip->i_ino) 1787 goto out_bad; 1788 1789 /* Did we get the level we were looking for? 
*/ 1790 if (be16_to_cpu((*blkp)->bb_level) != level) 1791 goto out_bad; 1792 1793 /* Check that internal nodes have at least one record. */ 1794 if (level != 0 && be16_to_cpu((*blkp)->bb_numrecs) == 0) 1795 goto out_bad; 1796 1797 xfs_btree_setbuf(cur, level, bp); 1798 return 0; 1799 1800 out_bad: 1801 *blkp = NULL; 1802 xfs_buf_corruption_error(bp); 1803 xfs_trans_brelse(cur->bc_tp, bp); 1804 return -EFSCORRUPTED; 1805 } 1806 1807 /* 1808 * Get current search key. For level 0 we don't actually have a key 1809 * structure so we make one up from the record. For all other levels 1810 * we just return the right key. 1811 */ 1812 STATIC union xfs_btree_key * 1813 xfs_lookup_get_search_key( 1814 struct xfs_btree_cur *cur, 1815 int level, 1816 int keyno, 1817 struct xfs_btree_block *block, 1818 union xfs_btree_key *kp) 1819 { 1820 if (level == 0) { 1821 cur->bc_ops->init_key_from_rec(kp, 1822 xfs_btree_rec_addr(cur, keyno, block)); 1823 return kp; 1824 } 1825 1826 return xfs_btree_key_addr(cur, keyno, block); 1827 } 1828 1829 /* 1830 * Lookup the record. The cursor is made to point to it, based on dir. 1831 * stat is set to 0 if can't find any such record, 1 for success. 1832 */ 1833 int /* error */ 1834 xfs_btree_lookup( 1835 struct xfs_btree_cur *cur, /* btree cursor */ 1836 xfs_lookup_t dir, /* <=, ==, or >= */ 1837 int *stat) /* success/failure */ 1838 { 1839 struct xfs_btree_block *block; /* current btree block */ 1840 int64_t diff; /* difference for the current key */ 1841 int error; /* error return value */ 1842 int keyno; /* current key number */ 1843 int level; /* level in the btree */ 1844 union xfs_btree_ptr *pp; /* ptr to btree block */ 1845 union xfs_btree_ptr ptr; /* ptr to btree block */ 1846 1847 XFS_BTREE_STATS_INC(cur, lookup); 1848 1849 /* No such thing as a zero-level tree. 
*/ 1850 if (XFS_IS_CORRUPT(cur->bc_mp, cur->bc_nlevels == 0)) 1851 return -EFSCORRUPTED; 1852 1853 block = NULL; 1854 keyno = 0; 1855 1856 /* initialise start pointer from cursor */ 1857 cur->bc_ops->init_ptr_from_cur(cur, &ptr); 1858 pp = &ptr; 1859 1860 /* 1861 * Iterate over each level in the btree, starting at the root. 1862 * For each level above the leaves, find the key we need, based 1863 * on the lookup record, then follow the corresponding block 1864 * pointer down to the next level. 1865 */ 1866 for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) { 1867 /* Get the block we need to do the lookup on. */ 1868 error = xfs_btree_lookup_get_block(cur, level, pp, &block); 1869 if (error) 1870 goto error0; 1871 1872 if (diff == 0) { 1873 /* 1874 * If we already had a key match at a higher level, we 1875 * know we need to use the first entry in this block. 1876 */ 1877 keyno = 1; 1878 } else { 1879 /* Otherwise search this block. Do a binary search. */ 1880 1881 int high; /* high entry number */ 1882 int low; /* low entry number */ 1883 1884 /* Set low and high entry numbers, 1-based. */ 1885 low = 1; 1886 high = xfs_btree_get_numrecs(block); 1887 if (!high) { 1888 /* Block is empty, must be an empty leaf. */ 1889 if (level != 0 || cur->bc_nlevels != 1) { 1890 XFS_CORRUPTION_ERROR(__func__, 1891 XFS_ERRLEVEL_LOW, 1892 cur->bc_mp, block, 1893 sizeof(*block)); 1894 return -EFSCORRUPTED; 1895 } 1896 1897 cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE; 1898 *stat = 0; 1899 return 0; 1900 } 1901 1902 /* Binary search the block. */ 1903 while (low <= high) { 1904 union xfs_btree_key key; 1905 union xfs_btree_key *kp; 1906 1907 XFS_BTREE_STATS_INC(cur, compare); 1908 1909 /* keyno is average of low and high. 
*/ 1910 keyno = (low + high) >> 1; 1911 1912 /* Get current search key */ 1913 kp = xfs_lookup_get_search_key(cur, level, 1914 keyno, block, &key); 1915 1916 /* 1917 * Compute difference to get next direction: 1918 * - less than, move right 1919 * - greater than, move left 1920 * - equal, we're done 1921 */ 1922 diff = cur->bc_ops->key_diff(cur, kp); 1923 if (diff < 0) 1924 low = keyno + 1; 1925 else if (diff > 0) 1926 high = keyno - 1; 1927 else 1928 break; 1929 } 1930 } 1931 1932 /* 1933 * If there are more levels, set up for the next level 1934 * by getting the block number and filling in the cursor. 1935 */ 1936 if (level > 0) { 1937 /* 1938 * If we moved left, need the previous key number, 1939 * unless there isn't one. 1940 */ 1941 if (diff > 0 && --keyno < 1) 1942 keyno = 1; 1943 pp = xfs_btree_ptr_addr(cur, keyno, block); 1944 1945 error = xfs_btree_debug_check_ptr(cur, pp, 0, level); 1946 if (error) 1947 goto error0; 1948 1949 cur->bc_ptrs[level] = keyno; 1950 } 1951 } 1952 1953 /* Done with the search. See if we need to adjust the results. */ 1954 if (dir != XFS_LOOKUP_LE && diff < 0) { 1955 keyno++; 1956 /* 1957 * If ge search and we went off the end of the block, but it's 1958 * not the last block, we're in the wrong block. 1959 */ 1960 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB); 1961 if (dir == XFS_LOOKUP_GE && 1962 keyno > xfs_btree_get_numrecs(block) && 1963 !xfs_btree_ptr_is_null(cur, &ptr)) { 1964 int i; 1965 1966 cur->bc_ptrs[0] = keyno; 1967 error = xfs_btree_increment(cur, 0, &i); 1968 if (error) 1969 goto error0; 1970 if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) 1971 return -EFSCORRUPTED; 1972 *stat = 1; 1973 return 0; 1974 } 1975 } else if (dir == XFS_LOOKUP_LE && diff > 0) 1976 keyno--; 1977 cur->bc_ptrs[0] = keyno; 1978 1979 /* Return if we succeeded or not. 
*/ 1980 if (keyno == 0 || keyno > xfs_btree_get_numrecs(block)) 1981 *stat = 0; 1982 else if (dir != XFS_LOOKUP_EQ || diff == 0) 1983 *stat = 1; 1984 else 1985 *stat = 0; 1986 return 0; 1987 1988 error0: 1989 return error; 1990 } 1991 1992 /* Find the high key storage area from a regular key. */ 1993 union xfs_btree_key * 1994 xfs_btree_high_key_from_key( 1995 struct xfs_btree_cur *cur, 1996 union xfs_btree_key *key) 1997 { 1998 ASSERT(cur->bc_flags & XFS_BTREE_OVERLAPPING); 1999 return (union xfs_btree_key *)((char *)key + 2000 (cur->bc_ops->key_len / 2)); 2001 } 2002 2003 /* Determine the low (and high if overlapped) keys of a leaf block */ 2004 STATIC void 2005 xfs_btree_get_leaf_keys( 2006 struct xfs_btree_cur *cur, 2007 struct xfs_btree_block *block, 2008 union xfs_btree_key *key) 2009 { 2010 union xfs_btree_key max_hkey; 2011 union xfs_btree_key hkey; 2012 union xfs_btree_rec *rec; 2013 union xfs_btree_key *high; 2014 int n; 2015 2016 rec = xfs_btree_rec_addr(cur, 1, block); 2017 cur->bc_ops->init_key_from_rec(key, rec); 2018 2019 if (cur->bc_flags & XFS_BTREE_OVERLAPPING) { 2020 2021 cur->bc_ops->init_high_key_from_rec(&max_hkey, rec); 2022 for (n = 2; n <= xfs_btree_get_numrecs(block); n++) { 2023 rec = xfs_btree_rec_addr(cur, n, block); 2024 cur->bc_ops->init_high_key_from_rec(&hkey, rec); 2025 if (cur->bc_ops->diff_two_keys(cur, &hkey, &max_hkey) 2026 > 0) 2027 max_hkey = hkey; 2028 } 2029 2030 high = xfs_btree_high_key_from_key(cur, key); 2031 memcpy(high, &max_hkey, cur->bc_ops->key_len / 2); 2032 } 2033 } 2034 2035 /* Determine the low (and high if overlapped) keys of a node block */ 2036 STATIC void 2037 xfs_btree_get_node_keys( 2038 struct xfs_btree_cur *cur, 2039 struct xfs_btree_block *block, 2040 union xfs_btree_key *key) 2041 { 2042 union xfs_btree_key *hkey; 2043 union xfs_btree_key *max_hkey; 2044 union xfs_btree_key *high; 2045 int n; 2046 2047 if (cur->bc_flags & XFS_BTREE_OVERLAPPING) { 2048 memcpy(key, xfs_btree_key_addr(cur, 1, block), 
2049 cur->bc_ops->key_len / 2); 2050 2051 max_hkey = xfs_btree_high_key_addr(cur, 1, block); 2052 for (n = 2; n <= xfs_btree_get_numrecs(block); n++) { 2053 hkey = xfs_btree_high_key_addr(cur, n, block); 2054 if (cur->bc_ops->diff_two_keys(cur, hkey, max_hkey) > 0) 2055 max_hkey = hkey; 2056 } 2057 2058 high = xfs_btree_high_key_from_key(cur, key); 2059 memcpy(high, max_hkey, cur->bc_ops->key_len / 2); 2060 } else { 2061 memcpy(key, xfs_btree_key_addr(cur, 1, block), 2062 cur->bc_ops->key_len); 2063 } 2064 } 2065 2066 /* Derive the keys for any btree block. */ 2067 void 2068 xfs_btree_get_keys( 2069 struct xfs_btree_cur *cur, 2070 struct xfs_btree_block *block, 2071 union xfs_btree_key *key) 2072 { 2073 if (be16_to_cpu(block->bb_level) == 0) 2074 xfs_btree_get_leaf_keys(cur, block, key); 2075 else 2076 xfs_btree_get_node_keys(cur, block, key); 2077 } 2078 2079 /* 2080 * Decide if we need to update the parent keys of a btree block. For 2081 * a standard btree this is only necessary if we're updating the first 2082 * record/key. For an overlapping btree, we must always update the 2083 * keys because the highest key can be in any of the records or keys 2084 * in the block. 2085 */ 2086 static inline bool 2087 xfs_btree_needs_key_update( 2088 struct xfs_btree_cur *cur, 2089 int ptr) 2090 { 2091 return (cur->bc_flags & XFS_BTREE_OVERLAPPING) || ptr == 1; 2092 } 2093 2094 /* 2095 * Update the low and high parent keys of the given level, progressing 2096 * towards the root. If force_all is false, stop if the keys for a given 2097 * level do not need updating. 
2098 */ 2099 STATIC int 2100 __xfs_btree_updkeys( 2101 struct xfs_btree_cur *cur, 2102 int level, 2103 struct xfs_btree_block *block, 2104 struct xfs_buf *bp0, 2105 bool force_all) 2106 { 2107 union xfs_btree_key key; /* keys from current level */ 2108 union xfs_btree_key *lkey; /* keys from the next level up */ 2109 union xfs_btree_key *hkey; 2110 union xfs_btree_key *nlkey; /* keys from the next level up */ 2111 union xfs_btree_key *nhkey; 2112 struct xfs_buf *bp; 2113 int ptr; 2114 2115 ASSERT(cur->bc_flags & XFS_BTREE_OVERLAPPING); 2116 2117 /* Exit if there aren't any parent levels to update. */ 2118 if (level + 1 >= cur->bc_nlevels) 2119 return 0; 2120 2121 trace_xfs_btree_updkeys(cur, level, bp0); 2122 2123 lkey = &key; 2124 hkey = xfs_btree_high_key_from_key(cur, lkey); 2125 xfs_btree_get_keys(cur, block, lkey); 2126 for (level++; level < cur->bc_nlevels; level++) { 2127 #ifdef DEBUG 2128 int error; 2129 #endif 2130 block = xfs_btree_get_block(cur, level, &bp); 2131 trace_xfs_btree_updkeys(cur, level, bp); 2132 #ifdef DEBUG 2133 error = xfs_btree_check_block(cur, block, level, bp); 2134 if (error) 2135 return error; 2136 #endif 2137 ptr = cur->bc_ptrs[level]; 2138 nlkey = xfs_btree_key_addr(cur, ptr, block); 2139 nhkey = xfs_btree_high_key_addr(cur, ptr, block); 2140 if (!force_all && 2141 !(cur->bc_ops->diff_two_keys(cur, nlkey, lkey) != 0 || 2142 cur->bc_ops->diff_two_keys(cur, nhkey, hkey) != 0)) 2143 break; 2144 xfs_btree_copy_keys(cur, nlkey, lkey, 1); 2145 xfs_btree_log_keys(cur, bp, ptr, ptr); 2146 if (level + 1 >= cur->bc_nlevels) 2147 break; 2148 xfs_btree_get_node_keys(cur, block, lkey); 2149 } 2150 2151 return 0; 2152 } 2153 2154 /* Update all the keys from some level in cursor back to the root. 
*/ 2155 STATIC int 2156 xfs_btree_updkeys_force( 2157 struct xfs_btree_cur *cur, 2158 int level) 2159 { 2160 struct xfs_buf *bp; 2161 struct xfs_btree_block *block; 2162 2163 block = xfs_btree_get_block(cur, level, &bp); 2164 return __xfs_btree_updkeys(cur, level, block, bp, true); 2165 } 2166 2167 /* 2168 * Update the parent keys of the given level, progressing towards the root. 2169 */ 2170 STATIC int 2171 xfs_btree_update_keys( 2172 struct xfs_btree_cur *cur, 2173 int level) 2174 { 2175 struct xfs_btree_block *block; 2176 struct xfs_buf *bp; 2177 union xfs_btree_key *kp; 2178 union xfs_btree_key key; 2179 int ptr; 2180 2181 ASSERT(level >= 0); 2182 2183 block = xfs_btree_get_block(cur, level, &bp); 2184 if (cur->bc_flags & XFS_BTREE_OVERLAPPING) 2185 return __xfs_btree_updkeys(cur, level, block, bp, false); 2186 2187 /* 2188 * Go up the tree from this level toward the root. 2189 * At each level, update the key value to the value input. 2190 * Stop when we reach a level where the cursor isn't pointing 2191 * at the first entry in the block. 2192 */ 2193 xfs_btree_get_keys(cur, block, &key); 2194 for (level++, ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) { 2195 #ifdef DEBUG 2196 int error; 2197 #endif 2198 block = xfs_btree_get_block(cur, level, &bp); 2199 #ifdef DEBUG 2200 error = xfs_btree_check_block(cur, block, level, bp); 2201 if (error) 2202 return error; 2203 #endif 2204 ptr = cur->bc_ptrs[level]; 2205 kp = xfs_btree_key_addr(cur, ptr, block); 2206 xfs_btree_copy_keys(cur, kp, &key, 1); 2207 xfs_btree_log_keys(cur, bp, ptr, ptr); 2208 } 2209 2210 return 0; 2211 } 2212 2213 /* 2214 * Update the record referred to by cur to the value in the 2215 * given record. This either works (return 0) or gets an 2216 * EFSCORRUPTED error. 
2217 */ 2218 int 2219 xfs_btree_update( 2220 struct xfs_btree_cur *cur, 2221 union xfs_btree_rec *rec) 2222 { 2223 struct xfs_btree_block *block; 2224 struct xfs_buf *bp; 2225 int error; 2226 int ptr; 2227 union xfs_btree_rec *rp; 2228 2229 /* Pick up the current block. */ 2230 block = xfs_btree_get_block(cur, 0, &bp); 2231 2232 #ifdef DEBUG 2233 error = xfs_btree_check_block(cur, block, 0, bp); 2234 if (error) 2235 goto error0; 2236 #endif 2237 /* Get the address of the rec to be updated. */ 2238 ptr = cur->bc_ptrs[0]; 2239 rp = xfs_btree_rec_addr(cur, ptr, block); 2240 2241 /* Fill in the new contents and log them. */ 2242 xfs_btree_copy_recs(cur, rp, rec, 1); 2243 xfs_btree_log_recs(cur, bp, ptr, ptr); 2244 2245 /* 2246 * If we are tracking the last record in the tree and 2247 * we are at the far right edge of the tree, update it. 2248 */ 2249 if (xfs_btree_is_lastrec(cur, block, 0)) { 2250 cur->bc_ops->update_lastrec(cur, block, rec, 2251 ptr, LASTREC_UPDATE); 2252 } 2253 2254 /* Pass new key value up to our parent. */ 2255 if (xfs_btree_needs_key_update(cur, ptr)) { 2256 error = xfs_btree_update_keys(cur, 0); 2257 if (error) 2258 goto error0; 2259 } 2260 2261 return 0; 2262 2263 error0: 2264 return error; 2265 } 2266 2267 /* 2268 * Move 1 record left from cur/level if possible. 2269 * Update cur to reflect the new path. 
2270 */ 2271 STATIC int /* error */ 2272 xfs_btree_lshift( 2273 struct xfs_btree_cur *cur, 2274 int level, 2275 int *stat) /* success/failure */ 2276 { 2277 struct xfs_buf *lbp; /* left buffer pointer */ 2278 struct xfs_btree_block *left; /* left btree block */ 2279 int lrecs; /* left record count */ 2280 struct xfs_buf *rbp; /* right buffer pointer */ 2281 struct xfs_btree_block *right; /* right btree block */ 2282 struct xfs_btree_cur *tcur; /* temporary btree cursor */ 2283 int rrecs; /* right record count */ 2284 union xfs_btree_ptr lptr; /* left btree pointer */ 2285 union xfs_btree_key *rkp = NULL; /* right btree key */ 2286 union xfs_btree_ptr *rpp = NULL; /* right address pointer */ 2287 union xfs_btree_rec *rrp = NULL; /* right record pointer */ 2288 int error; /* error return value */ 2289 int i; 2290 2291 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && 2292 level == cur->bc_nlevels - 1) 2293 goto out0; 2294 2295 /* Set up variables for this block as "right". */ 2296 right = xfs_btree_get_block(cur, level, &rbp); 2297 2298 #ifdef DEBUG 2299 error = xfs_btree_check_block(cur, right, level, rbp); 2300 if (error) 2301 goto error0; 2302 #endif 2303 2304 /* If we've got no left sibling then we can't shift an entry left. */ 2305 xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB); 2306 if (xfs_btree_ptr_is_null(cur, &lptr)) 2307 goto out0; 2308 2309 /* 2310 * If the cursor entry is the one that would be moved, don't 2311 * do it... it's too complicated. 2312 */ 2313 if (cur->bc_ptrs[level] <= 1) 2314 goto out0; 2315 2316 /* Set up the left neighbor as "left". */ 2317 error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp); 2318 if (error) 2319 goto error0; 2320 2321 /* If it's full, it can't take another entry. 
*/ 2322 lrecs = xfs_btree_get_numrecs(left); 2323 if (lrecs == cur->bc_ops->get_maxrecs(cur, level)) 2324 goto out0; 2325 2326 rrecs = xfs_btree_get_numrecs(right); 2327 2328 /* 2329 * We add one entry to the left side and remove one for the right side. 2330 * Account for it here, the changes will be updated on disk and logged 2331 * later. 2332 */ 2333 lrecs++; 2334 rrecs--; 2335 2336 XFS_BTREE_STATS_INC(cur, lshift); 2337 XFS_BTREE_STATS_ADD(cur, moves, 1); 2338 2339 /* 2340 * If non-leaf, copy a key and a ptr to the left block. 2341 * Log the changes to the left block. 2342 */ 2343 if (level > 0) { 2344 /* It's a non-leaf. Move keys and pointers. */ 2345 union xfs_btree_key *lkp; /* left btree key */ 2346 union xfs_btree_ptr *lpp; /* left address pointer */ 2347 2348 lkp = xfs_btree_key_addr(cur, lrecs, left); 2349 rkp = xfs_btree_key_addr(cur, 1, right); 2350 2351 lpp = xfs_btree_ptr_addr(cur, lrecs, left); 2352 rpp = xfs_btree_ptr_addr(cur, 1, right); 2353 2354 error = xfs_btree_debug_check_ptr(cur, rpp, 0, level); 2355 if (error) 2356 goto error0; 2357 2358 xfs_btree_copy_keys(cur, lkp, rkp, 1); 2359 xfs_btree_copy_ptrs(cur, lpp, rpp, 1); 2360 2361 xfs_btree_log_keys(cur, lbp, lrecs, lrecs); 2362 xfs_btree_log_ptrs(cur, lbp, lrecs, lrecs); 2363 2364 ASSERT(cur->bc_ops->keys_inorder(cur, 2365 xfs_btree_key_addr(cur, lrecs - 1, left), lkp)); 2366 } else { 2367 /* It's a leaf. Move records. 
*/ 2368 union xfs_btree_rec *lrp; /* left record pointer */ 2369 2370 lrp = xfs_btree_rec_addr(cur, lrecs, left); 2371 rrp = xfs_btree_rec_addr(cur, 1, right); 2372 2373 xfs_btree_copy_recs(cur, lrp, rrp, 1); 2374 xfs_btree_log_recs(cur, lbp, lrecs, lrecs); 2375 2376 ASSERT(cur->bc_ops->recs_inorder(cur, 2377 xfs_btree_rec_addr(cur, lrecs - 1, left), lrp)); 2378 } 2379 2380 xfs_btree_set_numrecs(left, lrecs); 2381 xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS); 2382 2383 xfs_btree_set_numrecs(right, rrecs); 2384 xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS); 2385 2386 /* 2387 * Slide the contents of right down one entry. 2388 */ 2389 XFS_BTREE_STATS_ADD(cur, moves, rrecs - 1); 2390 if (level > 0) { 2391 /* It's a nonleaf. operate on keys and ptrs */ 2392 int i; /* loop index */ 2393 2394 for (i = 0; i < rrecs; i++) { 2395 error = xfs_btree_debug_check_ptr(cur, rpp, i + 1, level); 2396 if (error) 2397 goto error0; 2398 } 2399 2400 xfs_btree_shift_keys(cur, 2401 xfs_btree_key_addr(cur, 2, right), 2402 -1, rrecs); 2403 xfs_btree_shift_ptrs(cur, 2404 xfs_btree_ptr_addr(cur, 2, right), 2405 -1, rrecs); 2406 2407 xfs_btree_log_keys(cur, rbp, 1, rrecs); 2408 xfs_btree_log_ptrs(cur, rbp, 1, rrecs); 2409 } else { 2410 /* It's a leaf. operate on records */ 2411 xfs_btree_shift_recs(cur, 2412 xfs_btree_rec_addr(cur, 2, right), 2413 -1, rrecs); 2414 xfs_btree_log_recs(cur, rbp, 1, rrecs); 2415 } 2416 2417 /* 2418 * Using a temporary cursor, update the parent key values of the 2419 * block on the left. 2420 */ 2421 if (cur->bc_flags & XFS_BTREE_OVERLAPPING) { 2422 error = xfs_btree_dup_cursor(cur, &tcur); 2423 if (error) 2424 goto error0; 2425 i = xfs_btree_firstrec(tcur, level); 2426 if (XFS_IS_CORRUPT(tcur->bc_mp, i != 1)) { 2427 error = -EFSCORRUPTED; 2428 goto error0; 2429 } 2430 2431 error = xfs_btree_decrement(tcur, level, &i); 2432 if (error) 2433 goto error1; 2434 2435 /* Update the parent high keys of the left block, if needed. 
*/ 2436 error = xfs_btree_update_keys(tcur, level); 2437 if (error) 2438 goto error1; 2439 2440 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); 2441 } 2442 2443 /* Update the parent keys of the right block. */ 2444 error = xfs_btree_update_keys(cur, level); 2445 if (error) 2446 goto error0; 2447 2448 /* Slide the cursor value left one. */ 2449 cur->bc_ptrs[level]--; 2450 2451 *stat = 1; 2452 return 0; 2453 2454 out0: 2455 *stat = 0; 2456 return 0; 2457 2458 error0: 2459 return error; 2460 2461 error1: 2462 xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); 2463 return error; 2464 } 2465 2466 /* 2467 * Move 1 record right from cur/level if possible. 2468 * Update cur to reflect the new path. 2469 */ 2470 STATIC int /* error */ 2471 xfs_btree_rshift( 2472 struct xfs_btree_cur *cur, 2473 int level, 2474 int *stat) /* success/failure */ 2475 { 2476 struct xfs_buf *lbp; /* left buffer pointer */ 2477 struct xfs_btree_block *left; /* left btree block */ 2478 struct xfs_buf *rbp; /* right buffer pointer */ 2479 struct xfs_btree_block *right; /* right btree block */ 2480 struct xfs_btree_cur *tcur; /* temporary btree cursor */ 2481 union xfs_btree_ptr rptr; /* right block pointer */ 2482 union xfs_btree_key *rkp; /* right btree key */ 2483 int rrecs; /* right record count */ 2484 int lrecs; /* left record count */ 2485 int error; /* error return value */ 2486 int i; /* loop counter */ 2487 2488 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && 2489 (level == cur->bc_nlevels - 1)) 2490 goto out0; 2491 2492 /* Set up variables for this block as "left". */ 2493 left = xfs_btree_get_block(cur, level, &lbp); 2494 2495 #ifdef DEBUG 2496 error = xfs_btree_check_block(cur, left, level, lbp); 2497 if (error) 2498 goto error0; 2499 #endif 2500 2501 /* If we've got no right sibling then we can't shift an entry right. 
*/ 2502 xfs_btree_get_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB); 2503 if (xfs_btree_ptr_is_null(cur, &rptr)) 2504 goto out0; 2505 2506 /* 2507 * If the cursor entry is the one that would be moved, don't 2508 * do it... it's too complicated. 2509 */ 2510 lrecs = xfs_btree_get_numrecs(left); 2511 if (cur->bc_ptrs[level] >= lrecs) 2512 goto out0; 2513 2514 /* Set up the right neighbor as "right". */ 2515 error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp); 2516 if (error) 2517 goto error0; 2518 2519 /* If it's full, it can't take another entry. */ 2520 rrecs = xfs_btree_get_numrecs(right); 2521 if (rrecs == cur->bc_ops->get_maxrecs(cur, level)) 2522 goto out0; 2523 2524 XFS_BTREE_STATS_INC(cur, rshift); 2525 XFS_BTREE_STATS_ADD(cur, moves, rrecs); 2526 2527 /* 2528 * Make a hole at the start of the right neighbor block, then 2529 * copy the last left block entry to the hole. 2530 */ 2531 if (level > 0) { 2532 /* It's a nonleaf. make a hole in the keys and ptrs */ 2533 union xfs_btree_key *lkp; 2534 union xfs_btree_ptr *lpp; 2535 union xfs_btree_ptr *rpp; 2536 2537 lkp = xfs_btree_key_addr(cur, lrecs, left); 2538 lpp = xfs_btree_ptr_addr(cur, lrecs, left); 2539 rkp = xfs_btree_key_addr(cur, 1, right); 2540 rpp = xfs_btree_ptr_addr(cur, 1, right); 2541 2542 for (i = rrecs - 1; i >= 0; i--) { 2543 error = xfs_btree_debug_check_ptr(cur, rpp, i, level); 2544 if (error) 2545 goto error0; 2546 } 2547 2548 xfs_btree_shift_keys(cur, rkp, 1, rrecs); 2549 xfs_btree_shift_ptrs(cur, rpp, 1, rrecs); 2550 2551 error = xfs_btree_debug_check_ptr(cur, lpp, 0, level); 2552 if (error) 2553 goto error0; 2554 2555 /* Now put the new data in, and log it. */ 2556 xfs_btree_copy_keys(cur, rkp, lkp, 1); 2557 xfs_btree_copy_ptrs(cur, rpp, lpp, 1); 2558 2559 xfs_btree_log_keys(cur, rbp, 1, rrecs + 1); 2560 xfs_btree_log_ptrs(cur, rbp, 1, rrecs + 1); 2561 2562 ASSERT(cur->bc_ops->keys_inorder(cur, rkp, 2563 xfs_btree_key_addr(cur, 2, right))); 2564 } else { 2565 /* It's a leaf. 
make a hole in the records */ 2566 union xfs_btree_rec *lrp; 2567 union xfs_btree_rec *rrp; 2568 2569 lrp = xfs_btree_rec_addr(cur, lrecs, left); 2570 rrp = xfs_btree_rec_addr(cur, 1, right); 2571 2572 xfs_btree_shift_recs(cur, rrp, 1, rrecs); 2573 2574 /* Now put the new data in, and log it. */ 2575 xfs_btree_copy_recs(cur, rrp, lrp, 1); 2576 xfs_btree_log_recs(cur, rbp, 1, rrecs + 1); 2577 } 2578 2579 /* 2580 * Decrement and log left's numrecs, bump and log right's numrecs. 2581 */ 2582 xfs_btree_set_numrecs(left, --lrecs); 2583 xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS); 2584 2585 xfs_btree_set_numrecs(right, ++rrecs); 2586 xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS); 2587 2588 /* 2589 * Using a temporary cursor, update the parent key values of the 2590 * block on the right. 2591 */ 2592 error = xfs_btree_dup_cursor(cur, &tcur); 2593 if (error) 2594 goto error0; 2595 i = xfs_btree_lastrec(tcur, level); 2596 if (XFS_IS_CORRUPT(tcur->bc_mp, i != 1)) { 2597 error = -EFSCORRUPTED; 2598 goto error0; 2599 } 2600 2601 error = xfs_btree_increment(tcur, level, &i); 2602 if (error) 2603 goto error1; 2604 2605 /* Update the parent high keys of the left block, if needed. */ 2606 if (cur->bc_flags & XFS_BTREE_OVERLAPPING) { 2607 error = xfs_btree_update_keys(cur, level); 2608 if (error) 2609 goto error1; 2610 } 2611 2612 /* Update the parent keys of the right block. */ 2613 error = xfs_btree_update_keys(tcur, level); 2614 if (error) 2615 goto error1; 2616 2617 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); 2618 2619 *stat = 1; 2620 return 0; 2621 2622 out0: 2623 *stat = 0; 2624 return 0; 2625 2626 error0: 2627 return error; 2628 2629 error1: 2630 xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); 2631 return error; 2632 } 2633 2634 /* 2635 * Split cur/level block in half. 2636 * Return new block number and the key to its first 2637 * record (to be inserted into parent). 
 */
STATIC int					/* error */
__xfs_btree_split(
	struct xfs_btree_cur	*cur,
	int			level,
	union xfs_btree_ptr	*ptrp,		/* out: ptr to the new right block */
	union xfs_btree_key	*key,		/* out: key of the new right block */
	struct xfs_btree_cur	**curp,		/* out: new cursor, if more levels */
	int			*stat)		/* success/failure */
{
	union xfs_btree_ptr	lptr;		/* left sibling block ptr */
	struct xfs_buf		*lbp;		/* left buffer pointer */
	struct xfs_btree_block	*left;		/* left btree block */
	union xfs_btree_ptr	rptr;		/* right sibling block ptr */
	struct xfs_buf		*rbp;		/* right buffer pointer */
	struct xfs_btree_block	*right;		/* right btree block */
	union xfs_btree_ptr	rrptr;		/* right-right sibling ptr */
	struct xfs_buf		*rrbp;		/* right-right buffer pointer */
	struct xfs_btree_block	*rrblock;	/* right-right btree block */
	int			lrecs;
	int			rrecs;
	int			src_index;	/* first entry to move right */
	int			error;		/* error return value */
	int			i;

	XFS_BTREE_STATS_INC(cur, split);

	/* Set up left block (current one). */
	left = xfs_btree_get_block(cur, level, &lbp);

#ifdef DEBUG
	error = xfs_btree_check_block(cur, left, level, lbp);
	if (error)
		goto error0;
#endif

	xfs_btree_buf_to_ptr(cur, lbp, &lptr);

	/* Allocate the new block. If we can't do it, we're toast. Give up. */
	error = cur->bc_ops->alloc_block(cur, &lptr, &rptr, stat);
	if (error)
		goto error0;
	if (*stat == 0)
		goto out0;
	XFS_BTREE_STATS_INC(cur, alloc);

	/* Set up the new block as "right". */
	error = xfs_btree_get_buf_block(cur, &rptr, &right, &rbp);
	if (error)
		goto error0;

	/* Fill in the btree header for the new right block. */
	xfs_btree_init_block_cur(cur, rbp, xfs_btree_get_level(left), 0);

	/*
	 * Split the entries between the old and the new block evenly.
	 * Make sure that if there's an odd number of entries now, that
	 * each new block will have the same number of entries.
	 */
	lrecs = xfs_btree_get_numrecs(left);
	rrecs = lrecs / 2;
	/*
	 * With an odd count, put the extra entry on whichever side the
	 * cursor's insertion point falls, so the pending insert doesn't
	 * immediately unbalance the split.
	 */
	if ((lrecs & 1) && cur->bc_ptrs[level] <= rrecs + 1)
		rrecs++;
	src_index = (lrecs - rrecs + 1);

	XFS_BTREE_STATS_ADD(cur, moves, rrecs);

	/* Adjust numrecs for the later get_*_keys() calls. */
	lrecs -= rrecs;
	xfs_btree_set_numrecs(left, lrecs);
	xfs_btree_set_numrecs(right, xfs_btree_get_numrecs(right) + rrecs);

	/*
	 * Copy btree block entries from the left block over to the
	 * new block, the right. Update the right block and log the
	 * changes.
	 */
	if (level > 0) {
		/* It's a non-leaf.  Move keys and pointers. */
		union xfs_btree_key	*lkp;	/* left btree key */
		union xfs_btree_ptr	*lpp;	/* left address pointer */
		union xfs_btree_key	*rkp;	/* right btree key */
		union xfs_btree_ptr	*rpp;	/* right address pointer */

		lkp = xfs_btree_key_addr(cur, src_index, left);
		lpp = xfs_btree_ptr_addr(cur, src_index, left);
		rkp = xfs_btree_key_addr(cur, 1, right);
		rpp = xfs_btree_ptr_addr(cur, 1, right);

		for (i = src_index; i < rrecs; i++) {
			error = xfs_btree_debug_check_ptr(cur, lpp, i, level);
			if (error)
				goto error0;
		}

		/* Copy the keys & pointers to the new block. */
		xfs_btree_copy_keys(cur, rkp, lkp, rrecs);
		xfs_btree_copy_ptrs(cur, rpp, lpp, rrecs);

		xfs_btree_log_keys(cur, rbp, 1, rrecs);
		xfs_btree_log_ptrs(cur, rbp, 1, rrecs);

		/* Stash the keys of the new block for later insertion. */
		xfs_btree_get_node_keys(cur, right, key);
	} else {
		/* It's a leaf.  Move records. */
		union xfs_btree_rec	*lrp;	/* left record pointer */
		union xfs_btree_rec	*rrp;	/* right record pointer */

		lrp = xfs_btree_rec_addr(cur, src_index, left);
		rrp = xfs_btree_rec_addr(cur, 1, right);

		/* Copy records to the new block. */
		xfs_btree_copy_recs(cur, rrp, lrp, rrecs);
		xfs_btree_log_recs(cur, rbp, 1, rrecs);

		/* Stash the keys of the new block for later insertion. */
		xfs_btree_get_leaf_keys(cur, right, key);
	}

	/*
	 * Find the left block number by looking in the buffer.
	 * Adjust sibling pointers.
	 */
	xfs_btree_get_sibling(cur, left, &rrptr, XFS_BB_RIGHTSIB);
	xfs_btree_set_sibling(cur, right, &rrptr, XFS_BB_RIGHTSIB);
	xfs_btree_set_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
	xfs_btree_set_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);

	xfs_btree_log_block(cur, rbp, XFS_BB_ALL_BITS);
	xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);

	/*
	 * If there's a block to the new block's right, make that block
	 * point back to right instead of to left.
	 */
	if (!xfs_btree_ptr_is_null(cur, &rrptr)) {
		error = xfs_btree_read_buf_block(cur, &rrptr,
							0, &rrblock, &rrbp);
		if (error)
			goto error0;
		xfs_btree_set_sibling(cur, rrblock, &rptr, XFS_BB_LEFTSIB);
		xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
	}

	/* Update the parent high keys of the left block, if needed. */
	if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
		error = xfs_btree_update_keys(cur, level);
		if (error)
			goto error0;
	}

	/*
	 * If the cursor is really in the right block, move it there.
	 * If it's just pointing past the last entry in left, then we'll
	 * insert there, so don't change anything in that case.
	 */
	if (cur->bc_ptrs[level] > lrecs + 1) {
		xfs_btree_setbuf(cur, level, rbp);
		cur->bc_ptrs[level] -= lrecs;
	}
	/*
	 * If there are more levels, we'll need another cursor which refers
	 * the right block, no matter where this cursor was.
	 */
	if (level + 1 < cur->bc_nlevels) {
		error = xfs_btree_dup_cursor(cur, curp);
		if (error)
			goto error0;
		(*curp)->bc_ptrs[level + 1]++;
	}
	*ptrp = rptr;
	*stat = 1;
	return 0;
out0:
	*stat = 0;
	return 0;

error0:
	return error;
}

/*
 * Argument marshalling for running __xfs_btree_split() on a workqueue
 * (see xfs_btree_split_worker below).
 */
struct xfs_btree_split_args {
	struct xfs_btree_cur	*cur;
	int			level;
	union xfs_btree_ptr	*ptrp;
	union xfs_btree_key	*key;
	struct xfs_btree_cur	**curp;
	int			*stat;		/* success/failure */
	int			result;		/* return value of the split */
	bool			kswapd;	/* allocation in kswapd context */
	struct completion	*done;		/* signalled when the split finishes */
	struct work_struct	work;
};

/*
 * Stack switching interfaces for allocation
 */
static void
xfs_btree_split_worker(
	struct work_struct	*work)
{
	struct xfs_btree_split_args	*args = container_of(work,
						struct xfs_btree_split_args, work);
	unsigned long		pflags;
	unsigned long		new_pflags = PF_MEMALLOC_NOFS;

	/*
	 * we are in a transaction context here, but may also be doing work
	 * in kswapd context, and hence we may need to inherit that state
	 * temporarily to ensure that we don't block waiting for memory reclaim
	 * in any way.
	 */
	if (args->kswapd)
		new_pflags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;

	current_set_flags_nested(&pflags, new_pflags);

	args->result = __xfs_btree_split(args->cur, args->level, args->ptrp,
					 args->key, args->curp, args->stat);
	/* The submitter is blocked in wait_for_completion(); wake it. */
	complete(args->done);

	current_restore_flags_nested(&pflags, new_pflags);
}

/*
 * BMBT split requests often come in with little stack to work on. Push
 * them off to a worker thread so there is lots of stack to use. For the other
 * btree types, just call directly to avoid the context switch overhead here.
 */
STATIC int					/* error */
xfs_btree_split(
	struct xfs_btree_cur	*cur,
	int			level,
	union xfs_btree_ptr	*ptrp,
	union xfs_btree_key	*key,
	struct xfs_btree_cur	**curp,
	int			*stat)		/* success/failure */
{
	struct xfs_btree_split_args	args;
	DECLARE_COMPLETION_ONSTACK(done);

	/* Only BMBT splits are deferred to the worker; others run inline. */
	if (cur->bc_btnum != XFS_BTNUM_BMAP)
		return __xfs_btree_split(cur, level, ptrp, key, curp, stat);

	args.cur = cur;
	args.level = level;
	args.ptrp = ptrp;
	args.key = key;
	args.curp = curp;
	args.stat = stat;
	args.done = &done;
	args.kswapd = current_is_kswapd();
	INIT_WORK_ONSTACK(&args.work, xfs_btree_split_worker);
	queue_work(xfs_alloc_wq, &args.work);
	/* Block until the worker signals completion; args stays on our stack. */
	wait_for_completion(&done);
	destroy_work_on_stack(&args.work);
	return args.result;
}


/*
 * Copy the old inode root contents into a real block and make the
 * broot point to it.
 */
int						/* error */
xfs_btree_new_iroot(
	struct xfs_btree_cur	*cur,		/* btree cursor */
	int			*logflags,	/* logging flags for inode */
	int			*stat)		/* return status - 0 fail */
{
	struct xfs_buf		*cbp;		/* buffer for cblock */
	struct xfs_btree_block	*block;		/* btree block */
	struct xfs_btree_block	*cblock;	/* child btree block */
	union xfs_btree_key	*ckp;		/* child key pointer */
	union xfs_btree_ptr	*cpp;		/* child ptr pointer */
	union xfs_btree_key	*kp;		/* pointer to btree key */
	union xfs_btree_ptr	*pp;		/* pointer to block addr */
	union xfs_btree_ptr	nptr;		/* new block addr */
	int			level;		/* btree level */
	int			error;		/* error return code */
	int			i;		/* loop counter */

	XFS_BTREE_STATS_INC(cur, newroot);

	/* Only meaningful for btrees whose root lives in the inode fork. */
	ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);

	level = cur->bc_nlevels - 1;

	block = xfs_btree_get_iroot(cur);
	pp = xfs_btree_ptr_addr(cur, 1, block);

	/* Allocate the new block. If we can't do it, we're toast. Give up. */
	error = cur->bc_ops->alloc_block(cur, pp, &nptr, stat);
	if (error)
		goto error0;
	if (*stat == 0)
		return 0;

	XFS_BTREE_STATS_INC(cur, alloc);

	/* Copy the root into a real block. */
	error = xfs_btree_get_buf_block(cur, &nptr, &cblock, &cbp);
	if (error)
		goto error0;

	/*
	 * we can't just memcpy() the root in for CRC enabled btree blocks.
	 * In that case have to also ensure the blkno remains correct
	 */
	memcpy(cblock, block, xfs_btree_block_len(cur));
	if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) {
		if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
			cblock->bb_u.l.bb_blkno = cpu_to_be64(cbp->b_bn);
		else
			cblock->bb_u.s.bb_blkno = cpu_to_be64(cbp->b_bn);
	}

	/* The tree grows one level: the old root keeps a single entry. */
	be16_add_cpu(&block->bb_level, 1);
	xfs_btree_set_numrecs(block, 1);
	cur->bc_nlevels++;
	cur->bc_ptrs[level + 1] = 1;

	/*
	 * cblock still carries the old root's numrecs from the memcpy above,
	 * so the copies below move every entry of the old root.
	 */
	kp = xfs_btree_key_addr(cur, 1, block);
	ckp = xfs_btree_key_addr(cur, 1, cblock);
	xfs_btree_copy_keys(cur, ckp, kp, xfs_btree_get_numrecs(cblock));

	cpp = xfs_btree_ptr_addr(cur, 1, cblock);
	for (i = 0; i < be16_to_cpu(cblock->bb_numrecs); i++) {
		error = xfs_btree_debug_check_ptr(cur, pp, i, level);
		if (error)
			goto error0;
	}

	xfs_btree_copy_ptrs(cur, cpp, pp, xfs_btree_get_numrecs(cblock));

	error = xfs_btree_debug_check_ptr(cur, &nptr, 0, level);
	if (error)
		goto error0;

	/* Point the in-inode root at the new child block. */
	xfs_btree_copy_ptrs(cur, pp, &nptr, 1);

	/* Shrink the inode fork root to its new single-entry size. */
	xfs_iroot_realloc(cur->bc_private.b.ip,
			  1 - xfs_btree_get_numrecs(cblock),
			  cur->bc_private.b.whichfork);

	xfs_btree_setbuf(cur, level, cbp);

	/*
	 * Do all this logging at the end so that
	 * the root is at the right level.
	 */
	xfs_btree_log_block(cur, cbp, XFS_BB_ALL_BITS);
	xfs_btree_log_keys(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs));
	xfs_btree_log_ptrs(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs));

	*logflags |=
		XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_private.b.whichfork);
	*stat = 1;
	return 0;
error0:
	return error;
}

/*
 * Allocate a new root block, fill it in.
 */
STATIC int				/* error */
xfs_btree_new_root(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	int			*stat)	/* success/failure */
{
	struct xfs_btree_block	*block;	/* one half of the old root block */
	struct xfs_buf		*bp;	/* buffer containing block */
	int			error;	/* error return value */
	struct xfs_buf		*lbp;	/* left buffer pointer */
	struct xfs_btree_block	*left;	/* left btree block */
	struct xfs_buf		*nbp;	/* new (root) buffer */
	struct xfs_btree_block	*new;	/* new (root) btree block */
	int			nptr;	/* new value for key index, 1 or 2 */
	struct xfs_buf		*rbp;	/* right buffer pointer */
	struct xfs_btree_block	*right;	/* right btree block */
	union xfs_btree_ptr	rptr;
	union xfs_btree_ptr	lptr;

	XFS_BTREE_STATS_INC(cur, newroot);

	/* initialise our start point from the cursor */
	cur->bc_ops->init_ptr_from_cur(cur, &rptr);

	/* Allocate the new block. If we can't do it, we're toast. Give up. */
	error = cur->bc_ops->alloc_block(cur, &rptr, &lptr, stat);
	if (error)
		goto error0;
	if (*stat == 0)
		goto out0;
	XFS_BTREE_STATS_INC(cur, alloc);

	/* Set up the new block. */
	error = xfs_btree_get_buf_block(cur, &lptr, &new, &nbp);
	if (error)
		goto error0;

	/* Set the root in the holding structure  increasing the level by 1. */
	cur->bc_ops->set_root(cur, &lptr, 1);

	/*
	 * At the previous root level there are now two blocks: the old root,
	 * and the new block generated when it was split. We don't know which
	 * one the cursor is pointing at, so we set up variables "left" and
	 * "right" for each case.
	 */
	block = xfs_btree_get_block(cur, cur->bc_nlevels - 1, &bp);

#ifdef DEBUG
	error = xfs_btree_check_block(cur, block, cur->bc_nlevels - 1, bp);
	if (error)
		goto error0;
#endif

	xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
	if (!xfs_btree_ptr_is_null(cur, &rptr)) {
		/* Our block is left, pick up the right block. */
		lbp = bp;
		xfs_btree_buf_to_ptr(cur, lbp, &lptr);
		left = block;
		error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
		if (error)
			goto error0;
		bp = rbp;
		nptr = 1;
	} else {
		/* Our block is right, pick up the left block. */
		rbp = bp;
		xfs_btree_buf_to_ptr(cur, rbp, &rptr);
		right = block;
		xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
		error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
		if (error)
			goto error0;
		bp = lbp;
		nptr = 2;
	}

	/* Fill in the new block's btree header and log it. */
	xfs_btree_init_block_cur(cur, nbp, cur->bc_nlevels, 2);
	xfs_btree_log_block(cur, nbp, XFS_BB_ALL_BITS);
	ASSERT(!xfs_btree_ptr_is_null(cur, &lptr) &&
			!xfs_btree_ptr_is_null(cur, &rptr));

	/* Fill in the key data in the new root. */
	if (xfs_btree_get_level(left) > 0) {
		/*
		 * Get the keys for the left block's keys and put them directly
		 * in the parent block.  Do the same for the right block.
		 */
		xfs_btree_get_node_keys(cur, left,
				xfs_btree_key_addr(cur, 1, new));
		xfs_btree_get_node_keys(cur, right,
				xfs_btree_key_addr(cur, 2, new));
	} else {
		/*
		 * Get the keys for the left block's records and put them
		 * directly in the parent block.  Do the same for the right
		 * block.
		 */
		xfs_btree_get_leaf_keys(cur, left,
			xfs_btree_key_addr(cur, 1, new));
		xfs_btree_get_leaf_keys(cur, right,
			xfs_btree_key_addr(cur, 2, new));
	}
	xfs_btree_log_keys(cur, nbp, 1, 2);

	/* Fill in the pointer data in the new root. */
	xfs_btree_copy_ptrs(cur,
		xfs_btree_ptr_addr(cur, 1, new), &lptr, 1);
	xfs_btree_copy_ptrs(cur,
		xfs_btree_ptr_addr(cur, 2, new), &rptr, 1);
	xfs_btree_log_ptrs(cur, nbp, 1, 2);

	/* Fix up the cursor. */
	xfs_btree_setbuf(cur, cur->bc_nlevels, nbp);
	cur->bc_ptrs[cur->bc_nlevels] = nptr;
	cur->bc_nlevels++;
	*stat = 1;
	return 0;
error0:
	return error;
out0:
	*stat = 0;
	return 0;
}

/*
 * Make the block at cur/level able to take another entry, by shifting to
 * a sibling, growing/replacing an inode root, or splitting the block.
 */
STATIC int
xfs_btree_make_block_unfull(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	int			level,	/* btree level */
	int			numrecs,/* # of recs in block */
	int			*oindex,/* old tree index */
	int			*index,	/* new tree index */
	union xfs_btree_ptr	*nptr,	/* new btree ptr */
	struct xfs_btree_cur	**ncur,	/* new btree cursor */
	union xfs_btree_key	*key,	/* key of new block */
	int			*stat)
{
	int			error = 0;

	if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
	    level == cur->bc_nlevels - 1) {
		struct xfs_inode *ip = cur->bc_private.b.ip;

		if (numrecs < cur->bc_ops->get_dmaxrecs(cur, level)) {
			/* A root block that can be made bigger. */
			xfs_iroot_realloc(ip, 1, cur->bc_private.b.whichfork);
			*stat = 1;
		} else {
			/* A root block that needs replacing */
			int	logflags = 0;

			error = xfs_btree_new_iroot(cur, &logflags, stat);
			if (error || *stat == 0)
				return error;

			xfs_trans_log_inode(cur->bc_tp, ip, logflags);
		}

		return 0;
	}

	/* First, try shifting an entry to the right neighbor. */
	error = xfs_btree_rshift(cur, level, stat);
	if (error || *stat)
		return error;

	/* Next, try shifting an entry to the left neighbor. */
	error = xfs_btree_lshift(cur, level, stat);
	if (error)
		return error;

	if (*stat) {
		/* lshift moved the insertion point; refresh both indices. */
		*oindex = *index = cur->bc_ptrs[level];
		return 0;
	}

	/*
	 * Next, try splitting the current block in half.
	 *
	 * If this works we have to re-set our variables because we
	 * could be in a different block now.
	 */
	error = xfs_btree_split(cur, level, nptr, key, ncur, stat);
	if (error || *stat == 0)
		return error;


	*index = cur->bc_ptrs[level];
	return 0;
}

/*
 * Insert one record/level.  Return information to the caller
 * allowing the next level up to proceed if necessary.
 */
STATIC int
xfs_btree_insrec(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	int			level,	/* level to insert record at */
	union xfs_btree_ptr	*ptrp,	/* i/o: block number inserted */
	union xfs_btree_rec	*rec,	/* record to insert */
	union xfs_btree_key	*key,	/* i/o: block key for ptrp */
	struct xfs_btree_cur	**curp,	/* output: new cursor replacing cur */
	int			*stat)	/* success/failure */
{
	struct xfs_btree_block	*block;	/* btree block */
	struct xfs_buf		*bp;	/* buffer for block */
	union xfs_btree_ptr	nptr;	/* new block ptr */
	struct xfs_btree_cur	*ncur;	/* new btree cursor */
	union xfs_btree_key	nkey;	/* new block key */
	union xfs_btree_key	*lkey;
	int			optr;	/* old key/record index */
	int			ptr;	/* key/record index */
	int			numrecs;/* number of records */
	int			error;	/* error return value */
	int			i;
	xfs_daddr_t		old_bn;	/* daddr before any split, see below */

	ncur = NULL;
	lkey = &nkey;

	/*
	 * If we have an external root pointer, and we've made it to the
	 * root level, allocate a new root block and we're done.
	 */
	if (!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
	    (level >= cur->bc_nlevels)) {
		error = xfs_btree_new_root(cur, stat);
		xfs_btree_set_ptr_null(cur, ptrp);

		return error;
	}

	/* If we're off the left edge, return failure. */
	ptr = cur->bc_ptrs[level];
	if (ptr == 0) {
		*stat = 0;
		return 0;
	}

	optr = ptr;

	XFS_BTREE_STATS_INC(cur, insrec);

	/*
	 * Get pointers to the btree buffer and block.  Remember the disk
	 * address so we can tell later whether make_block_unfull() moved
	 * the cursor onto a freshly split block.
	 */
	block = xfs_btree_get_block(cur, level, &bp);
	old_bn = bp ? bp->b_bn : XFS_BUF_DADDR_NULL;
	numrecs = xfs_btree_get_numrecs(block);

#ifdef DEBUG
	error = xfs_btree_check_block(cur, block, level, bp);
	if (error)
		goto error0;

	/* Check that the new entry is being inserted in the right place. */
	if (ptr <= numrecs) {
		if (level == 0) {
			ASSERT(cur->bc_ops->recs_inorder(cur, rec,
				xfs_btree_rec_addr(cur, ptr, block)));
		} else {
			ASSERT(cur->bc_ops->keys_inorder(cur, key,
				xfs_btree_key_addr(cur, ptr, block)));
		}
	}
#endif

	/*
	 * If the block is full, we can't insert the new entry until we
	 * make the block un-full.
	 */
	xfs_btree_set_ptr_null(cur, &nptr);
	if (numrecs == cur->bc_ops->get_maxrecs(cur, level)) {
		error = xfs_btree_make_block_unfull(cur, level, numrecs,
					&optr, &ptr, &nptr, &ncur, lkey, stat);
		if (error || *stat == 0)
			goto error0;
	}

	/*
	 * The current block may have changed if the block was
	 * previously full and we have just made space in it.
	 */
	block = xfs_btree_get_block(cur, level, &bp);
	numrecs = xfs_btree_get_numrecs(block);

#ifdef DEBUG
	error = xfs_btree_check_block(cur, block, level, bp);
	if (error)
		return error;
#endif

	/*
	 * At this point we know there's room for our new entry in the block
	 * we're pointing at.
	 */
	XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr + 1);

	if (level > 0) {
		/* It's a nonleaf. make a hole in the keys and ptrs */
		union xfs_btree_key	*kp;
		union xfs_btree_ptr	*pp;

		kp = xfs_btree_key_addr(cur, ptr, block);
		pp = xfs_btree_ptr_addr(cur, ptr, block);

		for (i = numrecs - ptr; i >= 0; i--) {
			error = xfs_btree_debug_check_ptr(cur, pp, i, level);
			if (error)
				return error;
		}

		xfs_btree_shift_keys(cur, kp, 1, numrecs - ptr + 1);
		xfs_btree_shift_ptrs(cur, pp, 1, numrecs - ptr + 1);

		error = xfs_btree_debug_check_ptr(cur, ptrp, 0, level);
		if (error)
			goto error0;

		/* Now put the new data in, bump numrecs and log it. */
		xfs_btree_copy_keys(cur, kp, key, 1);
		xfs_btree_copy_ptrs(cur, pp, ptrp, 1);
		numrecs++;
		xfs_btree_set_numrecs(block, numrecs);
		xfs_btree_log_ptrs(cur, bp, ptr, numrecs);
		xfs_btree_log_keys(cur, bp, ptr, numrecs);
#ifdef DEBUG
		if (ptr < numrecs) {
			ASSERT(cur->bc_ops->keys_inorder(cur, kp,
				xfs_btree_key_addr(cur, ptr + 1, block)));
		}
#endif
	} else {
		/* It's a leaf. make a hole in the records */
		union xfs_btree_rec             *rp;

		rp = xfs_btree_rec_addr(cur, ptr, block);

		xfs_btree_shift_recs(cur, rp, 1, numrecs - ptr + 1);

		/* Now put the new data in, bump numrecs and log it. */
		xfs_btree_copy_recs(cur, rp, rec, 1);
		xfs_btree_set_numrecs(block, ++numrecs);
		xfs_btree_log_recs(cur, bp, ptr, numrecs);
#ifdef DEBUG
		if (ptr < numrecs) {
			ASSERT(cur->bc_ops->recs_inorder(cur, rp,
				xfs_btree_rec_addr(cur, ptr + 1, block)));
		}
#endif
	}

	/* Log the new number of records in the btree header. */
	xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);

	/*
	 * If we just inserted into a new tree block, we have to
	 * recalculate nkey here because nkey is out of date.
	 *
	 * Otherwise we're just updating an existing block (having shoved
	 * some records into the new tree block), so use the regular key
	 * update mechanism.
	 */
	if (bp && bp->b_bn != old_bn) {
		xfs_btree_get_keys(cur, block, lkey);
	} else if (xfs_btree_needs_key_update(cur, optr)) {
		error = xfs_btree_update_keys(cur, level);
		if (error)
			goto error0;
	}

	/*
	 * If we are tracking the last record in the tree and
	 * we are at the far right edge of the tree, update it.
	 */
	if (xfs_btree_is_lastrec(cur, block, level)) {
		cur->bc_ops->update_lastrec(cur, block, rec,
					    ptr, LASTREC_INSREC);
	}

	/*
	 * Return the new block number, if any.
	 * If there is one, give back a record value and a cursor too.
	 */
	*ptrp = nptr;
	if (!xfs_btree_ptr_is_null(cur, &nptr)) {
		xfs_btree_copy_keys(cur, key, lkey, 1);
		*curp = ncur;
	}

	*stat = 1;
	return 0;

error0:
	/*
	 * NOTE(review): if make_block_unfull() created ncur and a later
	 * step failed, ncur is not deleted here — verify against callers
	 * whether that cursor can leak on this path.
	 */
	return error;
}

/*
 * Insert the record at the point referenced by cur.
 *
 * A multi-level split of the tree on insert will invalidate the original
 * cursor.  All callers of this function should assume that the cursor is
 * no longer valid and revalidate it.
 */
int
xfs_btree_insert(
	struct xfs_btree_cur	*cur,
	int			*stat)
{
	int			error;	/* error return value */
	int			i;	/* result value, 0 for failure */
	int			level;	/* current level number in btree */
	union xfs_btree_ptr	nptr;	/* new block number (split result) */
	struct xfs_btree_cur	*ncur;	/* new cursor (split result) */
	struct xfs_btree_cur	*pcur;	/* previous level's cursor */
	union xfs_btree_key	bkey;	/* key of block to insert */
	union xfs_btree_key	*key;
	union xfs_btree_rec	rec;	/* record to insert */

	level = 0;
	ncur = NULL;
	pcur = cur;
	key = &bkey;

	xfs_btree_set_ptr_null(cur, &nptr);

	/* Make a key out of the record data to be inserted, and save it. */
	cur->bc_ops->init_rec_from_cur(cur, &rec);
	cur->bc_ops->init_key_from_rec(key, &rec);

	/*
	 * Loop going up the tree, starting at the leaf level.
	 * Stop when we don't get a split block, that must mean that
	 * the insert is finished with this level.
	 */
	do {
		/*
		 * Insert nrec/nptr into this level of the tree.
		 * Note if we fail, nptr will be null.
		 */
		error = xfs_btree_insrec(pcur, level, &nptr, &rec, key,
				&ncur, &i);
		if (error) {
			if (pcur != cur)
				xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
			goto error0;
		}

		if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
			error = -EFSCORRUPTED;
			/*
			 * NOTE(review): unlike the error branch above, this
			 * path does not delete pcur when pcur != cur —
			 * confirm whether a split-created cursor can leak
			 * here.
			 */
			goto error0;
		}
		level++;

		/*
		 * See if the cursor we just used is trash.
		 * Can't trash the caller's cursor, but otherwise we should
		 * if ncur is a new cursor or we're about to be done.
		 */
		if (pcur != cur &&
		    (ncur || xfs_btree_ptr_is_null(cur, &nptr))) {
			/* Save the state from the cursor before we trash it */
			if (cur->bc_ops->update_cursor)
				cur->bc_ops->update_cursor(pcur, cur);
			cur->bc_nlevels = pcur->bc_nlevels;
			xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
		}
		/* If we got a new cursor, switch to it. */
		if (ncur) {
			pcur = ncur;
			ncur = NULL;
		}
	} while (!xfs_btree_ptr_is_null(cur, &nptr));

	*stat = i;
	return 0;
error0:
	return error;
}

/*
 * Try to merge a non-leaf block back into the inode root.
 *
 * Note: the killroot name comes from the fact that we're effectively
 * killing the old root block.  But because we can't just delete the
 * inode we have to copy the single block it was pointing to into the
 * inode.
3492 */ 3493 STATIC int 3494 xfs_btree_kill_iroot( 3495 struct xfs_btree_cur *cur) 3496 { 3497 int whichfork = cur->bc_private.b.whichfork; 3498 struct xfs_inode *ip = cur->bc_private.b.ip; 3499 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); 3500 struct xfs_btree_block *block; 3501 struct xfs_btree_block *cblock; 3502 union xfs_btree_key *kp; 3503 union xfs_btree_key *ckp; 3504 union xfs_btree_ptr *pp; 3505 union xfs_btree_ptr *cpp; 3506 struct xfs_buf *cbp; 3507 int level; 3508 int index; 3509 int numrecs; 3510 int error; 3511 #ifdef DEBUG 3512 union xfs_btree_ptr ptr; 3513 #endif 3514 int i; 3515 3516 ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE); 3517 ASSERT(cur->bc_nlevels > 1); 3518 3519 /* 3520 * Don't deal with the root block needs to be a leaf case. 3521 * We're just going to turn the thing back into extents anyway. 3522 */ 3523 level = cur->bc_nlevels - 1; 3524 if (level == 1) 3525 goto out0; 3526 3527 /* 3528 * Give up if the root has multiple children. 3529 */ 3530 block = xfs_btree_get_iroot(cur); 3531 if (xfs_btree_get_numrecs(block) != 1) 3532 goto out0; 3533 3534 cblock = xfs_btree_get_block(cur, level - 1, &cbp); 3535 numrecs = xfs_btree_get_numrecs(cblock); 3536 3537 /* 3538 * Only do this if the next level will fit. 3539 * Then the data must be copied up to the inode, 3540 * instead of freeing the root you free the next level. 
3541 */ 3542 if (numrecs > cur->bc_ops->get_dmaxrecs(cur, level)) 3543 goto out0; 3544 3545 XFS_BTREE_STATS_INC(cur, killroot); 3546 3547 #ifdef DEBUG 3548 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB); 3549 ASSERT(xfs_btree_ptr_is_null(cur, &ptr)); 3550 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB); 3551 ASSERT(xfs_btree_ptr_is_null(cur, &ptr)); 3552 #endif 3553 3554 index = numrecs - cur->bc_ops->get_maxrecs(cur, level); 3555 if (index) { 3556 xfs_iroot_realloc(cur->bc_private.b.ip, index, 3557 cur->bc_private.b.whichfork); 3558 block = ifp->if_broot; 3559 } 3560 3561 be16_add_cpu(&block->bb_numrecs, index); 3562 ASSERT(block->bb_numrecs == cblock->bb_numrecs); 3563 3564 kp = xfs_btree_key_addr(cur, 1, block); 3565 ckp = xfs_btree_key_addr(cur, 1, cblock); 3566 xfs_btree_copy_keys(cur, kp, ckp, numrecs); 3567 3568 pp = xfs_btree_ptr_addr(cur, 1, block); 3569 cpp = xfs_btree_ptr_addr(cur, 1, cblock); 3570 3571 for (i = 0; i < numrecs; i++) { 3572 error = xfs_btree_debug_check_ptr(cur, cpp, i, level - 1); 3573 if (error) 3574 return error; 3575 } 3576 3577 xfs_btree_copy_ptrs(cur, pp, cpp, numrecs); 3578 3579 error = xfs_btree_free_block(cur, cbp); 3580 if (error) 3581 return error; 3582 3583 cur->bc_bufs[level - 1] = NULL; 3584 be16_add_cpu(&block->bb_level, -1); 3585 xfs_trans_log_inode(cur->bc_tp, ip, 3586 XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_private.b.whichfork)); 3587 cur->bc_nlevels--; 3588 out0: 3589 return 0; 3590 } 3591 3592 /* 3593 * Kill the current root node, and replace it with it's only child node. 3594 */ 3595 STATIC int 3596 xfs_btree_kill_root( 3597 struct xfs_btree_cur *cur, 3598 struct xfs_buf *bp, 3599 int level, 3600 union xfs_btree_ptr *newroot) 3601 { 3602 int error; 3603 3604 XFS_BTREE_STATS_INC(cur, killroot); 3605 3606 /* 3607 * Update the root pointer, decreasing the level by 1 and then 3608 * free the old root. 
3609 */ 3610 cur->bc_ops->set_root(cur, newroot, -1); 3611 3612 error = xfs_btree_free_block(cur, bp); 3613 if (error) 3614 return error; 3615 3616 cur->bc_bufs[level] = NULL; 3617 cur->bc_ra[level] = 0; 3618 cur->bc_nlevels--; 3619 3620 return 0; 3621 } 3622 3623 STATIC int 3624 xfs_btree_dec_cursor( 3625 struct xfs_btree_cur *cur, 3626 int level, 3627 int *stat) 3628 { 3629 int error; 3630 int i; 3631 3632 if (level > 0) { 3633 error = xfs_btree_decrement(cur, level, &i); 3634 if (error) 3635 return error; 3636 } 3637 3638 *stat = 1; 3639 return 0; 3640 } 3641 3642 /* 3643 * Single level of the btree record deletion routine. 3644 * Delete record pointed to by cur/level. 3645 * Remove the record from its block then rebalance the tree. 3646 * Return 0 for error, 1 for done, 2 to go on to the next level. 3647 */ 3648 STATIC int /* error */ 3649 xfs_btree_delrec( 3650 struct xfs_btree_cur *cur, /* btree cursor */ 3651 int level, /* level removing record from */ 3652 int *stat) /* fail/done/go-on */ 3653 { 3654 struct xfs_btree_block *block; /* btree block */ 3655 union xfs_btree_ptr cptr; /* current block ptr */ 3656 struct xfs_buf *bp; /* buffer for block */ 3657 int error; /* error return value */ 3658 int i; /* loop counter */ 3659 union xfs_btree_ptr lptr; /* left sibling block ptr */ 3660 struct xfs_buf *lbp; /* left buffer pointer */ 3661 struct xfs_btree_block *left; /* left btree block */ 3662 int lrecs = 0; /* left record count */ 3663 int ptr; /* key/record index */ 3664 union xfs_btree_ptr rptr; /* right sibling block ptr */ 3665 struct xfs_buf *rbp; /* right buffer pointer */ 3666 struct xfs_btree_block *right; /* right btree block */ 3667 struct xfs_btree_block *rrblock; /* right-right btree block */ 3668 struct xfs_buf *rrbp; /* right-right buffer pointer */ 3669 int rrecs = 0; /* right record count */ 3670 struct xfs_btree_cur *tcur; /* temporary btree cursor */ 3671 int numrecs; /* temporary numrec count */ 3672 3673 tcur = NULL; 3674 3675 /* Get the 
index of the entry being deleted, check for nothing there. */ 3676 ptr = cur->bc_ptrs[level]; 3677 if (ptr == 0) { 3678 *stat = 0; 3679 return 0; 3680 } 3681 3682 /* Get the buffer & block containing the record or key/ptr. */ 3683 block = xfs_btree_get_block(cur, level, &bp); 3684 numrecs = xfs_btree_get_numrecs(block); 3685 3686 #ifdef DEBUG 3687 error = xfs_btree_check_block(cur, block, level, bp); 3688 if (error) 3689 goto error0; 3690 #endif 3691 3692 /* Fail if we're off the end of the block. */ 3693 if (ptr > numrecs) { 3694 *stat = 0; 3695 return 0; 3696 } 3697 3698 XFS_BTREE_STATS_INC(cur, delrec); 3699 XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr); 3700 3701 /* Excise the entries being deleted. */ 3702 if (level > 0) { 3703 /* It's a nonleaf. operate on keys and ptrs */ 3704 union xfs_btree_key *lkp; 3705 union xfs_btree_ptr *lpp; 3706 3707 lkp = xfs_btree_key_addr(cur, ptr + 1, block); 3708 lpp = xfs_btree_ptr_addr(cur, ptr + 1, block); 3709 3710 for (i = 0; i < numrecs - ptr; i++) { 3711 error = xfs_btree_debug_check_ptr(cur, lpp, i, level); 3712 if (error) 3713 goto error0; 3714 } 3715 3716 if (ptr < numrecs) { 3717 xfs_btree_shift_keys(cur, lkp, -1, numrecs - ptr); 3718 xfs_btree_shift_ptrs(cur, lpp, -1, numrecs - ptr); 3719 xfs_btree_log_keys(cur, bp, ptr, numrecs - 1); 3720 xfs_btree_log_ptrs(cur, bp, ptr, numrecs - 1); 3721 } 3722 } else { 3723 /* It's a leaf. operate on records */ 3724 if (ptr < numrecs) { 3725 xfs_btree_shift_recs(cur, 3726 xfs_btree_rec_addr(cur, ptr + 1, block), 3727 -1, numrecs - ptr); 3728 xfs_btree_log_recs(cur, bp, ptr, numrecs - 1); 3729 } 3730 } 3731 3732 /* 3733 * Decrement and log the number of entries in the block. 3734 */ 3735 xfs_btree_set_numrecs(block, --numrecs); 3736 xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS); 3737 3738 /* 3739 * If we are tracking the last record in the tree and 3740 * we are at the far right edge of the tree, update it. 
3741 */ 3742 if (xfs_btree_is_lastrec(cur, block, level)) { 3743 cur->bc_ops->update_lastrec(cur, block, NULL, 3744 ptr, LASTREC_DELREC); 3745 } 3746 3747 /* 3748 * We're at the root level. First, shrink the root block in-memory. 3749 * Try to get rid of the next level down. If we can't then there's 3750 * nothing left to do. 3751 */ 3752 if (level == cur->bc_nlevels - 1) { 3753 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) { 3754 xfs_iroot_realloc(cur->bc_private.b.ip, -1, 3755 cur->bc_private.b.whichfork); 3756 3757 error = xfs_btree_kill_iroot(cur); 3758 if (error) 3759 goto error0; 3760 3761 error = xfs_btree_dec_cursor(cur, level, stat); 3762 if (error) 3763 goto error0; 3764 *stat = 1; 3765 return 0; 3766 } 3767 3768 /* 3769 * If this is the root level, and there's only one entry left, 3770 * and it's NOT the leaf level, then we can get rid of this 3771 * level. 3772 */ 3773 if (numrecs == 1 && level > 0) { 3774 union xfs_btree_ptr *pp; 3775 /* 3776 * pp is still set to the first pointer in the block. 3777 * Make it the new root of the btree. 3778 */ 3779 pp = xfs_btree_ptr_addr(cur, 1, block); 3780 error = xfs_btree_kill_root(cur, bp, level, pp); 3781 if (error) 3782 goto error0; 3783 } else if (level > 0) { 3784 error = xfs_btree_dec_cursor(cur, level, stat); 3785 if (error) 3786 goto error0; 3787 } 3788 *stat = 1; 3789 return 0; 3790 } 3791 3792 /* 3793 * If we deleted the leftmost entry in the block, update the 3794 * key values above us in the tree. 3795 */ 3796 if (xfs_btree_needs_key_update(cur, ptr)) { 3797 error = xfs_btree_update_keys(cur, level); 3798 if (error) 3799 goto error0; 3800 } 3801 3802 /* 3803 * If the number of records remaining in the block is at least 3804 * the minimum, we're done. 
3805 */ 3806 if (numrecs >= cur->bc_ops->get_minrecs(cur, level)) { 3807 error = xfs_btree_dec_cursor(cur, level, stat); 3808 if (error) 3809 goto error0; 3810 return 0; 3811 } 3812 3813 /* 3814 * Otherwise, we have to move some records around to keep the 3815 * tree balanced. Look at the left and right sibling blocks to 3816 * see if we can re-balance by moving only one record. 3817 */ 3818 xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB); 3819 xfs_btree_get_sibling(cur, block, &lptr, XFS_BB_LEFTSIB); 3820 3821 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) { 3822 /* 3823 * One child of root, need to get a chance to copy its contents 3824 * into the root and delete it. Can't go up to next level, 3825 * there's nothing to delete there. 3826 */ 3827 if (xfs_btree_ptr_is_null(cur, &rptr) && 3828 xfs_btree_ptr_is_null(cur, &lptr) && 3829 level == cur->bc_nlevels - 2) { 3830 error = xfs_btree_kill_iroot(cur); 3831 if (!error) 3832 error = xfs_btree_dec_cursor(cur, level, stat); 3833 if (error) 3834 goto error0; 3835 return 0; 3836 } 3837 } 3838 3839 ASSERT(!xfs_btree_ptr_is_null(cur, &rptr) || 3840 !xfs_btree_ptr_is_null(cur, &lptr)); 3841 3842 /* 3843 * Duplicate the cursor so our btree manipulations here won't 3844 * disrupt the next level up. 3845 */ 3846 error = xfs_btree_dup_cursor(cur, &tcur); 3847 if (error) 3848 goto error0; 3849 3850 /* 3851 * If there's a right sibling, see if it's ok to shift an entry 3852 * out of it. 3853 */ 3854 if (!xfs_btree_ptr_is_null(cur, &rptr)) { 3855 /* 3856 * Move the temp cursor to the last entry in the next block. 3857 * Actually any entry but the first would suffice. 
3858 */ 3859 i = xfs_btree_lastrec(tcur, level); 3860 if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { 3861 error = -EFSCORRUPTED; 3862 goto error0; 3863 } 3864 3865 error = xfs_btree_increment(tcur, level, &i); 3866 if (error) 3867 goto error0; 3868 if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { 3869 error = -EFSCORRUPTED; 3870 goto error0; 3871 } 3872 3873 i = xfs_btree_lastrec(tcur, level); 3874 if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { 3875 error = -EFSCORRUPTED; 3876 goto error0; 3877 } 3878 3879 /* Grab a pointer to the block. */ 3880 right = xfs_btree_get_block(tcur, level, &rbp); 3881 #ifdef DEBUG 3882 error = xfs_btree_check_block(tcur, right, level, rbp); 3883 if (error) 3884 goto error0; 3885 #endif 3886 /* Grab the current block number, for future use. */ 3887 xfs_btree_get_sibling(tcur, right, &cptr, XFS_BB_LEFTSIB); 3888 3889 /* 3890 * If right block is full enough so that removing one entry 3891 * won't make it too empty, and left-shifting an entry out 3892 * of right to us works, we're done. 3893 */ 3894 if (xfs_btree_get_numrecs(right) - 1 >= 3895 cur->bc_ops->get_minrecs(tcur, level)) { 3896 error = xfs_btree_lshift(tcur, level, &i); 3897 if (error) 3898 goto error0; 3899 if (i) { 3900 ASSERT(xfs_btree_get_numrecs(block) >= 3901 cur->bc_ops->get_minrecs(tcur, level)); 3902 3903 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); 3904 tcur = NULL; 3905 3906 error = xfs_btree_dec_cursor(cur, level, stat); 3907 if (error) 3908 goto error0; 3909 return 0; 3910 } 3911 } 3912 3913 /* 3914 * Otherwise, grab the number of records in right for 3915 * future reference, and fix up the temp cursor to point 3916 * to our block again (last record). 
3917 */ 3918 rrecs = xfs_btree_get_numrecs(right); 3919 if (!xfs_btree_ptr_is_null(cur, &lptr)) { 3920 i = xfs_btree_firstrec(tcur, level); 3921 if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { 3922 error = -EFSCORRUPTED; 3923 goto error0; 3924 } 3925 3926 error = xfs_btree_decrement(tcur, level, &i); 3927 if (error) 3928 goto error0; 3929 if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { 3930 error = -EFSCORRUPTED; 3931 goto error0; 3932 } 3933 } 3934 } 3935 3936 /* 3937 * If there's a left sibling, see if it's ok to shift an entry 3938 * out of it. 3939 */ 3940 if (!xfs_btree_ptr_is_null(cur, &lptr)) { 3941 /* 3942 * Move the temp cursor to the first entry in the 3943 * previous block. 3944 */ 3945 i = xfs_btree_firstrec(tcur, level); 3946 if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { 3947 error = -EFSCORRUPTED; 3948 goto error0; 3949 } 3950 3951 error = xfs_btree_decrement(tcur, level, &i); 3952 if (error) 3953 goto error0; 3954 i = xfs_btree_firstrec(tcur, level); 3955 if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { 3956 error = -EFSCORRUPTED; 3957 goto error0; 3958 } 3959 3960 /* Grab a pointer to the block. */ 3961 left = xfs_btree_get_block(tcur, level, &lbp); 3962 #ifdef DEBUG 3963 error = xfs_btree_check_block(cur, left, level, lbp); 3964 if (error) 3965 goto error0; 3966 #endif 3967 /* Grab the current block number, for future use. */ 3968 xfs_btree_get_sibling(tcur, left, &cptr, XFS_BB_RIGHTSIB); 3969 3970 /* 3971 * If left block is full enough so that removing one entry 3972 * won't make it too empty, and right-shifting an entry out 3973 * of left to us works, we're done. 
3974 */ 3975 if (xfs_btree_get_numrecs(left) - 1 >= 3976 cur->bc_ops->get_minrecs(tcur, level)) { 3977 error = xfs_btree_rshift(tcur, level, &i); 3978 if (error) 3979 goto error0; 3980 if (i) { 3981 ASSERT(xfs_btree_get_numrecs(block) >= 3982 cur->bc_ops->get_minrecs(tcur, level)); 3983 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); 3984 tcur = NULL; 3985 if (level == 0) 3986 cur->bc_ptrs[0]++; 3987 3988 *stat = 1; 3989 return 0; 3990 } 3991 } 3992 3993 /* 3994 * Otherwise, grab the number of records in right for 3995 * future reference. 3996 */ 3997 lrecs = xfs_btree_get_numrecs(left); 3998 } 3999 4000 /* Delete the temp cursor, we're done with it. */ 4001 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); 4002 tcur = NULL; 4003 4004 /* If here, we need to do a join to keep the tree balanced. */ 4005 ASSERT(!xfs_btree_ptr_is_null(cur, &cptr)); 4006 4007 if (!xfs_btree_ptr_is_null(cur, &lptr) && 4008 lrecs + xfs_btree_get_numrecs(block) <= 4009 cur->bc_ops->get_maxrecs(cur, level)) { 4010 /* 4011 * Set "right" to be the starting block, 4012 * "left" to be the left neighbor. 4013 */ 4014 rptr = cptr; 4015 right = block; 4016 rbp = bp; 4017 error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp); 4018 if (error) 4019 goto error0; 4020 4021 /* 4022 * If that won't work, see if we can join with the right neighbor block. 4023 */ 4024 } else if (!xfs_btree_ptr_is_null(cur, &rptr) && 4025 rrecs + xfs_btree_get_numrecs(block) <= 4026 cur->bc_ops->get_maxrecs(cur, level)) { 4027 /* 4028 * Set "left" to be the starting block, 4029 * "right" to be the right neighbor. 4030 */ 4031 lptr = cptr; 4032 left = block; 4033 lbp = bp; 4034 error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp); 4035 if (error) 4036 goto error0; 4037 4038 /* 4039 * Otherwise, we can't fix the imbalance. 4040 * Just return. This is probably a logic error, but it's not fatal. 
4041 */ 4042 } else { 4043 error = xfs_btree_dec_cursor(cur, level, stat); 4044 if (error) 4045 goto error0; 4046 return 0; 4047 } 4048 4049 rrecs = xfs_btree_get_numrecs(right); 4050 lrecs = xfs_btree_get_numrecs(left); 4051 4052 /* 4053 * We're now going to join "left" and "right" by moving all the stuff 4054 * in "right" to "left" and deleting "right". 4055 */ 4056 XFS_BTREE_STATS_ADD(cur, moves, rrecs); 4057 if (level > 0) { 4058 /* It's a non-leaf. Move keys and pointers. */ 4059 union xfs_btree_key *lkp; /* left btree key */ 4060 union xfs_btree_ptr *lpp; /* left address pointer */ 4061 union xfs_btree_key *rkp; /* right btree key */ 4062 union xfs_btree_ptr *rpp; /* right address pointer */ 4063 4064 lkp = xfs_btree_key_addr(cur, lrecs + 1, left); 4065 lpp = xfs_btree_ptr_addr(cur, lrecs + 1, left); 4066 rkp = xfs_btree_key_addr(cur, 1, right); 4067 rpp = xfs_btree_ptr_addr(cur, 1, right); 4068 4069 for (i = 1; i < rrecs; i++) { 4070 error = xfs_btree_debug_check_ptr(cur, rpp, i, level); 4071 if (error) 4072 goto error0; 4073 } 4074 4075 xfs_btree_copy_keys(cur, lkp, rkp, rrecs); 4076 xfs_btree_copy_ptrs(cur, lpp, rpp, rrecs); 4077 4078 xfs_btree_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs); 4079 xfs_btree_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs); 4080 } else { 4081 /* It's a leaf. Move records. */ 4082 union xfs_btree_rec *lrp; /* left record pointer */ 4083 union xfs_btree_rec *rrp; /* right record pointer */ 4084 4085 lrp = xfs_btree_rec_addr(cur, lrecs + 1, left); 4086 rrp = xfs_btree_rec_addr(cur, 1, right); 4087 4088 xfs_btree_copy_recs(cur, lrp, rrp, rrecs); 4089 xfs_btree_log_recs(cur, lbp, lrecs + 1, lrecs + rrecs); 4090 } 4091 4092 XFS_BTREE_STATS_INC(cur, join); 4093 4094 /* 4095 * Fix up the number of records and right block pointer in the 4096 * surviving block, and log it. 
4097 */ 4098 xfs_btree_set_numrecs(left, lrecs + rrecs); 4099 xfs_btree_get_sibling(cur, right, &cptr, XFS_BB_RIGHTSIB), 4100 xfs_btree_set_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB); 4101 xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB); 4102 4103 /* If there is a right sibling, point it to the remaining block. */ 4104 xfs_btree_get_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB); 4105 if (!xfs_btree_ptr_is_null(cur, &cptr)) { 4106 error = xfs_btree_read_buf_block(cur, &cptr, 0, &rrblock, &rrbp); 4107 if (error) 4108 goto error0; 4109 xfs_btree_set_sibling(cur, rrblock, &lptr, XFS_BB_LEFTSIB); 4110 xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB); 4111 } 4112 4113 /* Free the deleted block. */ 4114 error = xfs_btree_free_block(cur, rbp); 4115 if (error) 4116 goto error0; 4117 4118 /* 4119 * If we joined with the left neighbor, set the buffer in the 4120 * cursor to the left block, and fix up the index. 4121 */ 4122 if (bp != lbp) { 4123 cur->bc_bufs[level] = lbp; 4124 cur->bc_ptrs[level] += lrecs; 4125 cur->bc_ra[level] = 0; 4126 } 4127 /* 4128 * If we joined with the right neighbor and there's a level above 4129 * us, increment the cursor at that level. 4130 */ 4131 else if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) || 4132 (level + 1 < cur->bc_nlevels)) { 4133 error = xfs_btree_increment(cur, level + 1, &i); 4134 if (error) 4135 goto error0; 4136 } 4137 4138 /* 4139 * Readjust the ptr at this level if it's not a leaf, since it's 4140 * still pointing at the deletion point, which makes the cursor 4141 * inconsistent. If this makes the ptr 0, the caller fixes it up. 4142 * We can't use decrement because it would change the next level up. 4143 */ 4144 if (level > 0) 4145 cur->bc_ptrs[level]--; 4146 4147 /* 4148 * We combined blocks, so we have to update the parent keys if the 4149 * btree supports overlapped intervals. However, bc_ptrs[level + 1] 4150 * points to the old block so that the caller knows which record to 4151 * delete. 
Therefore, the caller must be savvy enough to call updkeys 4152 * for us if we return stat == 2. The other exit points from this 4153 * function don't require deletions further up the tree, so they can 4154 * call updkeys directly. 4155 */ 4156 4157 /* Return value means the next level up has something to do. */ 4158 *stat = 2; 4159 return 0; 4160 4161 error0: 4162 if (tcur) 4163 xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); 4164 return error; 4165 } 4166 4167 /* 4168 * Delete the record pointed to by cur. 4169 * The cursor refers to the place where the record was (could be inserted) 4170 * when the operation returns. 4171 */ 4172 int /* error */ 4173 xfs_btree_delete( 4174 struct xfs_btree_cur *cur, 4175 int *stat) /* success/failure */ 4176 { 4177 int error; /* error return value */ 4178 int level; 4179 int i; 4180 bool joined = false; 4181 4182 /* 4183 * Go up the tree, starting at leaf level. 4184 * 4185 * If 2 is returned then a join was done; go to the next level. 4186 * Otherwise we are done. 4187 */ 4188 for (level = 0, i = 2; i == 2; level++) { 4189 error = xfs_btree_delrec(cur, level, &i); 4190 if (error) 4191 goto error0; 4192 if (i == 2) 4193 joined = true; 4194 } 4195 4196 /* 4197 * If we combined blocks as part of deleting the record, delrec won't 4198 * have updated the parent high keys so we have to do that here. 4199 */ 4200 if (joined && (cur->bc_flags & XFS_BTREE_OVERLAPPING)) { 4201 error = xfs_btree_updkeys_force(cur, 0); 4202 if (error) 4203 goto error0; 4204 } 4205 4206 if (i == 0) { 4207 for (level = 1; level < cur->bc_nlevels; level++) { 4208 if (cur->bc_ptrs[level] == 0) { 4209 error = xfs_btree_decrement(cur, level, &i); 4210 if (error) 4211 goto error0; 4212 break; 4213 } 4214 } 4215 } 4216 4217 *stat = i; 4218 return 0; 4219 error0: 4220 return error; 4221 } 4222 4223 /* 4224 * Get the data from the pointed-to record. 
4225 */ 4226 int /* error */ 4227 xfs_btree_get_rec( 4228 struct xfs_btree_cur *cur, /* btree cursor */ 4229 union xfs_btree_rec **recp, /* output: btree record */ 4230 int *stat) /* output: success/failure */ 4231 { 4232 struct xfs_btree_block *block; /* btree block */ 4233 struct xfs_buf *bp; /* buffer pointer */ 4234 int ptr; /* record number */ 4235 #ifdef DEBUG 4236 int error; /* error return value */ 4237 #endif 4238 4239 ptr = cur->bc_ptrs[0]; 4240 block = xfs_btree_get_block(cur, 0, &bp); 4241 4242 #ifdef DEBUG 4243 error = xfs_btree_check_block(cur, block, 0, bp); 4244 if (error) 4245 return error; 4246 #endif 4247 4248 /* 4249 * Off the right end or left end, return failure. 4250 */ 4251 if (ptr > xfs_btree_get_numrecs(block) || ptr <= 0) { 4252 *stat = 0; 4253 return 0; 4254 } 4255 4256 /* 4257 * Point to the record and extract its data. 4258 */ 4259 *recp = xfs_btree_rec_addr(cur, ptr, block); 4260 *stat = 1; 4261 return 0; 4262 } 4263 4264 /* Visit a block in a btree. */ 4265 STATIC int 4266 xfs_btree_visit_block( 4267 struct xfs_btree_cur *cur, 4268 int level, 4269 xfs_btree_visit_blocks_fn fn, 4270 void *data) 4271 { 4272 struct xfs_btree_block *block; 4273 struct xfs_buf *bp; 4274 union xfs_btree_ptr rptr; 4275 int error; 4276 4277 /* do right sibling readahead */ 4278 xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA); 4279 block = xfs_btree_get_block(cur, level, &bp); 4280 4281 /* process the block */ 4282 error = fn(cur, level, data); 4283 if (error) 4284 return error; 4285 4286 /* now read rh sibling block for next iteration */ 4287 xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB); 4288 if (xfs_btree_ptr_is_null(cur, &rptr)) 4289 return -ENOENT; 4290 4291 return xfs_btree_lookup_get_block(cur, level, &rptr, &block); 4292 } 4293 4294 4295 /* Visit every block in a btree. 
*/ 4296 int 4297 xfs_btree_visit_blocks( 4298 struct xfs_btree_cur *cur, 4299 xfs_btree_visit_blocks_fn fn, 4300 unsigned int flags, 4301 void *data) 4302 { 4303 union xfs_btree_ptr lptr; 4304 int level; 4305 struct xfs_btree_block *block = NULL; 4306 int error = 0; 4307 4308 cur->bc_ops->init_ptr_from_cur(cur, &lptr); 4309 4310 /* for each level */ 4311 for (level = cur->bc_nlevels - 1; level >= 0; level--) { 4312 /* grab the left hand block */ 4313 error = xfs_btree_lookup_get_block(cur, level, &lptr, &block); 4314 if (error) 4315 return error; 4316 4317 /* readahead the left most block for the next level down */ 4318 if (level > 0) { 4319 union xfs_btree_ptr *ptr; 4320 4321 ptr = xfs_btree_ptr_addr(cur, 1, block); 4322 xfs_btree_readahead_ptr(cur, ptr, 1); 4323 4324 /* save for the next iteration of the loop */ 4325 xfs_btree_copy_ptrs(cur, &lptr, ptr, 1); 4326 4327 if (!(flags & XFS_BTREE_VISIT_LEAVES)) 4328 continue; 4329 } else if (!(flags & XFS_BTREE_VISIT_RECORDS)) { 4330 continue; 4331 } 4332 4333 /* for each buffer in the level */ 4334 do { 4335 error = xfs_btree_visit_block(cur, level, fn, data); 4336 } while (!error); 4337 4338 if (error != -ENOENT) 4339 return error; 4340 } 4341 4342 return 0; 4343 } 4344 4345 /* 4346 * Change the owner of a btree. 4347 * 4348 * The mechanism we use here is ordered buffer logging. Because we don't know 4349 * how many buffers were are going to need to modify, we don't really want to 4350 * have to make transaction reservations for the worst case of every buffer in a 4351 * full size btree as that may be more space that we can fit in the log.... 4352 * 4353 * We do the btree walk in the most optimal manner possible - we have sibling 4354 * pointers so we can just walk all the blocks on each level from left to right 4355 * in a single pass, and then move to the next level and do the same. 
We can
 * also do readahead on the sibling pointers to get IO moving more quickly,
 * though for slow disks this is unlikely to make much difference to performance
 * as the amount of CPU work we have to do before moving to the next block is
 * relatively small.
 *
 * For each btree block that we load, modify the owner appropriately, set the
 * buffer as an ordered buffer and log it appropriately. We need to ensure that
 * we mark the region we change dirty so that if the buffer is relogged in
 * a subsequent transaction the changes we make here as an ordered buffer are
 * correctly relogged in that transaction. If we are in recovery context, then
 * just queue the modified buffer as delayed write buffer so the transaction
 * recovery completion writes the changes to disk.
 */
struct xfs_btree_block_change_owner_info {
	uint64_t		new_owner;	/* owner value to stamp into each block */
	struct list_head	*buffer_list;	/* delwri queue used outside a transaction */
};

/*
 * Per-block callback for xfs_btree_change_owner(): rewrite the block's
 * owner field and arrange for the change to reach disk.  Returns -EAGAIN
 * when the buffer could not be marked ordered, which tells the caller's
 * walk to stop so the transaction can be rolled and the walk restarted.
 */
static int
xfs_btree_block_change_owner(
	struct xfs_btree_cur	*cur,
	int			level,
	void			*data)
{
	struct xfs_btree_block_change_owner_info	*bbcoi = data;
	struct xfs_btree_block	*block;
	struct xfs_buf		*bp;

	/* modify the owner */
	block = xfs_btree_get_block(cur, level, &bp);
	if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
		/* Long-format blocks carry a 64-bit owner field. */
		if (block->bb_u.l.bb_owner == cpu_to_be64(bbcoi->new_owner))
			return 0;	/* already owned by the new owner */
		block->bb_u.l.bb_owner = cpu_to_be64(bbcoi->new_owner);
	} else {
		/* Short-format blocks carry a 32-bit (AG-relative) owner. */
		if (block->bb_u.s.bb_owner == cpu_to_be32(bbcoi->new_owner))
			return 0;	/* already owned by the new owner */
		block->bb_u.s.bb_owner = cpu_to_be32(bbcoi->new_owner);
	}

	/*
	 * If the block is a root block hosted in an inode, we might not have a
	 * buffer pointer here and we shouldn't attempt to log the change as the
	 * information is already held in the inode and discarded when the root
	 * block is formatted into the on-disk inode fork. We still change it,
	 * though, so everything is consistent in memory.
	 */
	if (!bp) {
		ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
		ASSERT(level == cur->bc_nlevels - 1);
		return 0;
	}

	if (cur->bc_tp) {
		/*
		 * In a transaction: order the buffer; if that fails, log the
		 * owner change fully and ask the caller to restart (-EAGAIN).
		 */
		if (!xfs_trans_ordered_buf(cur->bc_tp, bp)) {
			xfs_btree_log_block(cur, bp, XFS_BB_OWNER);
			return -EAGAIN;
		}
	} else {
		/* No transaction (recovery context): queue for delayed write. */
		xfs_buf_delwri_queue(bp, bbcoi->buffer_list);
	}

	return 0;
}

/*
 * Change the owner of every block in a btree by walking all blocks with
 * xfs_btree_block_change_owner().  See the comment above for the ordered
 * buffer logging mechanism this relies on.
 */
int
xfs_btree_change_owner(
	struct xfs_btree_cur	*cur,
	uint64_t		new_owner,
	struct list_head	*buffer_list)
{
	struct xfs_btree_block_change_owner_info	bbcoi;

	bbcoi.new_owner = new_owner;
	bbcoi.buffer_list = buffer_list;

	return xfs_btree_visit_blocks(cur, xfs_btree_block_change_owner,
			XFS_BTREE_VISIT_ALL, &bbcoi);
}

/* Verify the v5 fields of a long-format btree block. */
xfs_failaddr_t
xfs_btree_lblock_v5hdr_verify(
	struct xfs_buf		*bp,
	uint64_t		owner)
{
	struct xfs_mount	*mp = bp->b_mount;
	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);

	/* v5 header fields only exist on CRC-enabled filesystems. */
	if (!xfs_sb_version_hascrc(&mp->m_sb))
		return __this_address;
	if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid))
		return __this_address;
	/* The block must record its own disk address. */
	if (block->bb_u.l.bb_blkno != cpu_to_be64(bp->b_bn))
		return __this_address;
	/* XFS_RMAP_OWN_UNKNOWN means the caller doesn't know the owner. */
	if (owner != XFS_RMAP_OWN_UNKNOWN &&
	    be64_to_cpu(block->bb_u.l.bb_owner) != owner)
		return __this_address;
	return NULL;
}

/* Verify a long-format btree block.
 */
xfs_failaddr_t
xfs_btree_lblock_verify(
	struct xfs_buf		*bp,
	unsigned int		max_recs)
{
	struct xfs_mount	*mp = bp->b_mount;
	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);

	/* numrecs verification */
	if (be16_to_cpu(block->bb_numrecs) > max_recs)
		return __this_address;

	/* sibling pointer verification: null, or a valid fs block number */
	if (block->bb_u.l.bb_leftsib != cpu_to_be64(NULLFSBLOCK) &&
	    !xfs_verify_fsbno(mp, be64_to_cpu(block->bb_u.l.bb_leftsib)))
		return __this_address;
	if (block->bb_u.l.bb_rightsib != cpu_to_be64(NULLFSBLOCK) &&
	    !xfs_verify_fsbno(mp, be64_to_cpu(block->bb_u.l.bb_rightsib)))
		return __this_address;

	return NULL;
}

/**
 * xfs_btree_sblock_v5hdr_verify() -- verify the v5 fields of a short-format
 *				      btree block
 *
 * @bp: buffer containing the btree block
 *
 * Returns the address of the failing check, or NULL if everything is ok.
 */
xfs_failaddr_t
xfs_btree_sblock_v5hdr_verify(
	struct xfs_buf		*bp)
{
	struct xfs_mount	*mp = bp->b_mount;
	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
	struct xfs_perag	*pag = bp->b_pag;

	/* v5 header fields only exist on CRC-enabled filesystems. */
	if (!xfs_sb_version_hascrc(&mp->m_sb))
		return __this_address;
	if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
		return __this_address;
	/* The block must record its own disk address. */
	if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
		return __this_address;
	/* Short-format blocks are owned by their AG; pag may be absent. */
	if (pag && be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
		return __this_address;
	return NULL;
}

/**
 * xfs_btree_sblock_verify() -- verify a short-format btree block
 *
 * @bp: buffer containing the btree block
 * @max_recs: maximum records allowed in this btree node
 *
 * Returns the address of the failing check, or NULL if everything is ok.
 */
xfs_failaddr_t
xfs_btree_sblock_verify(
	struct xfs_buf		*bp,
	unsigned int		max_recs)
{
	struct xfs_mount	*mp = bp->b_mount;
	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
	xfs_agblock_t		agno;

	/* numrecs verification */
	if (be16_to_cpu(block->bb_numrecs) > max_recs)
		return __this_address;

	/* sibling pointer verification: null, or a valid block in this AG */
	agno = xfs_daddr_to_agno(mp, XFS_BUF_ADDR(bp));
	if (block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK) &&
	    !xfs_verify_agbno(mp, agno, be32_to_cpu(block->bb_u.s.bb_leftsib)))
		return __this_address;
	if (block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK) &&
	    !xfs_verify_agbno(mp, agno, be32_to_cpu(block->bb_u.s.bb_rightsib)))
		return __this_address;

	return NULL;
}

/*
 * Calculate the number of btree levels needed to store a given number of
 * records in a short-format btree.
 *
 * limits[0] is the maximum records per leaf block, limits[1] the maximum
 * keys/pointers per node block; both are assumed to be nonzero.  The
 * result counts the leaf level, so a tree whose records fit in one block
 * needs one level.
 */
uint
xfs_btree_compute_maxlevels(
	uint			*limits,
	unsigned long		len)
{
	uint			level;
	unsigned long		maxblocks;

	/* Number of leaf blocks needed, rounding up. */
	maxblocks = (len + limits[0] - 1) / limits[0];
	/* Add node levels until everything fans in to a single block. */
	for (level = 1; maxblocks > 1; level++)
		maxblocks = (maxblocks + limits[1] - 1) / limits[1];
	return level;
}

/*
 * Query a regular btree for all records overlapping a given interval.
 * Start with a LE lookup of the key of low_rec and return all records
 * until we find a record with a key greater than the key of high_rec.
 */
STATIC int
xfs_btree_simple_query_range(
	struct xfs_btree_cur		*cur,
	union xfs_btree_key		*low_key,
	union xfs_btree_key		*high_key,
	xfs_btree_query_range_fn	fn,
	void				*priv)
{
	union xfs_btree_rec		*recp;
	union xfs_btree_key		rec_key;
	int64_t				diff;
	int				stat;
	bool				firstrec = true;
	int				error;

	ASSERT(cur->bc_ops->init_high_key_from_rec);
	ASSERT(cur->bc_ops->diff_two_keys);

	/*
	 * Find the leftmost record.  The btree cursor must be set
	 * to the low record used to generate low_key.
	 */
	stat = 0;
	error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, &stat);
	if (error)
		goto out;

	/* Nothing?  See if there's anything to the right. */
	if (!stat) {
		error = xfs_btree_increment(cur, 0, &stat);
		if (error)
			goto out;
	}

	while (stat) {
		/* Find the record. */
		error = xfs_btree_get_rec(cur, &recp, &stat);
		if (error || !stat)
			break;

		/*
		 * Skip if high_key(rec) < low_key.  Only the first record
		 * needs this check: the LE lookup can land one record
		 * before the query range.
		 */
		if (firstrec) {
			cur->bc_ops->init_high_key_from_rec(&rec_key, recp);
			firstrec = false;
			diff = cur->bc_ops->diff_two_keys(cur, low_key,
					&rec_key);
			if (diff > 0)
				goto advloop;
		}

		/* Stop if high_key < low_key(rec). */
		cur->bc_ops->init_key_from_rec(&rec_key, recp);
		diff = cur->bc_ops->diff_two_keys(cur, &rec_key, high_key);
		if (diff > 0)
			break;

		/* Callback */
		error = fn(cur, recp, priv);
		if (error)
			break;

advloop:
		/* Move on to the next record. */
		error = xfs_btree_increment(cur, 0, &stat);
		if (error)
			break;
	}

out:
	return error;
}

/*
 * Query an overlapped interval btree for all records overlapping a given
 * interval. This function roughly follows the algorithm given in
 * "Interval Trees" of _Introduction to Algorithms_, which is section
 * 14.3 in the 2nd and 3rd editions.
 *
 * First, generate keys for the low and high records passed in.
 *
 * For any leaf node, generate the high and low keys for the record.
 * If the record keys overlap with the query low/high keys, pass the
 * record to the function iterator.
 *
 * For any internal node, compare the low and high keys of each
 * pointer against the query low/high keys. If there's an overlap,
 * follow the pointer.
 *
 * As an optimization, we stop scanning a block when we find a low key
 * that is greater than the query's high key.
4650 */ 4651 STATIC int 4652 xfs_btree_overlapped_query_range( 4653 struct xfs_btree_cur *cur, 4654 union xfs_btree_key *low_key, 4655 union xfs_btree_key *high_key, 4656 xfs_btree_query_range_fn fn, 4657 void *priv) 4658 { 4659 union xfs_btree_ptr ptr; 4660 union xfs_btree_ptr *pp; 4661 union xfs_btree_key rec_key; 4662 union xfs_btree_key rec_hkey; 4663 union xfs_btree_key *lkp; 4664 union xfs_btree_key *hkp; 4665 union xfs_btree_rec *recp; 4666 struct xfs_btree_block *block; 4667 int64_t ldiff; 4668 int64_t hdiff; 4669 int level; 4670 struct xfs_buf *bp; 4671 int i; 4672 int error; 4673 4674 /* Load the root of the btree. */ 4675 level = cur->bc_nlevels - 1; 4676 cur->bc_ops->init_ptr_from_cur(cur, &ptr); 4677 error = xfs_btree_lookup_get_block(cur, level, &ptr, &block); 4678 if (error) 4679 return error; 4680 xfs_btree_get_block(cur, level, &bp); 4681 trace_xfs_btree_overlapped_query_range(cur, level, bp); 4682 #ifdef DEBUG 4683 error = xfs_btree_check_block(cur, block, level, bp); 4684 if (error) 4685 goto out; 4686 #endif 4687 cur->bc_ptrs[level] = 1; 4688 4689 while (level < cur->bc_nlevels) { 4690 block = xfs_btree_get_block(cur, level, &bp); 4691 4692 /* End of node, pop back towards the root. */ 4693 if (cur->bc_ptrs[level] > be16_to_cpu(block->bb_numrecs)) { 4694 pop_up: 4695 if (level < cur->bc_nlevels - 1) 4696 cur->bc_ptrs[level + 1]++; 4697 level++; 4698 continue; 4699 } 4700 4701 if (level == 0) { 4702 /* Handle a leaf node. */ 4703 recp = xfs_btree_rec_addr(cur, cur->bc_ptrs[0], block); 4704 4705 cur->bc_ops->init_high_key_from_rec(&rec_hkey, recp); 4706 ldiff = cur->bc_ops->diff_two_keys(cur, &rec_hkey, 4707 low_key); 4708 4709 cur->bc_ops->init_key_from_rec(&rec_key, recp); 4710 hdiff = cur->bc_ops->diff_two_keys(cur, high_key, 4711 &rec_key); 4712 4713 /* 4714 * If (record's high key >= query's low key) and 4715 * (query's high key >= record's low key), then 4716 * this record overlaps the query range; callback. 
4717 */ 4718 if (ldiff >= 0 && hdiff >= 0) { 4719 error = fn(cur, recp, priv); 4720 if (error) 4721 break; 4722 } else if (hdiff < 0) { 4723 /* Record is larger than high key; pop. */ 4724 goto pop_up; 4725 } 4726 cur->bc_ptrs[level]++; 4727 continue; 4728 } 4729 4730 /* Handle an internal node. */ 4731 lkp = xfs_btree_key_addr(cur, cur->bc_ptrs[level], block); 4732 hkp = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level], block); 4733 pp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[level], block); 4734 4735 ldiff = cur->bc_ops->diff_two_keys(cur, hkp, low_key); 4736 hdiff = cur->bc_ops->diff_two_keys(cur, high_key, lkp); 4737 4738 /* 4739 * If (pointer's high key >= query's low key) and 4740 * (query's high key >= pointer's low key), then 4741 * this record overlaps the query range; follow pointer. 4742 */ 4743 if (ldiff >= 0 && hdiff >= 0) { 4744 level--; 4745 error = xfs_btree_lookup_get_block(cur, level, pp, 4746 &block); 4747 if (error) 4748 goto out; 4749 xfs_btree_get_block(cur, level, &bp); 4750 trace_xfs_btree_overlapped_query_range(cur, level, bp); 4751 #ifdef DEBUG 4752 error = xfs_btree_check_block(cur, block, level, bp); 4753 if (error) 4754 goto out; 4755 #endif 4756 cur->bc_ptrs[level] = 1; 4757 continue; 4758 } else if (hdiff < 0) { 4759 /* The low key is larger than the upper range; pop. */ 4760 goto pop_up; 4761 } 4762 cur->bc_ptrs[level]++; 4763 } 4764 4765 out: 4766 /* 4767 * If we don't end this function with the cursor pointing at a record 4768 * block, a subsequent non-error cursor deletion will not release 4769 * node-level buffers, causing a buffer leak. This is quite possible 4770 * with a zero-results range query, so release the buffers if we 4771 * failed to return any results. 
4772 */ 4773 if (cur->bc_bufs[0] == NULL) { 4774 for (i = 0; i < cur->bc_nlevels; i++) { 4775 if (cur->bc_bufs[i]) { 4776 xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[i]); 4777 cur->bc_bufs[i] = NULL; 4778 cur->bc_ptrs[i] = 0; 4779 cur->bc_ra[i] = 0; 4780 } 4781 } 4782 } 4783 4784 return error; 4785 } 4786 4787 /* 4788 * Query a btree for all records overlapping a given interval of keys. The 4789 * supplied function will be called with each record found; return one of the 4790 * XFS_BTREE_QUERY_RANGE_{CONTINUE,ABORT} values or the usual negative error 4791 * code. This function returns -ECANCELED, zero, or a negative error code. 4792 */ 4793 int 4794 xfs_btree_query_range( 4795 struct xfs_btree_cur *cur, 4796 union xfs_btree_irec *low_rec, 4797 union xfs_btree_irec *high_rec, 4798 xfs_btree_query_range_fn fn, 4799 void *priv) 4800 { 4801 union xfs_btree_rec rec; 4802 union xfs_btree_key low_key; 4803 union xfs_btree_key high_key; 4804 4805 /* Find the keys of both ends of the interval. */ 4806 cur->bc_rec = *high_rec; 4807 cur->bc_ops->init_rec_from_cur(cur, &rec); 4808 cur->bc_ops->init_key_from_rec(&high_key, &rec); 4809 4810 cur->bc_rec = *low_rec; 4811 cur->bc_ops->init_rec_from_cur(cur, &rec); 4812 cur->bc_ops->init_key_from_rec(&low_key, &rec); 4813 4814 /* Enforce low key < high key. */ 4815 if (cur->bc_ops->diff_two_keys(cur, &low_key, &high_key) > 0) 4816 return -EINVAL; 4817 4818 if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING)) 4819 return xfs_btree_simple_query_range(cur, &low_key, 4820 &high_key, fn, priv); 4821 return xfs_btree_overlapped_query_range(cur, &low_key, &high_key, 4822 fn, priv); 4823 } 4824 4825 /* Query a btree for all records. 
*/ 4826 int 4827 xfs_btree_query_all( 4828 struct xfs_btree_cur *cur, 4829 xfs_btree_query_range_fn fn, 4830 void *priv) 4831 { 4832 union xfs_btree_key low_key; 4833 union xfs_btree_key high_key; 4834 4835 memset(&cur->bc_rec, 0, sizeof(cur->bc_rec)); 4836 memset(&low_key, 0, sizeof(low_key)); 4837 memset(&high_key, 0xFF, sizeof(high_key)); 4838 4839 return xfs_btree_simple_query_range(cur, &low_key, &high_key, fn, priv); 4840 } 4841 4842 /* 4843 * Calculate the number of blocks needed to store a given number of records 4844 * in a short-format (per-AG metadata) btree. 4845 */ 4846 unsigned long long 4847 xfs_btree_calc_size( 4848 uint *limits, 4849 unsigned long long len) 4850 { 4851 int level; 4852 int maxrecs; 4853 unsigned long long rval; 4854 4855 maxrecs = limits[0]; 4856 for (level = 0, rval = 0; len > 1; level++) { 4857 len += maxrecs - 1; 4858 do_div(len, maxrecs); 4859 maxrecs = limits[1]; 4860 rval += len; 4861 } 4862 return rval; 4863 } 4864 4865 static int 4866 xfs_btree_count_blocks_helper( 4867 struct xfs_btree_cur *cur, 4868 int level, 4869 void *data) 4870 { 4871 xfs_extlen_t *blocks = data; 4872 (*blocks)++; 4873 4874 return 0; 4875 } 4876 4877 /* Count the blocks in a btree and return the result in *blocks. */ 4878 int 4879 xfs_btree_count_blocks( 4880 struct xfs_btree_cur *cur, 4881 xfs_extlen_t *blocks) 4882 { 4883 *blocks = 0; 4884 return xfs_btree_visit_blocks(cur, xfs_btree_count_blocks_helper, 4885 XFS_BTREE_VISIT_ALL, blocks); 4886 } 4887 4888 /* Compare two btree pointers. */ 4889 int64_t 4890 xfs_btree_diff_two_ptrs( 4891 struct xfs_btree_cur *cur, 4892 const union xfs_btree_ptr *a, 4893 const union xfs_btree_ptr *b) 4894 { 4895 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) 4896 return (int64_t)be64_to_cpu(a->l) - be64_to_cpu(b->l); 4897 return (int64_t)be32_to_cpu(a->s) - be32_to_cpu(b->s); 4898 } 4899 4900 /* If there's an extent, we're done. 
*/ 4901 STATIC int 4902 xfs_btree_has_record_helper( 4903 struct xfs_btree_cur *cur, 4904 union xfs_btree_rec *rec, 4905 void *priv) 4906 { 4907 return -ECANCELED; 4908 } 4909 4910 /* Is there a record covering a given range of keys? */ 4911 int 4912 xfs_btree_has_record( 4913 struct xfs_btree_cur *cur, 4914 union xfs_btree_irec *low, 4915 union xfs_btree_irec *high, 4916 bool *exists) 4917 { 4918 int error; 4919 4920 error = xfs_btree_query_range(cur, low, high, 4921 &xfs_btree_has_record_helper, NULL); 4922 if (error == -ECANCELED) { 4923 *exists = true; 4924 return 0; 4925 } 4926 *exists = false; 4927 return error; 4928 } 4929 4930 /* Are there more records in this btree? */ 4931 bool 4932 xfs_btree_has_more_records( 4933 struct xfs_btree_cur *cur) 4934 { 4935 struct xfs_btree_block *block; 4936 struct xfs_buf *bp; 4937 4938 block = xfs_btree_get_block(cur, 0, &bp); 4939 4940 /* There are still records in this block. */ 4941 if (cur->bc_ptrs[0] < xfs_btree_get_numrecs(block)) 4942 return true; 4943 4944 /* There are more record blocks. */ 4945 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) 4946 return block->bb_u.l.bb_rightsib != cpu_to_be64(NULLFSBLOCK); 4947 else 4948 return block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK); 4949 } 4950