1 /* 2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. 3 * All Rights Reserved. 4 * 5 * This program is free software; you can redistribute it and/or 6 * modify it under the terms of the GNU General Public License as 7 * published by the Free Software Foundation. 8 * 9 * This program is distributed in the hope that it would be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, write the Free Software Foundation, 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 17 */ 18 #include "xfs.h" 19 #include "xfs_fs.h" 20 #include "xfs_shared.h" 21 #include "xfs_format.h" 22 #include "xfs_log_format.h" 23 #include "xfs_trans_resv.h" 24 #include "xfs_bit.h" 25 #include "xfs_mount.h" 26 #include "xfs_defer.h" 27 #include "xfs_inode.h" 28 #include "xfs_trans.h" 29 #include "xfs_inode_item.h" 30 #include "xfs_buf_item.h" 31 #include "xfs_btree.h" 32 #include "xfs_errortag.h" 33 #include "xfs_error.h" 34 #include "xfs_trace.h" 35 #include "xfs_cksum.h" 36 #include "xfs_alloc.h" 37 #include "xfs_log.h" 38 39 /* 40 * Cursor allocation zone. 41 */ 42 kmem_zone_t *xfs_btree_cur_zone; 43 44 /* 45 * Btree magic numbers. 46 */ 47 static const uint32_t xfs_magics[2][XFS_BTNUM_MAX] = { 48 { XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, 0, XFS_BMAP_MAGIC, XFS_IBT_MAGIC, 49 XFS_FIBT_MAGIC, 0 }, 50 { XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC, XFS_RMAP_CRC_MAGIC, 51 XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC, XFS_FIBT_CRC_MAGIC, 52 XFS_REFC_CRC_MAGIC } 53 }; 54 55 uint32_t 56 xfs_btree_magic( 57 int crc, 58 xfs_btnum_t btnum) 59 { 60 uint32_t magic = xfs_magics[crc][btnum]; 61 62 /* Ensure we asked for crc for crc-only magics. */ 63 ASSERT(magic != 0); 64 return magic; 65 } 66 67 /* 68 * Check a long btree block header. 
Return the address of the failing check, 69 * or NULL if everything is ok. 70 */ 71 xfs_failaddr_t 72 __xfs_btree_check_lblock( 73 struct xfs_btree_cur *cur, 74 struct xfs_btree_block *block, 75 int level, 76 struct xfs_buf *bp) 77 { 78 struct xfs_mount *mp = cur->bc_mp; 79 xfs_btnum_t btnum = cur->bc_btnum; 80 int crc = xfs_sb_version_hascrc(&mp->m_sb); 81 82 if (crc) { 83 if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid)) 84 return __this_address; 85 if (block->bb_u.l.bb_blkno != 86 cpu_to_be64(bp ? bp->b_bn : XFS_BUF_DADDR_NULL)) 87 return __this_address; 88 if (block->bb_u.l.bb_pad != cpu_to_be32(0)) 89 return __this_address; 90 } 91 92 if (be32_to_cpu(block->bb_magic) != xfs_btree_magic(crc, btnum)) 93 return __this_address; 94 if (be16_to_cpu(block->bb_level) != level) 95 return __this_address; 96 if (be16_to_cpu(block->bb_numrecs) > 97 cur->bc_ops->get_maxrecs(cur, level)) 98 return __this_address; 99 if (block->bb_u.l.bb_leftsib != cpu_to_be64(NULLFSBLOCK) && 100 !xfs_btree_check_lptr(cur, be64_to_cpu(block->bb_u.l.bb_leftsib), 101 level + 1)) 102 return __this_address; 103 if (block->bb_u.l.bb_rightsib != cpu_to_be64(NULLFSBLOCK) && 104 !xfs_btree_check_lptr(cur, be64_to_cpu(block->bb_u.l.bb_rightsib), 105 level + 1)) 106 return __this_address; 107 108 return NULL; 109 } 110 111 /* Check a long btree block header. */ 112 static int 113 xfs_btree_check_lblock( 114 struct xfs_btree_cur *cur, 115 struct xfs_btree_block *block, 116 int level, 117 struct xfs_buf *bp) 118 { 119 struct xfs_mount *mp = cur->bc_mp; 120 xfs_failaddr_t fa; 121 122 fa = __xfs_btree_check_lblock(cur, block, level, bp); 123 if (unlikely(XFS_TEST_ERROR(fa != NULL, mp, 124 XFS_ERRTAG_BTREE_CHECK_LBLOCK))) { 125 if (bp) 126 trace_xfs_btree_corrupt(bp, _RET_IP_); 127 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); 128 return -EFSCORRUPTED; 129 } 130 return 0; 131 } 132 133 /* 134 * Check a short btree block header. 
Return the address of the failing check, 135 * or NULL if everything is ok. 136 */ 137 xfs_failaddr_t 138 __xfs_btree_check_sblock( 139 struct xfs_btree_cur *cur, 140 struct xfs_btree_block *block, 141 int level, 142 struct xfs_buf *bp) 143 { 144 struct xfs_mount *mp = cur->bc_mp; 145 xfs_btnum_t btnum = cur->bc_btnum; 146 int crc = xfs_sb_version_hascrc(&mp->m_sb); 147 148 if (crc) { 149 if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid)) 150 return __this_address; 151 if (block->bb_u.s.bb_blkno != 152 cpu_to_be64(bp ? bp->b_bn : XFS_BUF_DADDR_NULL)) 153 return __this_address; 154 } 155 156 if (be32_to_cpu(block->bb_magic) != xfs_btree_magic(crc, btnum)) 157 return __this_address; 158 if (be16_to_cpu(block->bb_level) != level) 159 return __this_address; 160 if (be16_to_cpu(block->bb_numrecs) > 161 cur->bc_ops->get_maxrecs(cur, level)) 162 return __this_address; 163 if (block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK) && 164 !xfs_btree_check_sptr(cur, be32_to_cpu(block->bb_u.s.bb_leftsib), 165 level + 1)) 166 return __this_address; 167 if (block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK) && 168 !xfs_btree_check_sptr(cur, be32_to_cpu(block->bb_u.s.bb_rightsib), 169 level + 1)) 170 return __this_address; 171 172 return NULL; 173 } 174 175 /* Check a short btree block header. */ 176 STATIC int 177 xfs_btree_check_sblock( 178 struct xfs_btree_cur *cur, 179 struct xfs_btree_block *block, 180 int level, 181 struct xfs_buf *bp) 182 { 183 struct xfs_mount *mp = cur->bc_mp; 184 xfs_failaddr_t fa; 185 186 fa = __xfs_btree_check_sblock(cur, block, level, bp); 187 if (unlikely(XFS_TEST_ERROR(fa != NULL, mp, 188 XFS_ERRTAG_BTREE_CHECK_SBLOCK))) { 189 if (bp) 190 trace_xfs_btree_corrupt(bp, _RET_IP_); 191 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); 192 return -EFSCORRUPTED; 193 } 194 return 0; 195 } 196 197 /* 198 * Debug routine: check that block header is ok. 
199 */ 200 int 201 xfs_btree_check_block( 202 struct xfs_btree_cur *cur, /* btree cursor */ 203 struct xfs_btree_block *block, /* generic btree block pointer */ 204 int level, /* level of the btree block */ 205 struct xfs_buf *bp) /* buffer containing block, if any */ 206 { 207 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) 208 return xfs_btree_check_lblock(cur, block, level, bp); 209 else 210 return xfs_btree_check_sblock(cur, block, level, bp); 211 } 212 213 /* Check that this long pointer is valid and points within the fs. */ 214 bool 215 xfs_btree_check_lptr( 216 struct xfs_btree_cur *cur, 217 xfs_fsblock_t fsbno, 218 int level) 219 { 220 if (level <= 0) 221 return false; 222 return xfs_verify_fsbno(cur->bc_mp, fsbno); 223 } 224 225 /* Check that this short pointer is valid and points within the AG. */ 226 bool 227 xfs_btree_check_sptr( 228 struct xfs_btree_cur *cur, 229 xfs_agblock_t agbno, 230 int level) 231 { 232 if (level <= 0) 233 return false; 234 return xfs_verify_agbno(cur->bc_mp, cur->bc_private.a.agno, agbno); 235 } 236 237 #ifdef DEBUG 238 /* 239 * Check that a given (indexed) btree pointer at a certain level of a 240 * btree is valid and doesn't point past where it should. 241 */ 242 static int 243 xfs_btree_check_ptr( 244 struct xfs_btree_cur *cur, 245 union xfs_btree_ptr *ptr, 246 int index, 247 int level) 248 { 249 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { 250 XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, 251 xfs_btree_check_lptr(cur, 252 be64_to_cpu((&ptr->l)[index]), level)); 253 } else { 254 XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, 255 xfs_btree_check_sptr(cur, 256 be32_to_cpu((&ptr->s)[index]), level)); 257 } 258 259 return 0; 260 } 261 #endif 262 263 /* 264 * Calculate CRC on the whole btree block and stuff it into the 265 * long-form btree header. 266 * 267 * Prior to calculting the CRC, pull the LSN out of the buffer log item and put 268 * it into the buffer so recovery knows what the last modification was that made 269 * it to disk. 
270 */ 271 void 272 xfs_btree_lblock_calc_crc( 273 struct xfs_buf *bp) 274 { 275 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 276 struct xfs_buf_log_item *bip = bp->b_log_item; 277 278 if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) 279 return; 280 if (bip) 281 block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn); 282 xfs_buf_update_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF); 283 } 284 285 bool 286 xfs_btree_lblock_verify_crc( 287 struct xfs_buf *bp) 288 { 289 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 290 struct xfs_mount *mp = bp->b_target->bt_mount; 291 292 if (xfs_sb_version_hascrc(&mp->m_sb)) { 293 if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.l.bb_lsn))) 294 return false; 295 return xfs_buf_verify_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF); 296 } 297 298 return true; 299 } 300 301 /* 302 * Calculate CRC on the whole btree block and stuff it into the 303 * short-form btree header. 304 * 305 * Prior to calculting the CRC, pull the LSN out of the buffer log item and put 306 * it into the buffer so recovery knows what the last modification was that made 307 * it to disk. 
308 */ 309 void 310 xfs_btree_sblock_calc_crc( 311 struct xfs_buf *bp) 312 { 313 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 314 struct xfs_buf_log_item *bip = bp->b_log_item; 315 316 if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) 317 return; 318 if (bip) 319 block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn); 320 xfs_buf_update_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF); 321 } 322 323 bool 324 xfs_btree_sblock_verify_crc( 325 struct xfs_buf *bp) 326 { 327 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 328 struct xfs_mount *mp = bp->b_target->bt_mount; 329 330 if (xfs_sb_version_hascrc(&mp->m_sb)) { 331 if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.s.bb_lsn))) 332 return __this_address; 333 return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF); 334 } 335 336 return true; 337 } 338 339 static int 340 xfs_btree_free_block( 341 struct xfs_btree_cur *cur, 342 struct xfs_buf *bp) 343 { 344 int error; 345 346 error = cur->bc_ops->free_block(cur, bp); 347 if (!error) { 348 xfs_trans_binval(cur->bc_tp, bp); 349 XFS_BTREE_STATS_INC(cur, free); 350 } 351 return error; 352 } 353 354 /* 355 * Delete the btree cursor. 356 */ 357 void 358 xfs_btree_del_cursor( 359 xfs_btree_cur_t *cur, /* btree cursor */ 360 int error) /* del because of error */ 361 { 362 int i; /* btree level */ 363 364 /* 365 * Clear the buffer pointers, and release the buffers. 366 * If we're doing this in the face of an error, we 367 * need to make sure to inspect all of the entries 368 * in the bc_bufs array for buffers to be unlocked. 369 * This is because some of the btree code works from 370 * level n down to 0, and if we get an error along 371 * the way we won't have initialized all the entries 372 * down to 0. 
373 */ 374 for (i = 0; i < cur->bc_nlevels; i++) { 375 if (cur->bc_bufs[i]) 376 xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[i]); 377 else if (!error) 378 break; 379 } 380 /* 381 * Can't free a bmap cursor without having dealt with the 382 * allocated indirect blocks' accounting. 383 */ 384 ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP || 385 cur->bc_private.b.allocated == 0); 386 /* 387 * Free the cursor. 388 */ 389 kmem_zone_free(xfs_btree_cur_zone, cur); 390 } 391 392 /* 393 * Duplicate the btree cursor. 394 * Allocate a new one, copy the record, re-get the buffers. 395 */ 396 int /* error */ 397 xfs_btree_dup_cursor( 398 xfs_btree_cur_t *cur, /* input cursor */ 399 xfs_btree_cur_t **ncur) /* output cursor */ 400 { 401 xfs_buf_t *bp; /* btree block's buffer pointer */ 402 int error; /* error return value */ 403 int i; /* level number of btree block */ 404 xfs_mount_t *mp; /* mount structure for filesystem */ 405 xfs_btree_cur_t *new; /* new cursor value */ 406 xfs_trans_t *tp; /* transaction pointer, can be NULL */ 407 408 tp = cur->bc_tp; 409 mp = cur->bc_mp; 410 411 /* 412 * Allocate a new cursor like the old one. 413 */ 414 new = cur->bc_ops->dup_cursor(cur); 415 416 /* 417 * Copy the record currently in the cursor. 418 */ 419 new->bc_rec = cur->bc_rec; 420 421 /* 422 * For each level current, re-get the buffer and copy the ptr value. 423 */ 424 for (i = 0; i < new->bc_nlevels; i++) { 425 new->bc_ptrs[i] = cur->bc_ptrs[i]; 426 new->bc_ra[i] = cur->bc_ra[i]; 427 bp = cur->bc_bufs[i]; 428 if (bp) { 429 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, 430 XFS_BUF_ADDR(bp), mp->m_bsize, 431 0, &bp, 432 cur->bc_ops->buf_ops); 433 if (error) { 434 xfs_btree_del_cursor(new, error); 435 *ncur = NULL; 436 return error; 437 } 438 } 439 new->bc_bufs[i] = bp; 440 } 441 *ncur = new; 442 return 0; 443 } 444 445 /* 446 * XFS btree block layout and addressing: 447 * 448 * There are two types of blocks in the btree: leaf and non-leaf blocks. 
 * A leaf block starts with a header, followed by records containing the
 * values.  A non-leaf block also starts with the same header, and then
 * first contains lookup keys followed by an equal number of pointers to
 * the btree blocks at the previous level.
 *
 *		+--------+-------+-------+-------+-------+-------+-------+
 * Leaf:	| header | rec 1 | rec 2 | rec 3 | rec 4 | rec 5 | rec N |
 *		+--------+-------+-------+-------+-------+-------+-------+
 *
 *		+--------+-------+-------+-------+-------+-------+-------+
 * Non-Leaf:	| header | key 1 | key 2 | key N | ptr 1 | ptr 2 | ptr N |
 *		+--------+-------+-------+-------+-------+-------+-------+
 *
 * The header is called struct xfs_btree_block for reasons better left unknown
 * and comes in different versions for short (32bit) and long (64bit) block
 * pointers.  The record and key structures are defined by the btree instances
 * and opaque to the btree core.  The block pointers are simple disk endian
 * integers, available in a short (32bit) and long (64bit) variant.
 *
 * The helpers below calculate the offset of a given record, key or pointer
 * into a btree block (xfs_btree_*_offset) or return a pointer to the given
 * record, key or pointer (xfs_btree_*_addr).  Note that all addressing
 * inside the btree block is done using indices starting at one, not zero!
 *
 * If XFS_BTREE_OVERLAPPING is set, then this btree supports keys containing
 * overlapping intervals.  In such a tree, records are still sorted lowest to
 * highest and indexed by the smallest key value that refers to the record.
 * However, nodes are different: each pointer has two associated keys -- one
 * indexing the lowest key available in the block(s) below (the same behavior
 * as the key in a regular btree) and another indexing the highest key
 * available in the block(s) below.
 * Because records are /not/ sorted by the highest key, all leaf block updates
 * require us to compute the highest key that matches any record in the leaf
 * and to recursively update the high keys in the nodes going further up in
 * the tree, if necessary.  Nodes look like this:
 *
 *		+--------+-----+-----+-----+-----+-----+-------+-------+-----+
 * Non-Leaf:	| header | lo1 | hi1 | lo2 | hi2 | ... | ptr 1 | ptr 2 | ... |
 *		+--------+-----+-----+-----+-----+-----+-------+-------+-----+
 *
 * To perform an interval query on an overlapped tree, perform the usual
 * depth-first search and use the low and high keys to decide if we can skip
 * that particular node.  If a leaf node is reached, return the records that
 * intersect the interval.  Note that an interval query may return numerous
 * entries.  For a non-overlapped tree, simply search for the record
 * associated with the lowest key and iterate forward until a non-matching
 * record is found.  Section 14.3 ("Interval Trees") of _Introduction to
 * Algorithms_ by Cormen, Leiserson, Rivest, and Stein (2nd or 3rd ed. only)
 * discusses this in more detail.
 *
 * Why do we care about overlapping intervals?  Let's say you have a bunch of
 * reverse mapping records on a reflink filesystem:
 *
 * 1: +- file A startblock B offset C length D -----------+
 * 2:      +- file E startblock F offset G length H --------------+
 * 3:      +- file I startblock F offset J length K --+
 * 4:                                                        +- file L... --+
 *
 * Now say we want to map block (B+D) into file A at offset (C+D).  Ideally,
 * we'd simply increment the length of record 1.  But how do we find the
 * record that ends at (B+D-1) (i.e. record 1)?  A LE lookup of (B+D-1) would
 * return record 3 because the keys are ordered first by startblock.
An interval 512 * query would return records 1 and 2 because they both overlap (B+D-1), and 513 * from that we can pick out record 1 as the appropriate left neighbor. 514 * 515 * In the non-overlapped case you can do a LE lookup and decrement the cursor 516 * because a record's interval must end before the next record. 517 */ 518 519 /* 520 * Return size of the btree block header for this btree instance. 521 */ 522 static inline size_t xfs_btree_block_len(struct xfs_btree_cur *cur) 523 { 524 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { 525 if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) 526 return XFS_BTREE_LBLOCK_CRC_LEN; 527 return XFS_BTREE_LBLOCK_LEN; 528 } 529 if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) 530 return XFS_BTREE_SBLOCK_CRC_LEN; 531 return XFS_BTREE_SBLOCK_LEN; 532 } 533 534 /* 535 * Return size of btree block pointers for this btree instance. 536 */ 537 static inline size_t xfs_btree_ptr_len(struct xfs_btree_cur *cur) 538 { 539 return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ? 540 sizeof(__be64) : sizeof(__be32); 541 } 542 543 /* 544 * Calculate offset of the n-th record in a btree block. 545 */ 546 STATIC size_t 547 xfs_btree_rec_offset( 548 struct xfs_btree_cur *cur, 549 int n) 550 { 551 return xfs_btree_block_len(cur) + 552 (n - 1) * cur->bc_ops->rec_len; 553 } 554 555 /* 556 * Calculate offset of the n-th key in a btree block. 557 */ 558 STATIC size_t 559 xfs_btree_key_offset( 560 struct xfs_btree_cur *cur, 561 int n) 562 { 563 return xfs_btree_block_len(cur) + 564 (n - 1) * cur->bc_ops->key_len; 565 } 566 567 /* 568 * Calculate offset of the n-th high key in a btree block. 569 */ 570 STATIC size_t 571 xfs_btree_high_key_offset( 572 struct xfs_btree_cur *cur, 573 int n) 574 { 575 return xfs_btree_block_len(cur) + 576 (n - 1) * cur->bc_ops->key_len + (cur->bc_ops->key_len / 2); 577 } 578 579 /* 580 * Calculate offset of the n-th block pointer in a btree block. 
581 */ 582 STATIC size_t 583 xfs_btree_ptr_offset( 584 struct xfs_btree_cur *cur, 585 int n, 586 int level) 587 { 588 return xfs_btree_block_len(cur) + 589 cur->bc_ops->get_maxrecs(cur, level) * cur->bc_ops->key_len + 590 (n - 1) * xfs_btree_ptr_len(cur); 591 } 592 593 /* 594 * Return a pointer to the n-th record in the btree block. 595 */ 596 union xfs_btree_rec * 597 xfs_btree_rec_addr( 598 struct xfs_btree_cur *cur, 599 int n, 600 struct xfs_btree_block *block) 601 { 602 return (union xfs_btree_rec *) 603 ((char *)block + xfs_btree_rec_offset(cur, n)); 604 } 605 606 /* 607 * Return a pointer to the n-th key in the btree block. 608 */ 609 union xfs_btree_key * 610 xfs_btree_key_addr( 611 struct xfs_btree_cur *cur, 612 int n, 613 struct xfs_btree_block *block) 614 { 615 return (union xfs_btree_key *) 616 ((char *)block + xfs_btree_key_offset(cur, n)); 617 } 618 619 /* 620 * Return a pointer to the n-th high key in the btree block. 621 */ 622 union xfs_btree_key * 623 xfs_btree_high_key_addr( 624 struct xfs_btree_cur *cur, 625 int n, 626 struct xfs_btree_block *block) 627 { 628 return (union xfs_btree_key *) 629 ((char *)block + xfs_btree_high_key_offset(cur, n)); 630 } 631 632 /* 633 * Return a pointer to the n-th block pointer in the btree block. 634 */ 635 union xfs_btree_ptr * 636 xfs_btree_ptr_addr( 637 struct xfs_btree_cur *cur, 638 int n, 639 struct xfs_btree_block *block) 640 { 641 int level = xfs_btree_get_level(block); 642 643 ASSERT(block->bb_level != 0); 644 645 return (union xfs_btree_ptr *) 646 ((char *)block + xfs_btree_ptr_offset(cur, n, level)); 647 } 648 649 /* 650 * Get the root block which is stored in the inode. 651 * 652 * For now this btree implementation assumes the btree root is always 653 * stored in the if_broot field of an inode fork. 
654 */ 655 STATIC struct xfs_btree_block * 656 xfs_btree_get_iroot( 657 struct xfs_btree_cur *cur) 658 { 659 struct xfs_ifork *ifp; 660 661 ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, cur->bc_private.b.whichfork); 662 return (struct xfs_btree_block *)ifp->if_broot; 663 } 664 665 /* 666 * Retrieve the block pointer from the cursor at the given level. 667 * This may be an inode btree root or from a buffer. 668 */ 669 struct xfs_btree_block * /* generic btree block pointer */ 670 xfs_btree_get_block( 671 struct xfs_btree_cur *cur, /* btree cursor */ 672 int level, /* level in btree */ 673 struct xfs_buf **bpp) /* buffer containing the block */ 674 { 675 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && 676 (level == cur->bc_nlevels - 1)) { 677 *bpp = NULL; 678 return xfs_btree_get_iroot(cur); 679 } 680 681 *bpp = cur->bc_bufs[level]; 682 return XFS_BUF_TO_BLOCK(*bpp); 683 } 684 685 /* 686 * Get a buffer for the block, return it with no data read. 687 * Long-form addressing. 688 */ 689 xfs_buf_t * /* buffer for fsbno */ 690 xfs_btree_get_bufl( 691 xfs_mount_t *mp, /* file system mount point */ 692 xfs_trans_t *tp, /* transaction pointer */ 693 xfs_fsblock_t fsbno, /* file system block number */ 694 uint lock) /* lock flags for get_buf */ 695 { 696 xfs_daddr_t d; /* real disk block address */ 697 698 ASSERT(fsbno != NULLFSBLOCK); 699 d = XFS_FSB_TO_DADDR(mp, fsbno); 700 return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock); 701 } 702 703 /* 704 * Get a buffer for the block, return it with no data read. 705 * Short-form addressing. 
706 */ 707 xfs_buf_t * /* buffer for agno/agbno */ 708 xfs_btree_get_bufs( 709 xfs_mount_t *mp, /* file system mount point */ 710 xfs_trans_t *tp, /* transaction pointer */ 711 xfs_agnumber_t agno, /* allocation group number */ 712 xfs_agblock_t agbno, /* allocation group block number */ 713 uint lock) /* lock flags for get_buf */ 714 { 715 xfs_daddr_t d; /* real disk block address */ 716 717 ASSERT(agno != NULLAGNUMBER); 718 ASSERT(agbno != NULLAGBLOCK); 719 d = XFS_AGB_TO_DADDR(mp, agno, agbno); 720 return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock); 721 } 722 723 /* 724 * Check for the cursor referring to the last block at the given level. 725 */ 726 int /* 1=is last block, 0=not last block */ 727 xfs_btree_islastblock( 728 xfs_btree_cur_t *cur, /* btree cursor */ 729 int level) /* level to check */ 730 { 731 struct xfs_btree_block *block; /* generic btree block pointer */ 732 xfs_buf_t *bp; /* buffer containing block */ 733 734 block = xfs_btree_get_block(cur, level, &bp); 735 xfs_btree_check_block(cur, block, level, bp); 736 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) 737 return block->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK); 738 else 739 return block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK); 740 } 741 742 /* 743 * Change the cursor to point to the first record at the given level. 744 * Other levels are unaffected. 745 */ 746 STATIC int /* success=1, failure=0 */ 747 xfs_btree_firstrec( 748 xfs_btree_cur_t *cur, /* btree cursor */ 749 int level) /* level to change */ 750 { 751 struct xfs_btree_block *block; /* generic btree block pointer */ 752 xfs_buf_t *bp; /* buffer containing block */ 753 754 /* 755 * Get the block pointer for this level. 756 */ 757 block = xfs_btree_get_block(cur, level, &bp); 758 if (xfs_btree_check_block(cur, block, level, bp)) 759 return 0; 760 /* 761 * It's empty, there is no such record. 762 */ 763 if (!block->bb_numrecs) 764 return 0; 765 /* 766 * Set the ptr value to 1, that's the first record/key. 
767 */ 768 cur->bc_ptrs[level] = 1; 769 return 1; 770 } 771 772 /* 773 * Change the cursor to point to the last record in the current block 774 * at the given level. Other levels are unaffected. 775 */ 776 STATIC int /* success=1, failure=0 */ 777 xfs_btree_lastrec( 778 xfs_btree_cur_t *cur, /* btree cursor */ 779 int level) /* level to change */ 780 { 781 struct xfs_btree_block *block; /* generic btree block pointer */ 782 xfs_buf_t *bp; /* buffer containing block */ 783 784 /* 785 * Get the block pointer for this level. 786 */ 787 block = xfs_btree_get_block(cur, level, &bp); 788 if (xfs_btree_check_block(cur, block, level, bp)) 789 return 0; 790 /* 791 * It's empty, there is no such record. 792 */ 793 if (!block->bb_numrecs) 794 return 0; 795 /* 796 * Set the ptr value to numrecs, that's the last record/key. 797 */ 798 cur->bc_ptrs[level] = be16_to_cpu(block->bb_numrecs); 799 return 1; 800 } 801 802 /* 803 * Compute first and last byte offsets for the fields given. 804 * Interprets the offsets table, which contains struct field offsets. 805 */ 806 void 807 xfs_btree_offsets( 808 int64_t fields, /* bitmask of fields */ 809 const short *offsets, /* table of field offsets */ 810 int nbits, /* number of bits to inspect */ 811 int *first, /* output: first byte offset */ 812 int *last) /* output: last byte offset */ 813 { 814 int i; /* current bit number */ 815 int64_t imask; /* mask for current bit number */ 816 817 ASSERT(fields != 0); 818 /* 819 * Find the lowest bit, so the first byte offset. 820 */ 821 for (i = 0, imask = 1LL; ; i++, imask <<= 1) { 822 if (imask & fields) { 823 *first = offsets[i]; 824 break; 825 } 826 } 827 /* 828 * Find the highest bit, so the last byte offset. 829 */ 830 for (i = nbits - 1, imask = 1LL << i; ; i--, imask >>= 1) { 831 if (imask & fields) { 832 *last = offsets[i + 1] - 1; 833 break; 834 } 835 } 836 } 837 838 /* 839 * Get a buffer for the block, return it read in. 840 * Long-form addressing. 
841 */ 842 int 843 xfs_btree_read_bufl( 844 struct xfs_mount *mp, /* file system mount point */ 845 struct xfs_trans *tp, /* transaction pointer */ 846 xfs_fsblock_t fsbno, /* file system block number */ 847 uint lock, /* lock flags for read_buf */ 848 struct xfs_buf **bpp, /* buffer for fsbno */ 849 int refval, /* ref count value for buffer */ 850 const struct xfs_buf_ops *ops) 851 { 852 struct xfs_buf *bp; /* return value */ 853 xfs_daddr_t d; /* real disk block address */ 854 int error; 855 856 if (!xfs_verify_fsbno(mp, fsbno)) 857 return -EFSCORRUPTED; 858 d = XFS_FSB_TO_DADDR(mp, fsbno); 859 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d, 860 mp->m_bsize, lock, &bp, ops); 861 if (error) 862 return error; 863 if (bp) 864 xfs_buf_set_ref(bp, refval); 865 *bpp = bp; 866 return 0; 867 } 868 869 /* 870 * Read-ahead the block, don't wait for it, don't return a buffer. 871 * Long-form addressing. 872 */ 873 /* ARGSUSED */ 874 void 875 xfs_btree_reada_bufl( 876 struct xfs_mount *mp, /* file system mount point */ 877 xfs_fsblock_t fsbno, /* file system block number */ 878 xfs_extlen_t count, /* count of filesystem blocks */ 879 const struct xfs_buf_ops *ops) 880 { 881 xfs_daddr_t d; 882 883 ASSERT(fsbno != NULLFSBLOCK); 884 d = XFS_FSB_TO_DADDR(mp, fsbno); 885 xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops); 886 } 887 888 /* 889 * Read-ahead the block, don't wait for it, don't return a buffer. 890 * Short-form addressing. 
891 */ 892 /* ARGSUSED */ 893 void 894 xfs_btree_reada_bufs( 895 struct xfs_mount *mp, /* file system mount point */ 896 xfs_agnumber_t agno, /* allocation group number */ 897 xfs_agblock_t agbno, /* allocation group block number */ 898 xfs_extlen_t count, /* count of filesystem blocks */ 899 const struct xfs_buf_ops *ops) 900 { 901 xfs_daddr_t d; 902 903 ASSERT(agno != NULLAGNUMBER); 904 ASSERT(agbno != NULLAGBLOCK); 905 d = XFS_AGB_TO_DADDR(mp, agno, agbno); 906 xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops); 907 } 908 909 STATIC int 910 xfs_btree_readahead_lblock( 911 struct xfs_btree_cur *cur, 912 int lr, 913 struct xfs_btree_block *block) 914 { 915 int rval = 0; 916 xfs_fsblock_t left = be64_to_cpu(block->bb_u.l.bb_leftsib); 917 xfs_fsblock_t right = be64_to_cpu(block->bb_u.l.bb_rightsib); 918 919 if ((lr & XFS_BTCUR_LEFTRA) && left != NULLFSBLOCK) { 920 xfs_btree_reada_bufl(cur->bc_mp, left, 1, 921 cur->bc_ops->buf_ops); 922 rval++; 923 } 924 925 if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLFSBLOCK) { 926 xfs_btree_reada_bufl(cur->bc_mp, right, 1, 927 cur->bc_ops->buf_ops); 928 rval++; 929 } 930 931 return rval; 932 } 933 934 STATIC int 935 xfs_btree_readahead_sblock( 936 struct xfs_btree_cur *cur, 937 int lr, 938 struct xfs_btree_block *block) 939 { 940 int rval = 0; 941 xfs_agblock_t left = be32_to_cpu(block->bb_u.s.bb_leftsib); 942 xfs_agblock_t right = be32_to_cpu(block->bb_u.s.bb_rightsib); 943 944 945 if ((lr & XFS_BTCUR_LEFTRA) && left != NULLAGBLOCK) { 946 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno, 947 left, 1, cur->bc_ops->buf_ops); 948 rval++; 949 } 950 951 if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLAGBLOCK) { 952 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno, 953 right, 1, cur->bc_ops->buf_ops); 954 rval++; 955 } 956 957 return rval; 958 } 959 960 /* 961 * Read-ahead btree blocks, at the given level. 962 * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA. 
963 */ 964 STATIC int 965 xfs_btree_readahead( 966 struct xfs_btree_cur *cur, /* btree cursor */ 967 int lev, /* level in btree */ 968 int lr) /* left/right bits */ 969 { 970 struct xfs_btree_block *block; 971 972 /* 973 * No readahead needed if we are at the root level and the 974 * btree root is stored in the inode. 975 */ 976 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && 977 (lev == cur->bc_nlevels - 1)) 978 return 0; 979 980 if ((cur->bc_ra[lev] | lr) == cur->bc_ra[lev]) 981 return 0; 982 983 cur->bc_ra[lev] |= lr; 984 block = XFS_BUF_TO_BLOCK(cur->bc_bufs[lev]); 985 986 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) 987 return xfs_btree_readahead_lblock(cur, lr, block); 988 return xfs_btree_readahead_sblock(cur, lr, block); 989 } 990 991 STATIC xfs_daddr_t 992 xfs_btree_ptr_to_daddr( 993 struct xfs_btree_cur *cur, 994 union xfs_btree_ptr *ptr) 995 { 996 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { 997 ASSERT(ptr->l != cpu_to_be64(NULLFSBLOCK)); 998 999 return XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l)); 1000 } else { 1001 ASSERT(cur->bc_private.a.agno != NULLAGNUMBER); 1002 ASSERT(ptr->s != cpu_to_be32(NULLAGBLOCK)); 1003 1004 return XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno, 1005 be32_to_cpu(ptr->s)); 1006 } 1007 } 1008 1009 /* 1010 * Readahead @count btree blocks at the given @ptr location. 1011 * 1012 * We don't need to care about long or short form btrees here as we have a 1013 * method of converting the ptr directly to a daddr available to us. 1014 */ 1015 STATIC void 1016 xfs_btree_readahead_ptr( 1017 struct xfs_btree_cur *cur, 1018 union xfs_btree_ptr *ptr, 1019 xfs_extlen_t count) 1020 { 1021 xfs_buf_readahead(cur->bc_mp->m_ddev_targp, 1022 xfs_btree_ptr_to_daddr(cur, ptr), 1023 cur->bc_mp->m_bsize * count, cur->bc_ops->buf_ops); 1024 } 1025 1026 /* 1027 * Set the buffer for level "lev" in the cursor to bp, releasing 1028 * any previous buffer. 
 */
/*
 * Install @bp as the cached buffer for level @lev in the cursor, releasing
 * any buffer previously held at that level, and reset readahead state.
 */
STATIC void
xfs_btree_setbuf(
	xfs_btree_cur_t		*cur,	/* btree cursor */
	int			lev,	/* level in btree */
	xfs_buf_t		*bp)	/* new buffer to set */
{
	struct xfs_btree_block	*b;	/* btree block */

	if (cur->bc_bufs[lev])
		xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[lev]);
	cur->bc_bufs[lev] = bp;
	cur->bc_ra[lev] = 0;

	/*
	 * Pre-mark the readahead bits for any NULL sibling so that
	 * xfs_btree_readahead() does not try to read past the edge
	 * of the tree.
	 */
	b = XFS_BUF_TO_BLOCK(bp);
	if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
		if (b->bb_u.l.bb_leftsib == cpu_to_be64(NULLFSBLOCK))
			cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
		if (b->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK))
			cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
	} else {
		if (b->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK))
			cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
		if (b->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK))
			cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
	}
}

/* Return true if @ptr holds the null sentinel for this btree's pointer form. */
bool
xfs_btree_ptr_is_null(
	struct xfs_btree_cur	*cur,
	union xfs_btree_ptr	*ptr)
{
	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
		return ptr->l == cpu_to_be64(NULLFSBLOCK);
	else
		return ptr->s == cpu_to_be32(NULLAGBLOCK);
}

/* Set @ptr to the null sentinel for this btree's pointer form. */
STATIC void
xfs_btree_set_ptr_null(
	struct xfs_btree_cur	*cur,
	union xfs_btree_ptr	*ptr)
{
	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
		ptr->l = cpu_to_be64(NULLFSBLOCK);
	else
		ptr->s = cpu_to_be32(NULLAGBLOCK);
}

/*
 * Get/set/init sibling pointers
 */
/* Copy the left or right sibling pointer of @block into @ptr (disk endian). */
void
xfs_btree_get_sibling(
	struct xfs_btree_cur	*cur,
	struct xfs_btree_block	*block,
	union xfs_btree_ptr	*ptr,
	int			lr)
{
	ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);

	if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
		if (lr == XFS_BB_RIGHTSIB)
			ptr->l = block->bb_u.l.bb_rightsib;
		else
			ptr->l = block->bb_u.l.bb_leftsib;
	} else {
		if (lr == XFS_BB_RIGHTSIB)
			ptr->s = block->bb_u.s.bb_rightsib;
		else
			ptr->s = block->bb_u.s.bb_leftsib;
	}
}

/* Store @ptr as the left or right sibling pointer of @block (disk endian). */
STATIC void
xfs_btree_set_sibling(
	struct xfs_btree_cur	*cur,
	struct xfs_btree_block	*block,
	union xfs_btree_ptr	*ptr,
	int			lr)
{
	ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);

	if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
		if (lr == XFS_BB_RIGHTSIB)
			block->bb_u.l.bb_rightsib = ptr->l;
		else
			block->bb_u.l.bb_leftsib = ptr->l;
	} else {
		if (lr == XFS_BB_RIGHTSIB)
			block->bb_u.s.bb_rightsib = ptr->s;
		else
			block->bb_u.s.bb_leftsib = ptr->s;
	}
}

/*
 * Initialise a new on-disk btree block header at @buf.  Sibling pointers
 * are set to NULL; CRC-format fields (blkno/owner/uuid/lsn) are only
 * filled in on v5 (CRC-enabled) filesystems.
 */
void
xfs_btree_init_block_int(
	struct xfs_mount	*mp,
	struct xfs_btree_block	*buf,
	xfs_daddr_t		blkno,
	xfs_btnum_t		btnum,
	__u16			level,
	__u16			numrecs,
	__u64			owner,
	unsigned int		flags)
{
	int			crc = xfs_sb_version_hascrc(&mp->m_sb);
	__u32			magic = xfs_btree_magic(crc, btnum);

	buf->bb_magic = cpu_to_be32(magic);
	buf->bb_level = cpu_to_be16(level);
	buf->bb_numrecs = cpu_to_be16(numrecs);

	if (flags & XFS_BTREE_LONG_PTRS) {
		buf->bb_u.l.bb_leftsib = cpu_to_be64(NULLFSBLOCK);
		buf->bb_u.l.bb_rightsib = cpu_to_be64(NULLFSBLOCK);
		if (crc) {
			buf->bb_u.l.bb_blkno = cpu_to_be64(blkno);
			buf->bb_u.l.bb_owner = cpu_to_be64(owner);
			uuid_copy(&buf->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid);
			buf->bb_u.l.bb_pad = 0;
			buf->bb_u.l.bb_lsn = 0;
		}
	} else {
		/* owner is a 32 bit value on short blocks */
		__u32 __owner = (__u32)owner;

		buf->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK);
		buf->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK);
		if (crc) {
			buf->bb_u.s.bb_blkno = cpu_to_be64(blkno);
			buf->bb_u.s.bb_owner = cpu_to_be32(__owner);
			uuid_copy(&buf->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid);
			buf->bb_u.s.bb_lsn = 0;
		}
	}
}

/* Initialise the btree block held in @bp, taking the disk address from it. */
void
xfs_btree_init_block(
	struct xfs_mount *mp,
	struct xfs_buf	*bp,
	xfs_btnum_t	btnum,
	__u16		level,
	__u16		numrecs,
	__u64		owner,
	unsigned int	flags)
{
	xfs_btree_init_block_int(mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn,
				 btnum, level, numrecs, owner, flags);
}

/* Initialise a new btree block using the geometry/owner from the cursor. */
STATIC void
xfs_btree_init_block_cur(
	struct xfs_btree_cur	*cur,
	struct xfs_buf		*bp,
	int			level,
	int			numrecs)
{
	__u64			owner;

	/*
	 * we can pull the owner from the cursor right now as the different
	 * owners align directly with the pointer size of the btree. This may
	 * change in future, but is safe for current users of the generic btree
	 * code.
	 */
	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
		owner = cur->bc_private.b.ip->i_ino;
	else
		owner = cur->bc_private.a.agno;

	xfs_btree_init_block_int(cur->bc_mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn,
				 cur->bc_btnum, level, numrecs,
				 owner, cur->bc_flags);
}

/*
 * Return true if ptr is the last record in the btree and
 * we need to track updates to this record. The decision
 * will be further refined in the update_lastrec method.
 */
STATIC int
xfs_btree_is_lastrec(
	struct xfs_btree_cur	*cur,
	struct xfs_btree_block	*block,
	int			level)
{
	union xfs_btree_ptr	ptr;

	/* Only leaf-level records of LASTREC_UPDATE btrees are tracked. */
	if (level > 0)
		return 0;
	if (!(cur->bc_flags & XFS_BTREE_LASTREC_UPDATE))
		return 0;

	/* The block is rightmost only if it has no right sibling. */
	xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
	if (!xfs_btree_ptr_is_null(cur, &ptr))
		return 0;
	return 1;
}

/* Convert a buffer's disk address into a btree block pointer (long/short). */
STATIC void
xfs_btree_buf_to_ptr(
	struct xfs_btree_cur	*cur,
	struct xfs_buf		*bp,
	union xfs_btree_ptr	*ptr)
{
	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
		ptr->l = cpu_to_be64(XFS_DADDR_TO_FSB(cur->bc_mp,
					XFS_BUF_ADDR(bp)));
	else {
		ptr->s = cpu_to_be32(xfs_daddr_to_agbno(cur->bc_mp,
					XFS_BUF_ADDR(bp)));
	}
}

/* Set the buffer cache reclaim priority appropriate for this btree type. */
STATIC void
xfs_btree_set_refs(
	struct xfs_btree_cur	*cur,
	struct xfs_buf		*bp)
{
	switch (cur->bc_btnum) {
	case XFS_BTNUM_BNO:
	case XFS_BTNUM_CNT:
		xfs_buf_set_ref(bp, XFS_ALLOC_BTREE_REF);
		break;
	case XFS_BTNUM_INO:
	case XFS_BTNUM_FINO:
		xfs_buf_set_ref(bp, XFS_INO_BTREE_REF);
		break;
	case XFS_BTNUM_BMAP:
		xfs_buf_set_ref(bp, XFS_BMAP_BTREE_REF);
		break;
	case XFS_BTNUM_RMAP:
		xfs_buf_set_ref(bp, XFS_RMAP_BTREE_REF);
		break;
	case XFS_BTNUM_REFC:
		xfs_buf_set_ref(bp, XFS_REFC_BTREE_REF);
		break;
	default:
		ASSERT(0);
	}
}

/*
 * Get a buffer for the block at @ptr without reading it from disk, and
 * return the buffer and its block pointer.  Used for freshly allocated
 * blocks whose contents will be written, not read.
 */
STATIC int
xfs_btree_get_buf_block(
	struct xfs_btree_cur	*cur,
	union xfs_btree_ptr	*ptr,
	int			flags,
	struct xfs_btree_block	**block,
	struct xfs_buf		**bpp)
{
	struct xfs_mount	*mp = cur->bc_mp;
	xfs_daddr_t		d;

	/* need to sort out how callers deal with failures first */
	ASSERT(!(flags & XBF_TRYLOCK));

	d = xfs_btree_ptr_to_daddr(cur, ptr);
	*bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d,
				 mp->m_bsize, flags);

	if (!*bpp)
		return -ENOMEM;

	(*bpp)->b_ops = cur->bc_ops->buf_ops;
	*block = XFS_BUF_TO_BLOCK(*bpp);
	return 0;
}

/*
 * Read in the buffer at the given ptr and return the buffer and
 * the block pointer within the buffer.
 */
STATIC int
xfs_btree_read_buf_block(
	struct xfs_btree_cur	*cur,
	union xfs_btree_ptr	*ptr,
	int			flags,
	struct xfs_btree_block	**block,
	struct xfs_buf		**bpp)
{
	struct xfs_mount	*mp = cur->bc_mp;
	xfs_daddr_t		d;
	int			error;

	/* need to sort out how callers deal with failures first */
	ASSERT(!(flags & XBF_TRYLOCK));

	d = xfs_btree_ptr_to_daddr(cur, ptr);
	error = xfs_trans_read_buf(mp, cur->bc_tp, mp->m_ddev_targp, d,
				   mp->m_bsize, flags, bpp,
				   cur->bc_ops->buf_ops);
	if (error)
		return error;

	xfs_btree_set_refs(cur, *bpp);
	*block = XFS_BUF_TO_BLOCK(*bpp);
	return 0;
}

/*
 * Copy keys from one btree block to another.
 */
STATIC void
xfs_btree_copy_keys(
	struct xfs_btree_cur	*cur,
	union xfs_btree_key	*dst_key,
	union xfs_btree_key	*src_key,
	int			numkeys)
{
	ASSERT(numkeys >= 0);
	memcpy(dst_key, src_key, numkeys * cur->bc_ops->key_len);
}

/*
 * Copy records from one btree block to another.
 */
STATIC void
xfs_btree_copy_recs(
	struct xfs_btree_cur	*cur,
	union xfs_btree_rec	*dst_rec,
	union xfs_btree_rec	*src_rec,
	int			numrecs)
{
	ASSERT(numrecs >= 0);
	memcpy(dst_rec, src_rec, numrecs * cur->bc_ops->rec_len);
}

/*
 * Copy block pointers from one btree block to another.
1362 */ 1363 STATIC void 1364 xfs_btree_copy_ptrs( 1365 struct xfs_btree_cur *cur, 1366 union xfs_btree_ptr *dst_ptr, 1367 union xfs_btree_ptr *src_ptr, 1368 int numptrs) 1369 { 1370 ASSERT(numptrs >= 0); 1371 memcpy(dst_ptr, src_ptr, numptrs * xfs_btree_ptr_len(cur)); 1372 } 1373 1374 /* 1375 * Shift keys one index left/right inside a single btree block. 1376 */ 1377 STATIC void 1378 xfs_btree_shift_keys( 1379 struct xfs_btree_cur *cur, 1380 union xfs_btree_key *key, 1381 int dir, 1382 int numkeys) 1383 { 1384 char *dst_key; 1385 1386 ASSERT(numkeys >= 0); 1387 ASSERT(dir == 1 || dir == -1); 1388 1389 dst_key = (char *)key + (dir * cur->bc_ops->key_len); 1390 memmove(dst_key, key, numkeys * cur->bc_ops->key_len); 1391 } 1392 1393 /* 1394 * Shift records one index left/right inside a single btree block. 1395 */ 1396 STATIC void 1397 xfs_btree_shift_recs( 1398 struct xfs_btree_cur *cur, 1399 union xfs_btree_rec *rec, 1400 int dir, 1401 int numrecs) 1402 { 1403 char *dst_rec; 1404 1405 ASSERT(numrecs >= 0); 1406 ASSERT(dir == 1 || dir == -1); 1407 1408 dst_rec = (char *)rec + (dir * cur->bc_ops->rec_len); 1409 memmove(dst_rec, rec, numrecs * cur->bc_ops->rec_len); 1410 } 1411 1412 /* 1413 * Shift block pointers one index left/right inside a single btree block. 1414 */ 1415 STATIC void 1416 xfs_btree_shift_ptrs( 1417 struct xfs_btree_cur *cur, 1418 union xfs_btree_ptr *ptr, 1419 int dir, 1420 int numptrs) 1421 { 1422 char *dst_ptr; 1423 1424 ASSERT(numptrs >= 0); 1425 ASSERT(dir == 1 || dir == -1); 1426 1427 dst_ptr = (char *)ptr + (dir * xfs_btree_ptr_len(cur)); 1428 memmove(dst_ptr, ptr, numptrs * xfs_btree_ptr_len(cur)); 1429 } 1430 1431 /* 1432 * Log key values from the btree block. 
 */
STATIC void
xfs_btree_log_keys(
	struct xfs_btree_cur	*cur,
	struct xfs_buf		*bp,
	int			first,
	int			last)
{
	/* A NULL bp means the block is the root stored in the inode fork. */
	if (bp) {
		xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
		xfs_trans_log_buf(cur->bc_tp, bp,
				  xfs_btree_key_offset(cur, first),
				  xfs_btree_key_offset(cur, last + 1) - 1);
	} else {
		xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
				xfs_ilog_fbroot(cur->bc_private.b.whichfork));
	}
}

/*
 * Log record values from the btree block.
 */
void
xfs_btree_log_recs(
	struct xfs_btree_cur	*cur,
	struct xfs_buf		*bp,
	int			first,
	int			last)
{
	xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
	xfs_trans_log_buf(cur->bc_tp, bp,
			  xfs_btree_rec_offset(cur, first),
			  xfs_btree_rec_offset(cur, last + 1) - 1);
}

/*
 * Log block pointer fields from a btree block (nonleaf).
 */
STATIC void
xfs_btree_log_ptrs(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	struct xfs_buf		*bp,	/* buffer containing btree block */
	int			first,	/* index of first pointer to log */
	int			last)	/* index of last pointer to log */
{
	/* A NULL bp means the block is the root stored in the inode fork. */
	if (bp) {
		struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
		int			level = xfs_btree_get_level(block);

		xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
		xfs_trans_log_buf(cur->bc_tp, bp,
				xfs_btree_ptr_offset(cur, first, level),
				xfs_btree_ptr_offset(cur, last + 1, level) - 1);
	} else {
		xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
			xfs_ilog_fbroot(cur->bc_private.b.whichfork));
	}
}

/*
 * Log fields from a btree block header.
 */
void
xfs_btree_log_block(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	struct xfs_buf		*bp,	/* buffer containing btree block */
	int			fields)	/* mask of fields: XFS_BB_... */
{
	int			first;	/* first byte offset logged */
	int			last;	/* last byte offset logged */
	static const short	soffsets[] = {	/* table of offsets (short) */
		offsetof(struct xfs_btree_block, bb_magic),
		offsetof(struct xfs_btree_block, bb_level),
		offsetof(struct xfs_btree_block, bb_numrecs),
		offsetof(struct xfs_btree_block, bb_u.s.bb_leftsib),
		offsetof(struct xfs_btree_block, bb_u.s.bb_rightsib),
		offsetof(struct xfs_btree_block, bb_u.s.bb_blkno),
		offsetof(struct xfs_btree_block, bb_u.s.bb_lsn),
		offsetof(struct xfs_btree_block, bb_u.s.bb_uuid),
		offsetof(struct xfs_btree_block, bb_u.s.bb_owner),
		offsetof(struct xfs_btree_block, bb_u.s.bb_crc),
		XFS_BTREE_SBLOCK_CRC_LEN
	};
	static const short	loffsets[] = {	/* table of offsets (long) */
		offsetof(struct xfs_btree_block, bb_magic),
		offsetof(struct xfs_btree_block, bb_level),
		offsetof(struct xfs_btree_block, bb_numrecs),
		offsetof(struct xfs_btree_block, bb_u.l.bb_leftsib),
		offsetof(struct xfs_btree_block, bb_u.l.bb_rightsib),
		offsetof(struct xfs_btree_block, bb_u.l.bb_blkno),
		offsetof(struct xfs_btree_block, bb_u.l.bb_lsn),
		offsetof(struct xfs_btree_block, bb_u.l.bb_uuid),
		offsetof(struct xfs_btree_block, bb_u.l.bb_owner),
		offsetof(struct xfs_btree_block, bb_u.l.bb_crc),
		offsetof(struct xfs_btree_block, bb_u.l.bb_pad),
		XFS_BTREE_LBLOCK_CRC_LEN
	};

	if (bp) {
		int nbits;

		if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) {
			/*
			 * We don't log the CRC when updating a btree
			 * block but instead recreate it during log
			 * recovery.  As the log buffers have checksums
			 * of their own this is safe and avoids logging a crc
			 * update in a lot of places.
			 */
			if (fields == XFS_BB_ALL_BITS)
				fields = XFS_BB_ALL_BITS_CRC;
			nbits = XFS_BB_NUM_BITS_CRC;
		} else {
			nbits = XFS_BB_NUM_BITS;
		}
		xfs_btree_offsets(fields,
				  (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
					loffsets : soffsets,
				  nbits, &first, &last);
		xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
		xfs_trans_log_buf(cur->bc_tp, bp, first, last);
	} else {
		xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
			xfs_ilog_fbroot(cur->bc_private.b.whichfork));
	}
}

/*
 * Increment cursor by one record at the level.
 * For nonzero levels the leaf-ward information is untouched.
 */
int						/* error */
xfs_btree_increment(
	struct xfs_btree_cur	*cur,
	int			level,
	int			*stat)		/* success/failure */
{
	struct xfs_btree_block	*block;
	union xfs_btree_ptr	ptr;
	struct xfs_buf		*bp;
	int			error;		/* error return value */
	int			lev;

	ASSERT(level < cur->bc_nlevels);

	/* Read-ahead to the right at this level. */
	xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);

	/* Get a pointer to the btree block. */
	block = xfs_btree_get_block(cur, level, &bp);

#ifdef DEBUG
	error = xfs_btree_check_block(cur, block, level, bp);
	if (error)
		goto error0;
#endif

	/* We're done if we remain in the block after the increment. */
	if (++cur->bc_ptrs[level] <= xfs_btree_get_numrecs(block))
		goto out1;

	/* Fail if we just went off the right edge of the tree. */
	xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
	if (xfs_btree_ptr_is_null(cur, &ptr))
		goto out0;

	XFS_BTREE_STATS_INC(cur, increment);

	/*
	 * March up the tree incrementing pointers.
	 * Stop when we don't go off the right edge of a block.
	 */
	for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
		block = xfs_btree_get_block(cur, lev, &bp);

#ifdef DEBUG
		error = xfs_btree_check_block(cur, block, lev, bp);
		if (error)
			goto error0;
#endif

		if (++cur->bc_ptrs[lev] <= xfs_btree_get_numrecs(block))
			break;

		/* Read-ahead the right block for the next loop. */
		xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
	}

	/*
	 * If we went off the root then we are either seriously
	 * confused or have the tree root in an inode.
	 */
	if (lev == cur->bc_nlevels) {
		if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
			goto out0;
		ASSERT(0);
		error = -EFSCORRUPTED;
		goto error0;
	}
	ASSERT(lev < cur->bc_nlevels);

	/*
	 * Now walk back down the tree, fixing up the cursor's buffer
	 * pointers and key numbers.
	 */
	for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
		union xfs_btree_ptr	*ptrp;

		ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
		--lev;
		error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp);
		if (error)
			goto error0;

		xfs_btree_setbuf(cur, lev, bp);
		cur->bc_ptrs[lev] = 1;
	}
out1:
	*stat = 1;
	return 0;

out0:
	*stat = 0;
	return 0;

error0:
	return error;
}

/*
 * Decrement cursor by one record at the level.
 * For nonzero levels the leaf-ward information is untouched.
 */
int						/* error */
xfs_btree_decrement(
	struct xfs_btree_cur	*cur,
	int			level,
	int			*stat)		/* success/failure */
{
	struct xfs_btree_block	*block;
	xfs_buf_t		*bp;
	int			error;		/* error return value */
	int			lev;
	union xfs_btree_ptr	ptr;

	ASSERT(level < cur->bc_nlevels);

	/* Read-ahead to the left at this level. */
	xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA);

	/* We're done if we remain in the block after the decrement. */
	if (--cur->bc_ptrs[level] > 0)
		goto out1;

	/* Get a pointer to the btree block. */
	block = xfs_btree_get_block(cur, level, &bp);

#ifdef DEBUG
	error = xfs_btree_check_block(cur, block, level, bp);
	if (error)
		goto error0;
#endif

	/* Fail if we just went off the left edge of the tree. */
	xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB);
	if (xfs_btree_ptr_is_null(cur, &ptr))
		goto out0;

	XFS_BTREE_STATS_INC(cur, decrement);

	/*
	 * March up the tree decrementing pointers.
	 * Stop when we don't go off the left edge of a block.
	 */
	for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
		if (--cur->bc_ptrs[lev] > 0)
			break;
		/* Read-ahead the left block for the next loop. */
		xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA);
	}

	/*
	 * If we went off the root then we are either seriously
	 * confused or the root of the tree is in an inode.
	 */
	if (lev == cur->bc_nlevels) {
		if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
			goto out0;
		ASSERT(0);
		error = -EFSCORRUPTED;
		goto error0;
	}
	ASSERT(lev < cur->bc_nlevels);

	/*
	 * Now walk back down the tree, fixing up the cursor's buffer
	 * pointers and key numbers.
	 */
	for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
		union xfs_btree_ptr	*ptrp;

		ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
		--lev;
		error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp);
		if (error)
			goto error0;
		xfs_btree_setbuf(cur, lev, bp);
		cur->bc_ptrs[lev] = xfs_btree_get_numrecs(block);
	}
out1:
	*stat = 1;
	return 0;

out0:
	*stat = 0;
	return 0;

error0:
	return error;
}

/*
 * Fetch the btree block at @pp for @level into *@blkp, reusing the
 * cursor's cached buffer when the disk address matches, and perform
 * sanity checks that the verifiers cannot do.
 */
int
xfs_btree_lookup_get_block(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	int			level,	/* level in the btree */
	union xfs_btree_ptr	*pp,	/* ptr to btree block */
	struct xfs_btree_block	**blkp) /* return btree block */
{
	struct xfs_buf		*bp;	/* buffer pointer for btree block */
	int			error = 0;

	/* special case the root block if in an inode */
	if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
	    (level == cur->bc_nlevels - 1)) {
		*blkp = xfs_btree_get_iroot(cur);
		return 0;
	}

	/*
	 * If the old buffer at this level for the disk address we are
	 * looking for re-use it.
	 *
	 * Otherwise throw it away and get a new one.
	 */
	bp = cur->bc_bufs[level];
	if (bp && XFS_BUF_ADDR(bp) == xfs_btree_ptr_to_daddr(cur, pp)) {
		*blkp = XFS_BUF_TO_BLOCK(bp);
		return 0;
	}

	error = xfs_btree_read_buf_block(cur, pp, 0, blkp, &bp);
	if (error)
		return error;

	/* Check the inode owner since the verifiers don't. */
	if (xfs_sb_version_hascrc(&cur->bc_mp->m_sb) &&
	    !(cur->bc_private.b.flags & XFS_BTCUR_BPRV_INVALID_OWNER) &&
	    (cur->bc_flags & XFS_BTREE_LONG_PTRS) &&
	    be64_to_cpu((*blkp)->bb_u.l.bb_owner) !=
			cur->bc_private.b.ip->i_ino)
		goto out_bad;

	/* Did we get the level we were looking for? */
	if (be16_to_cpu((*blkp)->bb_level) != level)
		goto out_bad;

	/* Check that internal nodes have at least one record. */
	if (level != 0 && be16_to_cpu((*blkp)->bb_numrecs) == 0)
		goto out_bad;

	xfs_btree_setbuf(cur, level, bp);
	return 0;

out_bad:
	*blkp = NULL;
	xfs_trans_brelse(cur->bc_tp, bp);
	return -EFSCORRUPTED;
}

/*
 * Get current search key.  For level 0 we don't actually have a key
 * structure so we make one up from the record.  For all other levels
 * we just return the right key.
 */
STATIC union xfs_btree_key *
xfs_lookup_get_search_key(
	struct xfs_btree_cur	*cur,
	int			level,
	int			keyno,
	struct xfs_btree_block	*block,
	union xfs_btree_key	*kp)
{
	if (level == 0) {
		cur->bc_ops->init_key_from_rec(kp,
				xfs_btree_rec_addr(cur, keyno, block));
		return kp;
	}

	return xfs_btree_key_addr(cur, keyno, block);
}

/*
 * Lookup the record.  The cursor is made to point to it, based on dir.
 * stat is set to 0 if can't find any such record, 1 for success.
 */
int					/* error */
xfs_btree_lookup(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	xfs_lookup_t		dir,	/* <=, ==, or >= */
	int			*stat)	/* success/failure */
{
	struct xfs_btree_block	*block;	/* current btree block */
	int64_t			diff;	/* difference for the current key */
	int			error;	/* error return value */
	int			keyno;	/* current key number */
	int			level;	/* level in the btree */
	union xfs_btree_ptr	*pp;	/* ptr to btree block */
	union xfs_btree_ptr	ptr;	/* ptr to btree block */

	XFS_BTREE_STATS_INC(cur, lookup);

	/* No such thing as a zero-level tree. */
	if (cur->bc_nlevels == 0)
		return -EFSCORRUPTED;

	block = NULL;
	keyno = 0;

	/* initialise start pointer from cursor */
	cur->bc_ops->init_ptr_from_cur(cur, &ptr);
	pp = &ptr;

	/*
	 * Iterate over each level in the btree, starting at the root.
	 * For each level above the leaves, find the key we need, based
	 * on the lookup record, then follow the corresponding block
	 * pointer down to the next level.
	 */
	for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {
		/* Get the block we need to do the lookup on. */
		error = xfs_btree_lookup_get_block(cur, level, pp, &block);
		if (error)
			goto error0;

		if (diff == 0) {
			/*
			 * If we already had a key match at a higher level, we
			 * know we need to use the first entry in this block.
			 */
			keyno = 1;
		} else {
			/* Otherwise search this block. Do a binary search. */

			int	high;	/* high entry number */
			int	low;	/* low entry number */

			/* Set low and high entry numbers, 1-based. */
			low = 1;
			high = xfs_btree_get_numrecs(block);
			if (!high) {
				/* Block is empty, must be an empty leaf. */
				ASSERT(level == 0 && cur->bc_nlevels == 1);

				cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE;
				*stat = 0;
				return 0;
			}

			/* Binary search the block. */
			while (low <= high) {
				union xfs_btree_key	key;
				union xfs_btree_key	*kp;

				XFS_BTREE_STATS_INC(cur, compare);

				/* keyno is average of low and high. */
				keyno = (low + high) >> 1;

				/* Get current search key */
				kp = xfs_lookup_get_search_key(cur, level,
						keyno, block, &key);

				/*
				 * Compute difference to get next direction:
				 *  - less than, move right
				 *  - greater than, move left
				 *  - equal, we're done
				 */
				diff = cur->bc_ops->key_diff(cur, kp);
				if (diff < 0)
					low = keyno + 1;
				else if (diff > 0)
					high = keyno - 1;
				else
					break;
			}
		}

		/*
		 * If there are more levels, set up for the next level
		 * by getting the block number and filling in the cursor.
		 */
		if (level > 0) {
			/*
			 * If we moved left, need the previous key number,
			 * unless there isn't one.
			 */
			if (diff > 0 && --keyno < 1)
				keyno = 1;
			pp = xfs_btree_ptr_addr(cur, keyno, block);

#ifdef DEBUG
			error = xfs_btree_check_ptr(cur, pp, 0, level);
			if (error)
				goto error0;
#endif
			cur->bc_ptrs[level] = keyno;
		}
	}

	/* Done with the search. See if we need to adjust the results. */
	if (dir != XFS_LOOKUP_LE && diff < 0) {
		keyno++;
		/*
		 * If ge search and we went off the end of the block, but it's
		 * not the last block, we're in the wrong block.
		 */
		xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
		if (dir == XFS_LOOKUP_GE &&
		    keyno > xfs_btree_get_numrecs(block) &&
		    !xfs_btree_ptr_is_null(cur, &ptr)) {
			int	i;

			cur->bc_ptrs[0] = keyno;
			error = xfs_btree_increment(cur, 0, &i);
			if (error)
				goto error0;
			XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1);
			*stat = 1;
			return 0;
		}
	} else if (dir == XFS_LOOKUP_LE && diff > 0)
		keyno--;
	cur->bc_ptrs[0] = keyno;

	/* Return if we succeeded or not. */
	if (keyno == 0 || keyno > xfs_btree_get_numrecs(block))
		*stat = 0;
	else if (dir != XFS_LOOKUP_EQ || diff == 0)
		*stat = 1;
	else
		*stat = 0;
	return 0;

error0:
	return error;
}

/* Find the high key storage area from a regular key. */
union xfs_btree_key *
xfs_btree_high_key_from_key(
	struct xfs_btree_cur	*cur,
	union xfs_btree_key	*key)
{
	ASSERT(cur->bc_flags & XFS_BTREE_OVERLAPPING);
	/* The high key occupies the second half of the key area. */
	return (union xfs_btree_key *)((char *)key +
			(cur->bc_ops->key_len / 2));
}

/* Determine the low (and high if overlapped) keys of a leaf block */
STATIC void
xfs_btree_get_leaf_keys(
	struct xfs_btree_cur	*cur,
	struct xfs_btree_block	*block,
	union xfs_btree_key	*key)
{
	union xfs_btree_key	max_hkey;
	union xfs_btree_key	hkey;
	union xfs_btree_rec	*rec;
	union xfs_btree_key	*high;
	int			n;

	/* The low key always comes from the first record. */
	rec = xfs_btree_rec_addr(cur, 1, block);
	cur->bc_ops->init_key_from_rec(key, rec);

	if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
		/*
		 * For an overlapping btree any record can carry the largest
		 * high key, so scan them all.
		 */
		cur->bc_ops->init_high_key_from_rec(&max_hkey, rec);
		for (n = 2; n <= xfs_btree_get_numrecs(block); n++) {
			rec = xfs_btree_rec_addr(cur, n, block);
			cur->bc_ops->init_high_key_from_rec(&hkey, rec);
			if (cur->bc_ops->diff_two_keys(cur, &hkey, &max_hkey)
					> 0)
				max_hkey = hkey;
		}

		high = xfs_btree_high_key_from_key(cur, key);
		memcpy(high, &max_hkey, cur->bc_ops->key_len / 2);
	}
}

/* Determine the low (and high if overlapped) keys of a node block */
STATIC void
xfs_btree_get_node_keys(
	struct xfs_btree_cur	*cur,
	struct xfs_btree_block	*block,
	union xfs_btree_key	*key)
{
	union xfs_btree_key	*hkey;
	union xfs_btree_key	*max_hkey;
	union xfs_btree_key	*high;
	int			n;

	if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
		/* Low key from the first entry; high key is the max of all. */
		memcpy(key, xfs_btree_key_addr(cur, 1, block),
				cur->bc_ops->key_len / 2);

		max_hkey = xfs_btree_high_key_addr(cur, 1, block);
		for (n = 2; n <= xfs_btree_get_numrecs(block); n++) {
			hkey = xfs_btree_high_key_addr(cur, n, block);
			if (cur->bc_ops->diff_two_keys(cur, hkey, max_hkey) > 0)
				max_hkey = hkey;
		}

		high = xfs_btree_high_key_from_key(cur, key);
		memcpy(high, max_hkey, cur->bc_ops->key_len / 2);
	} else {
		memcpy(key, xfs_btree_key_addr(cur, 1, block),
				cur->bc_ops->key_len);
	}
}

/* Derive the keys for any btree block. */
void
xfs_btree_get_keys(
	struct xfs_btree_cur	*cur,
	struct xfs_btree_block	*block,
	union xfs_btree_key	*key)
{
	if (be16_to_cpu(block->bb_level) == 0)
		xfs_btree_get_leaf_keys(cur, block, key);
	else
		xfs_btree_get_node_keys(cur, block, key);
}

/*
 * Decide if we need to update the parent keys of a btree block.  For
 * a standard btree this is only necessary if we're updating the first
 * record/key.  For an overlapping btree, we must always update the
 * keys because the highest key can be in any of the records or keys
 * in the block.
 */
static inline bool
xfs_btree_needs_key_update(
	struct xfs_btree_cur	*cur,
	int			ptr)
{
	return (cur->bc_flags & XFS_BTREE_OVERLAPPING) || ptr == 1;
}

/*
 * Update the low and high parent keys of the given level, progressing
 * towards the root. If force_all is false, stop if the keys for a given
 * level do not need updating.
 */
STATIC int
__xfs_btree_updkeys(
	struct xfs_btree_cur	*cur,
	int			level,
	struct xfs_btree_block	*block,
	struct xfs_buf		*bp0,
	bool			force_all)
{
	union xfs_btree_key	key;	/* keys from current level */
	union xfs_btree_key	*lkey;	/* keys from the next level up */
	union xfs_btree_key	*hkey;
	union xfs_btree_key	*nlkey;	/* keys from the next level up */
	union xfs_btree_key	*nhkey;
	struct xfs_buf		*bp;
	int			ptr;

	/* Only overlapping btrees carry a low/high key pair to propagate. */
	ASSERT(cur->bc_flags & XFS_BTREE_OVERLAPPING);

	/* Exit if there aren't any parent levels to update. */
	if (level + 1 >= cur->bc_nlevels)
		return 0;

	trace_xfs_btree_updkeys(cur, level, bp0);

	/*
	 * &key holds both halves: lkey points at the low key, hkey at the
	 * high key stored in the second half of the same union.
	 */
	lkey = &key;
	hkey = xfs_btree_high_key_from_key(cur, lkey);
	xfs_btree_get_keys(cur, block, lkey);
	for (level++; level < cur->bc_nlevels; level++) {
#ifdef DEBUG
		int		error;
#endif
		block = xfs_btree_get_block(cur, level, &bp);
		trace_xfs_btree_updkeys(cur, level, bp);
#ifdef DEBUG
		error = xfs_btree_check_block(cur, block, level, bp);
		if (error)
			return error;
#endif
		ptr = cur->bc_ptrs[level];
		nlkey = xfs_btree_key_addr(cur, ptr, block);
		nhkey = xfs_btree_high_key_addr(cur, ptr, block);
		/* Stop early if this level's keys already match. */
		if (!force_all &&
		    !(cur->bc_ops->diff_two_keys(cur, nlkey, lkey) != 0 ||
		      cur->bc_ops->diff_two_keys(cur, nhkey, hkey) != 0))
			break;
		xfs_btree_copy_keys(cur, nlkey, lkey, 1);
		xfs_btree_log_keys(cur, bp, ptr, ptr);
		if (level + 1 >= cur->bc_nlevels)
			break;
		/* Recompute the keys to push up to the next level. */
		xfs_btree_get_node_keys(cur, block, lkey);
	}

	return 0;
}

/* Update all the keys from some level in cursor back to the root.
 */
STATIC int
xfs_btree_updkeys_force(
	struct xfs_btree_cur	*cur,
	int			level)
{
	struct xfs_buf		*bp;
	struct xfs_btree_block	*block;

	block = xfs_btree_get_block(cur, level, &bp);
	return __xfs_btree_updkeys(cur, level, block, bp, true);
}

/*
 * Update the parent keys of the given level, progressing towards the root.
 */
STATIC int
xfs_btree_update_keys(
	struct xfs_btree_cur	*cur,
	int			level)
{
	struct xfs_btree_block	*block;
	struct xfs_buf		*bp;
	union xfs_btree_key	*kp;
	union xfs_btree_key	key;
	int			ptr;

	ASSERT(level >= 0);

	/* Overlapping btrees need the low/high key variant. */
	block = xfs_btree_get_block(cur, level, &bp);
	if (cur->bc_flags & XFS_BTREE_OVERLAPPING)
		return __xfs_btree_updkeys(cur, level, block, bp, false);

	/*
	 * Go up the tree from this level toward the root.
	 * At each level, update the key value to the value input.
	 * Stop when we reach a level where the cursor isn't pointing
	 * at the first entry in the block.
	 */
	xfs_btree_get_keys(cur, block, &key);
	for (level++, ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {
#ifdef DEBUG
		int		error;
#endif
		block = xfs_btree_get_block(cur, level, &bp);
#ifdef DEBUG
		error = xfs_btree_check_block(cur, block, level, bp);
		if (error)
			return error;
#endif
		ptr = cur->bc_ptrs[level];
		kp = xfs_btree_key_addr(cur, ptr, block);
		xfs_btree_copy_keys(cur, kp, &key, 1);
		xfs_btree_log_keys(cur, bp, ptr, ptr);
	}

	return 0;
}

/*
 * Update the record referred to by cur to the value in the
 * given record. This either works (return 0) or gets an
 * EFSCORRUPTED error.
2221 */ 2222 int 2223 xfs_btree_update( 2224 struct xfs_btree_cur *cur, 2225 union xfs_btree_rec *rec) 2226 { 2227 struct xfs_btree_block *block; 2228 struct xfs_buf *bp; 2229 int error; 2230 int ptr; 2231 union xfs_btree_rec *rp; 2232 2233 /* Pick up the current block. */ 2234 block = xfs_btree_get_block(cur, 0, &bp); 2235 2236 #ifdef DEBUG 2237 error = xfs_btree_check_block(cur, block, 0, bp); 2238 if (error) 2239 goto error0; 2240 #endif 2241 /* Get the address of the rec to be updated. */ 2242 ptr = cur->bc_ptrs[0]; 2243 rp = xfs_btree_rec_addr(cur, ptr, block); 2244 2245 /* Fill in the new contents and log them. */ 2246 xfs_btree_copy_recs(cur, rp, rec, 1); 2247 xfs_btree_log_recs(cur, bp, ptr, ptr); 2248 2249 /* 2250 * If we are tracking the last record in the tree and 2251 * we are at the far right edge of the tree, update it. 2252 */ 2253 if (xfs_btree_is_lastrec(cur, block, 0)) { 2254 cur->bc_ops->update_lastrec(cur, block, rec, 2255 ptr, LASTREC_UPDATE); 2256 } 2257 2258 /* Pass new key value up to our parent. */ 2259 if (xfs_btree_needs_key_update(cur, ptr)) { 2260 error = xfs_btree_update_keys(cur, 0); 2261 if (error) 2262 goto error0; 2263 } 2264 2265 return 0; 2266 2267 error0: 2268 return error; 2269 } 2270 2271 /* 2272 * Move 1 record left from cur/level if possible. 2273 * Update cur to reflect the new path. 
 */
STATIC int					/* error */
xfs_btree_lshift(
	struct xfs_btree_cur	*cur,
	int			level,
	int			*stat)		/* success/failure */
{
	struct xfs_buf		*lbp;		/* left buffer pointer */
	struct xfs_btree_block	*left;		/* left btree block */
	int			lrecs;		/* left record count */
	struct xfs_buf		*rbp;		/* right buffer pointer */
	struct xfs_btree_block	*right;		/* right btree block */
	struct xfs_btree_cur	*tcur;		/* temporary btree cursor */
	int			rrecs;		/* right record count */
	union xfs_btree_ptr	lptr;		/* left btree pointer */
	union xfs_btree_key	*rkp = NULL;	/* right btree key */
	union xfs_btree_ptr	*rpp = NULL;	/* right address pointer */
	union xfs_btree_rec	*rrp = NULL;	/* right record pointer */
	int			error;		/* error return value */
	int			i;

	/* The root of an inode-rooted tree has no siblings to shift into. */
	if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
	    level == cur->bc_nlevels - 1)
		goto out0;

	/* Set up variables for this block as "right". */
	right = xfs_btree_get_block(cur, level, &rbp);

#ifdef DEBUG
	error = xfs_btree_check_block(cur, right, level, rbp);
	if (error)
		goto error0;
#endif

	/* If we've got no left sibling then we can't shift an entry left. */
	xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
	if (xfs_btree_ptr_is_null(cur, &lptr))
		goto out0;

	/*
	 * If the cursor entry is the one that would be moved, don't
	 * do it... it's too complicated.
	 */
	if (cur->bc_ptrs[level] <= 1)
		goto out0;

	/* Set up the left neighbor as "left". */
	error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
	if (error)
		goto error0;

	/* If it's full, it can't take another entry. */
	lrecs = xfs_btree_get_numrecs(left);
	if (lrecs == cur->bc_ops->get_maxrecs(cur, level))
		goto out0;

	rrecs = xfs_btree_get_numrecs(right);

	/*
	 * We add one entry to the left side and remove one for the right side.
	 * Account for it here, the changes will be updated on disk and logged
	 * later.
	 */
	lrecs++;
	rrecs--;

	XFS_BTREE_STATS_INC(cur, lshift);
	XFS_BTREE_STATS_ADD(cur, moves, 1);

	/*
	 * If non-leaf, copy a key and a ptr to the left block.
	 * Log the changes to the left block.
	 */
	if (level > 0) {
		/* It's a non-leaf.  Move keys and pointers. */
		union xfs_btree_key	*lkp;	/* left btree key */
		union xfs_btree_ptr	*lpp;	/* left address pointer */

		/* New entry lands in the (now incremented) last slot of left. */
		lkp = xfs_btree_key_addr(cur, lrecs, left);
		rkp = xfs_btree_key_addr(cur, 1, right);

		lpp = xfs_btree_ptr_addr(cur, lrecs, left);
		rpp = xfs_btree_ptr_addr(cur, 1, right);
#ifdef DEBUG
		error = xfs_btree_check_ptr(cur, rpp, 0, level);
		if (error)
			goto error0;
#endif
		xfs_btree_copy_keys(cur, lkp, rkp, 1);
		xfs_btree_copy_ptrs(cur, lpp, rpp, 1);

		xfs_btree_log_keys(cur, lbp, lrecs, lrecs);
		xfs_btree_log_ptrs(cur, lbp, lrecs, lrecs);

		ASSERT(cur->bc_ops->keys_inorder(cur,
			xfs_btree_key_addr(cur, lrecs - 1, left), lkp));
	} else {
		/* It's a leaf.  Move records. */
		union xfs_btree_rec	*lrp;	/* left record pointer */

		lrp = xfs_btree_rec_addr(cur, lrecs, left);
		rrp = xfs_btree_rec_addr(cur, 1, right);

		xfs_btree_copy_recs(cur, lrp, rrp, 1);
		xfs_btree_log_recs(cur, lbp, lrecs, lrecs);

		ASSERT(cur->bc_ops->recs_inorder(cur,
			xfs_btree_rec_addr(cur, lrecs - 1, left), lrp));
	}

	/* Commit the new record counts of both blocks to the log. */
	xfs_btree_set_numrecs(left, lrecs);
	xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS);

	xfs_btree_set_numrecs(right, rrecs);
	xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS);

	/*
	 * Slide the contents of right down one entry.
	 */
	XFS_BTREE_STATS_ADD(cur, moves, rrecs - 1);
	if (level > 0) {
		/* It's a nonleaf. operate on keys and ptrs */
#ifdef DEBUG
		int			i;		/* loop index */

		for (i = 0; i < rrecs; i++) {
			error = xfs_btree_check_ptr(cur, rpp, i + 1, level);
			if (error)
				goto error0;
		}
#endif
		xfs_btree_shift_keys(cur,
				xfs_btree_key_addr(cur, 2, right),
				-1, rrecs);
		xfs_btree_shift_ptrs(cur,
				xfs_btree_ptr_addr(cur, 2, right),
				-1, rrecs);

		xfs_btree_log_keys(cur, rbp, 1, rrecs);
		xfs_btree_log_ptrs(cur, rbp, 1, rrecs);
	} else {
		/* It's a leaf. operate on records */
		xfs_btree_shift_recs(cur,
			xfs_btree_rec_addr(cur, 2, right),
			-1, rrecs);
		xfs_btree_log_recs(cur, rbp, 1, rrecs);
	}

	/*
	 * Using a temporary cursor, update the parent key values of the
	 * block on the left.
	 */
	if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
		error = xfs_btree_dup_cursor(cur, &tcur);
		if (error)
			goto error0;
		i = xfs_btree_firstrec(tcur, level);
		/* NOTE(review): tcur is not freed if this corruption check
		 * fires (error0 skips xfs_btree_del_cursor) — verify whether
		 * this leak on the corrupted-fs path is intentional. */
		XFS_WANT_CORRUPTED_GOTO(tcur->bc_mp, i == 1, error0);

		error = xfs_btree_decrement(tcur, level, &i);
		if (error)
			goto error1;

		/* Update the parent high keys of the left block, if needed. */
		error = xfs_btree_update_keys(tcur, level);
		if (error)
			goto error1;

		xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
	}

	/* Update the parent keys of the right block. */
	error = xfs_btree_update_keys(cur, level);
	if (error)
		goto error0;

	/* Slide the cursor value left one. */
	cur->bc_ptrs[level]--;

	*stat = 1;
	return 0;

out0:
	*stat = 0;
	return 0;

error0:
	return error;

error1:
	xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
	return error;
}

/*
 * Move 1 record right from cur/level if possible.
 * Update cur to reflect the new path.
 */
STATIC int					/* error */
xfs_btree_rshift(
	struct xfs_btree_cur	*cur,
	int			level,
	int			*stat)		/* success/failure */
{
	struct xfs_buf		*lbp;		/* left buffer pointer */
	struct xfs_btree_block	*left;		/* left btree block */
	struct xfs_buf		*rbp;		/* right buffer pointer */
	struct xfs_btree_block	*right;		/* right btree block */
	struct xfs_btree_cur	*tcur;		/* temporary btree cursor */
	union xfs_btree_ptr	rptr;		/* right block pointer */
	union xfs_btree_key	*rkp;		/* right btree key */
	int			rrecs;		/* right record count */
	int			lrecs;		/* left record count */
	int			error;		/* error return value */
	int			i;		/* loop counter */

	/* The root of an inode-rooted tree has no siblings to shift into. */
	if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
	    (level == cur->bc_nlevels - 1))
		goto out0;

	/* Set up variables for this block as "left". */
	left = xfs_btree_get_block(cur, level, &lbp);

#ifdef DEBUG
	error = xfs_btree_check_block(cur, left, level, lbp);
	if (error)
		goto error0;
#endif

	/* If we've got no right sibling then we can't shift an entry right. */
	xfs_btree_get_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);
	if (xfs_btree_ptr_is_null(cur, &rptr))
		goto out0;

	/*
	 * If the cursor entry is the one that would be moved, don't
	 * do it... it's too complicated.
	 */
	lrecs = xfs_btree_get_numrecs(left);
	if (cur->bc_ptrs[level] >= lrecs)
		goto out0;

	/* Set up the right neighbor as "right". */
	error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
	if (error)
		goto error0;

	/* If it's full, it can't take another entry. */
	rrecs = xfs_btree_get_numrecs(right);
	if (rrecs == cur->bc_ops->get_maxrecs(cur, level))
		goto out0;

	XFS_BTREE_STATS_INC(cur, rshift);
	XFS_BTREE_STATS_ADD(cur, moves, rrecs);

	/*
	 * Make a hole at the start of the right neighbor block, then
	 * copy the last left block entry to the hole.
	 */
	if (level > 0) {
		/* It's a nonleaf. make a hole in the keys and ptrs */
		union xfs_btree_key	*lkp;
		union xfs_btree_ptr	*lpp;
		union xfs_btree_ptr	*rpp;

		lkp = xfs_btree_key_addr(cur, lrecs, left);
		lpp = xfs_btree_ptr_addr(cur, lrecs, left);
		rkp = xfs_btree_key_addr(cur, 1, right);
		rpp = xfs_btree_ptr_addr(cur, 1, right);

#ifdef DEBUG
		for (i = rrecs - 1; i >= 0; i--) {
			error = xfs_btree_check_ptr(cur, rpp, i, level);
			if (error)
				goto error0;
		}
#endif

		xfs_btree_shift_keys(cur, rkp, 1, rrecs);
		xfs_btree_shift_ptrs(cur, rpp, 1, rrecs);

#ifdef DEBUG
		error = xfs_btree_check_ptr(cur, lpp, 0, level);
		if (error)
			goto error0;
#endif

		/* Now put the new data in, and log it. */
		xfs_btree_copy_keys(cur, rkp, lkp, 1);
		xfs_btree_copy_ptrs(cur, rpp, lpp, 1);

		xfs_btree_log_keys(cur, rbp, 1, rrecs + 1);
		xfs_btree_log_ptrs(cur, rbp, 1, rrecs + 1);

		ASSERT(cur->bc_ops->keys_inorder(cur, rkp,
			xfs_btree_key_addr(cur, 2, right)));
	} else {
		/* It's a leaf. make a hole in the records */
		union xfs_btree_rec	*lrp;
		union xfs_btree_rec	*rrp;

		lrp = xfs_btree_rec_addr(cur, lrecs, left);
		rrp = xfs_btree_rec_addr(cur, 1, right);

		xfs_btree_shift_recs(cur, rrp, 1, rrecs);

		/* Now put the new data in, and log it. */
		xfs_btree_copy_recs(cur, rrp, lrp, 1);
		xfs_btree_log_recs(cur, rbp, 1, rrecs + 1);
	}

	/*
	 * Decrement and log left's numrecs, bump and log right's numrecs.
	 */
	xfs_btree_set_numrecs(left, --lrecs);
	xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS);

	xfs_btree_set_numrecs(right, ++rrecs);
	xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS);

	/*
	 * Using a temporary cursor, update the parent key values of the
	 * block on the right.
	 */
	error = xfs_btree_dup_cursor(cur, &tcur);
	if (error)
		goto error0;
	i = xfs_btree_lastrec(tcur, level);
	/* NOTE(review): as in xfs_btree_lshift, tcur leaks if this fires
	 * (error0 does not delete the duplicated cursor) — verify. */
	XFS_WANT_CORRUPTED_GOTO(tcur->bc_mp, i == 1, error0);

	error = xfs_btree_increment(tcur, level, &i);
	if (error)
		goto error1;

	/* Update the parent high keys of the left block, if needed. */
	if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
		error = xfs_btree_update_keys(cur, level);
		if (error)
			goto error1;
	}

	/* Update the parent keys of the right block. */
	error = xfs_btree_update_keys(tcur, level);
	if (error)
		goto error1;

	xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);

	*stat = 1;
	return 0;

out0:
	*stat = 0;
	return 0;

error0:
	return error;

error1:
	xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
	return error;
}

/*
 * Split cur/level block in half.
 * Return new block number and the key to its first
 * record (to be inserted into parent).
 */
STATIC int					/* error */
__xfs_btree_split(
	struct xfs_btree_cur	*cur,
	int			level,
	union xfs_btree_ptr	*ptrp,
	union xfs_btree_key	*key,
	struct xfs_btree_cur	**curp,
	int			*stat)		/* success/failure */
{
	union xfs_btree_ptr	lptr;		/* left sibling block ptr */
	struct xfs_buf		*lbp;		/* left buffer pointer */
	struct xfs_btree_block	*left;		/* left btree block */
	union xfs_btree_ptr	rptr;		/* right sibling block ptr */
	struct xfs_buf		*rbp;		/* right buffer pointer */
	struct xfs_btree_block	*right;		/* right btree block */
	union xfs_btree_ptr	rrptr;		/* right-right sibling ptr */
	struct xfs_buf		*rrbp;		/* right-right buffer pointer */
	struct xfs_btree_block	*rrblock;	/* right-right btree block */
	int			lrecs;
	int			rrecs;
	int			src_index;
	int			error;		/* error return value */
#ifdef DEBUG
	int			i;
#endif

	XFS_BTREE_STATS_INC(cur, split);

	/* Set up left block (current one). */
	left = xfs_btree_get_block(cur, level, &lbp);

#ifdef DEBUG
	error = xfs_btree_check_block(cur, left, level, lbp);
	if (error)
		goto error0;
#endif

	xfs_btree_buf_to_ptr(cur, lbp, &lptr);

	/* Allocate the new block. If we can't do it, we're toast. Give up. */
	error = cur->bc_ops->alloc_block(cur, &lptr, &rptr, stat);
	if (error)
		goto error0;
	if (*stat == 0)
		goto out0;
	XFS_BTREE_STATS_INC(cur, alloc);

	/* Set up the new block as "right". */
	error = xfs_btree_get_buf_block(cur, &rptr, 0, &right, &rbp);
	if (error)
		goto error0;

	/* Fill in the btree header for the new right block. */
	xfs_btree_init_block_cur(cur, rbp, xfs_btree_get_level(left), 0);

	/*
	 * Split the entries between the old and the new block evenly.
	 * Make sure that if there's an odd number of entries now, that
	 * each new block will have the same number of entries.
	 */
	lrecs = xfs_btree_get_numrecs(left);
	rrecs = lrecs / 2;
	if ((lrecs & 1) && cur->bc_ptrs[level] <= rrecs + 1)
		rrecs++;
	src_index = (lrecs - rrecs + 1);

	XFS_BTREE_STATS_ADD(cur, moves, rrecs);

	/* Adjust numrecs for the later get_*_keys() calls. */
	lrecs -= rrecs;
	xfs_btree_set_numrecs(left, lrecs);
	xfs_btree_set_numrecs(right, xfs_btree_get_numrecs(right) + rrecs);

	/*
	 * Copy btree block entries from the left block over to the
	 * new block, the right. Update the right block and log the
	 * changes.
	 */
	if (level > 0) {
		/* It's a non-leaf.  Move keys and pointers. */
		union xfs_btree_key	*lkp;	/* left btree key */
		union xfs_btree_ptr	*lpp;	/* left address pointer */
		union xfs_btree_key	*rkp;	/* right btree key */
		union xfs_btree_ptr	*rpp;	/* right address pointer */

		lkp = xfs_btree_key_addr(cur, src_index, left);
		lpp = xfs_btree_ptr_addr(cur, src_index, left);
		rkp = xfs_btree_key_addr(cur, 1, right);
		rpp = xfs_btree_ptr_addr(cur, 1, right);

#ifdef DEBUG
		/* NOTE(review): loop bounds look suspect — lpp already points
		 * at src_index, yet i also starts at src_index and is bounded
		 * by rrecs, so the loop is a no-op whenever src_index >= rrecs.
		 * Confirm the intended range is 0..rrecs-1 relative to lpp. */
		for (i = src_index; i < rrecs; i++) {
			error = xfs_btree_check_ptr(cur, lpp, i, level);
			if (error)
				goto error0;
		}
#endif

		/* Copy the keys & pointers to the new block. */
		xfs_btree_copy_keys(cur, rkp, lkp, rrecs);
		xfs_btree_copy_ptrs(cur, rpp, lpp, rrecs);

		xfs_btree_log_keys(cur, rbp, 1, rrecs);
		xfs_btree_log_ptrs(cur, rbp, 1, rrecs);

		/* Stash the keys of the new block for later insertion. */
		xfs_btree_get_node_keys(cur, right, key);
	} else {
		/* It's a leaf.  Move records. */
		union xfs_btree_rec	*lrp;	/* left record pointer */
		union xfs_btree_rec	*rrp;	/* right record pointer */

		lrp = xfs_btree_rec_addr(cur, src_index, left);
		rrp = xfs_btree_rec_addr(cur, 1, right);

		/* Copy records to the new block. */
		xfs_btree_copy_recs(cur, rrp, lrp, rrecs);
		xfs_btree_log_recs(cur, rbp, 1, rrecs);

		/* Stash the keys of the new block for later insertion. */
		xfs_btree_get_leaf_keys(cur, right, key);
	}

	/*
	 * Find the left block number by looking in the buffer.
	 * Adjust sibling pointers.
	 */
	xfs_btree_get_sibling(cur, left, &rrptr, XFS_BB_RIGHTSIB);
	xfs_btree_set_sibling(cur, right, &rrptr, XFS_BB_RIGHTSIB);
	xfs_btree_set_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
	xfs_btree_set_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);

	xfs_btree_log_block(cur, rbp, XFS_BB_ALL_BITS);
	xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);

	/*
	 * If there's a block to the new block's right, make that block
	 * point back to right instead of to left.
	 */
	if (!xfs_btree_ptr_is_null(cur, &rrptr)) {
		error = xfs_btree_read_buf_block(cur, &rrptr,
							0, &rrblock, &rrbp);
		if (error)
			goto error0;
		xfs_btree_set_sibling(cur, rrblock, &rptr, XFS_BB_LEFTSIB);
		xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
	}

	/* Update the parent high keys of the left block, if needed. */
	if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
		error = xfs_btree_update_keys(cur, level);
		if (error)
			goto error0;
	}

	/*
	 * If the cursor is really in the right block, move it there.
	 * If it's just pointing past the last entry in left, then we'll
	 * insert there, so don't change anything in that case.
	 */
	if (cur->bc_ptrs[level] > lrecs + 1) {
		xfs_btree_setbuf(cur, level, rbp);
		cur->bc_ptrs[level] -= lrecs;
	}
	/*
	 * If there are more levels, we'll need another cursor which refers
	 * the right block, no matter where this cursor was.
	 */
	if (level + 1 < cur->bc_nlevels) {
		error = xfs_btree_dup_cursor(cur, curp);
		if (error)
			goto error0;
		(*curp)->bc_ptrs[level + 1]++;
	}
	*ptrp = rptr;
	*stat = 1;
	return 0;
out0:
	*stat = 0;
	return 0;

error0:
	return error;
}

/* Argument marshalling for running a btree split on a worker thread. */
struct xfs_btree_split_args {
	struct xfs_btree_cur	*cur;
	int			level;
	union xfs_btree_ptr	*ptrp;
	union xfs_btree_key	*key;
	struct xfs_btree_cur	**curp;
	int			*stat;		/* success/failure */
	int			result;
	bool			kswapd;	/* allocation in kswapd context */
	struct completion	*done;
	struct work_struct	work;
};

/*
 * Stack switching interfaces for allocation
 */
static void
xfs_btree_split_worker(
	struct work_struct	*work)
{
	struct xfs_btree_split_args	*args = container_of(work,
						struct xfs_btree_split_args, work);
	unsigned long		pflags;
	unsigned long		new_pflags = PF_MEMALLOC_NOFS;

	/*
	 * we are in a transaction context here, but may also be doing work
	 * in kswapd context, and hence we may need to inherit that state
	 * temporarily to ensure that we don't block waiting for memory reclaim
	 * in any way.
	 */
	if (args->kswapd)
		new_pflags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;

	current_set_flags_nested(&pflags, new_pflags);

	/* Run the actual split and report back through the completion. */
	args->result = __xfs_btree_split(args->cur, args->level, args->ptrp,
					 args->key, args->curp, args->stat);
	complete(args->done);

	current_restore_flags_nested(&pflags, new_pflags);
}

/*
 * BMBT split requests often come in with little stack to work on. Push
 * them off to a worker thread so there is lots of stack to use. For the other
 * btree types, just call directly to avoid the context switch overhead here.
 */
STATIC int					/* error */
xfs_btree_split(
	struct xfs_btree_cur	*cur,
	int			level,
	union xfs_btree_ptr	*ptrp,
	union xfs_btree_key	*key,
	struct xfs_btree_cur	**curp,
	int			*stat)		/* success/failure */
{
	struct xfs_btree_split_args	args;
	DECLARE_COMPLETION_ONSTACK(done);

	/* Only BMBT splits are deferred to the worker; others run inline. */
	if (cur->bc_btnum != XFS_BTNUM_BMAP)
		return __xfs_btree_split(cur, level, ptrp, key, curp, stat);

	args.cur = cur;
	args.level = level;
	args.ptrp = ptrp;
	args.key = key;
	args.curp = curp;
	args.stat = stat;
	args.done = &done;
	args.kswapd = current_is_kswapd();
	INIT_WORK_ONSTACK(&args.work, xfs_btree_split_worker);
	queue_work(xfs_alloc_wq, &args.work);
	wait_for_completion(&done);
	destroy_work_on_stack(&args.work);
	return args.result;
}


/*
 * Copy the old inode root contents into a real block and make the
 * broot point to it.
 */
int						/* error */
xfs_btree_new_iroot(
	struct xfs_btree_cur	*cur,		/* btree cursor */
	int			*logflags,	/* logging flags for inode */
	int			*stat)		/* return status - 0 fail */
{
	struct xfs_buf		*cbp;		/* buffer for cblock */
	struct xfs_btree_block	*block;		/* btree block */
	struct xfs_btree_block	*cblock;	/* child btree block */
	union xfs_btree_key	*ckp;		/* child key pointer */
	union xfs_btree_ptr	*cpp;		/* child ptr pointer */
	union xfs_btree_key	*kp;		/* pointer to btree key */
	union xfs_btree_ptr	*pp;		/* pointer to block addr */
	union xfs_btree_ptr	nptr;		/* new block addr */
	int			level;		/* btree level */
	int			error;		/* error return code */
#ifdef DEBUG
	int			i;		/* loop counter */
#endif

	XFS_BTREE_STATS_INC(cur, newroot);

	/* Only inode-rooted btrees keep their root in the inode fork. */
	ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);

	level = cur->bc_nlevels - 1;

	block = xfs_btree_get_iroot(cur);
	pp = xfs_btree_ptr_addr(cur, 1, block);

	/* Allocate the new block. If we can't do it, we're toast. Give up. */
	error = cur->bc_ops->alloc_block(cur, pp, &nptr, stat);
	if (error)
		goto error0;
	if (*stat == 0)
		return 0;

	XFS_BTREE_STATS_INC(cur, alloc);

	/* Copy the root into a real block. */
	error = xfs_btree_get_buf_block(cur, &nptr, 0, &cblock, &cbp);
	if (error)
		goto error0;

	/*
	 * we can't just memcpy() the root in for CRC enabled btree blocks.
	 * In that case have to also ensure the blkno remains correct
	 */
	memcpy(cblock, block, xfs_btree_block_len(cur));
	if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) {
		if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
			cblock->bb_u.l.bb_blkno = cpu_to_be64(cbp->b_bn);
		else
			cblock->bb_u.s.bb_blkno = cpu_to_be64(cbp->b_bn);
	}

	/* The old root grows one level and keeps a single child pointer. */
	be16_add_cpu(&block->bb_level, 1);
	xfs_btree_set_numrecs(block, 1);
	cur->bc_nlevels++;
	cur->bc_ptrs[level + 1] = 1;

	/* cblock is a copy of the old root, so its numrecs still matches. */
	kp = xfs_btree_key_addr(cur, 1, block);
	ckp = xfs_btree_key_addr(cur, 1, cblock);
	xfs_btree_copy_keys(cur, ckp, kp, xfs_btree_get_numrecs(cblock));

	cpp = xfs_btree_ptr_addr(cur, 1, cblock);
#ifdef DEBUG
	for (i = 0; i < be16_to_cpu(cblock->bb_numrecs); i++) {
		error = xfs_btree_check_ptr(cur, pp, i, level);
		if (error)
			goto error0;
	}
#endif
	xfs_btree_copy_ptrs(cur, cpp, pp, xfs_btree_get_numrecs(cblock));

#ifdef DEBUG
	error = xfs_btree_check_ptr(cur, &nptr, 0, level);
	if (error)
		goto error0;
#endif
	/* The old root's single pointer now refers to the new child block. */
	xfs_btree_copy_ptrs(cur, pp, &nptr, 1);

	/* Shrink the inode fork broot down to hold just the one record. */
	xfs_iroot_realloc(cur->bc_private.b.ip,
			  1 - xfs_btree_get_numrecs(cblock),
			  cur->bc_private.b.whichfork);

	xfs_btree_setbuf(cur, level, cbp);

	/*
	 * Do all this logging at the end so that
	 * the root is at the right level.
	 */
	xfs_btree_log_block(cur, cbp, XFS_BB_ALL_BITS);
	xfs_btree_log_keys(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs));
	xfs_btree_log_ptrs(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs));

	*logflags |=
		XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_private.b.whichfork);
	*stat = 1;
	return 0;
error0:
	return error;
}

/*
 * Allocate a new root block, fill it in.
 */
STATIC int				/* error */
xfs_btree_new_root(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	int			*stat)	/* success/failure */
{
	struct xfs_btree_block	*block;	/* one half of the old root block */
	struct xfs_buf		*bp;	/* buffer containing block */
	int			error;	/* error return value */
	struct xfs_buf		*lbp;	/* left buffer pointer */
	struct xfs_btree_block	*left;	/* left btree block */
	struct xfs_buf		*nbp;	/* new (root) buffer */
	struct xfs_btree_block	*new;	/* new (root) btree block */
	int			nptr;	/* new value for key index, 1 or 2 */
	struct xfs_buf		*rbp;	/* right buffer pointer */
	struct xfs_btree_block	*right;	/* right btree block */
	union xfs_btree_ptr	rptr;
	union xfs_btree_ptr	lptr;

	XFS_BTREE_STATS_INC(cur, newroot);

	/* initialise our start point from the cursor */
	cur->bc_ops->init_ptr_from_cur(cur, &rptr);

	/* Allocate the new block. If we can't do it, we're toast. Give up. */
	error = cur->bc_ops->alloc_block(cur, &rptr, &lptr, stat);
	if (error)
		goto error0;
	if (*stat == 0)
		goto out0;
	XFS_BTREE_STATS_INC(cur, alloc);

	/* Set up the new block. */
	error = xfs_btree_get_buf_block(cur, &lptr, 0, &new, &nbp);
	if (error)
		goto error0;

	/* Set the root in the holding structure  increasing the level by 1. */
	cur->bc_ops->set_root(cur, &lptr, 1);

	/*
	 * At the previous root level there are now two blocks: the old root,
	 * and the new block generated when it was split.  We don't know which
	 * one the cursor is pointing at, so we set up variables "left" and
	 * "right" for each case.
	 */
	block = xfs_btree_get_block(cur, cur->bc_nlevels - 1, &bp);

#ifdef DEBUG
	error = xfs_btree_check_block(cur, block, cur->bc_nlevels - 1, bp);
	if (error)
		goto error0;
#endif

	/* A non-null right sibling means our block is the left half. */
	xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
	if (!xfs_btree_ptr_is_null(cur, &rptr)) {
		/* Our block is left, pick up the right block. */
		lbp = bp;
		xfs_btree_buf_to_ptr(cur, lbp, &lptr);
		left = block;
		error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
		if (error)
			goto error0;
		bp = rbp;
		nptr = 1;
	} else {
		/* Our block is right, pick up the left block. */
		rbp = bp;
		xfs_btree_buf_to_ptr(cur, rbp, &rptr);
		right = block;
		xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
		error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
		if (error)
			goto error0;
		bp = lbp;
		nptr = 2;
	}

	/* Fill in the new block's btree header and log it. */
	xfs_btree_init_block_cur(cur, nbp, cur->bc_nlevels, 2);
	xfs_btree_log_block(cur, nbp, XFS_BB_ALL_BITS);
	ASSERT(!xfs_btree_ptr_is_null(cur, &lptr) &&
			!xfs_btree_ptr_is_null(cur, &rptr));

	/* Fill in the key data in the new root. */
	if (xfs_btree_get_level(left) > 0) {
		/*
		 * Get the keys for the left block's keys and put them directly
		 * in the parent block.  Do the same for the right block.
		 */
		xfs_btree_get_node_keys(cur, left,
				xfs_btree_key_addr(cur, 1, new));
		xfs_btree_get_node_keys(cur, right,
				xfs_btree_key_addr(cur, 2, new));
	} else {
		/*
		 * Get the keys for the left block's records and put them
		 * directly in the parent block.  Do the same for the right
		 * block.
		 */
		xfs_btree_get_leaf_keys(cur, left,
			xfs_btree_key_addr(cur, 1, new));
		xfs_btree_get_leaf_keys(cur, right,
			xfs_btree_key_addr(cur, 2, new));
	}
	xfs_btree_log_keys(cur, nbp, 1, 2);

	/* Fill in the pointer data in the new root. */
	xfs_btree_copy_ptrs(cur,
		xfs_btree_ptr_addr(cur, 1, new), &lptr, 1);
	xfs_btree_copy_ptrs(cur,
		xfs_btree_ptr_addr(cur, 2, new), &rptr, 1);
	xfs_btree_log_ptrs(cur, nbp, 1, 2);

	/* Fix up the cursor. */
	xfs_btree_setbuf(cur, cur->bc_nlevels, nbp);
	cur->bc_ptrs[cur->bc_nlevels] = nptr;
	cur->bc_nlevels++;
	*stat = 1;
	return 0;
error0:
	return error;
out0:
	*stat = 0;
	return 0;
}

/*
 * Make room in the block at cur/level, by shifting to a neighbor, growing
 * or replacing an inode root, or splitting the block.  On success *stat is 1
 * and *index (and possibly *oindex) reflect the cursor's new position.
 */
STATIC int
xfs_btree_make_block_unfull(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	int			level,	/* btree level */
	int			numrecs,/* # of recs in block */
	int			*oindex,/* old tree index */
	int			*index,	/* new tree index */
	union xfs_btree_ptr	*nptr,	/* new btree ptr */
	struct xfs_btree_cur	**ncur,	/* new btree cursor */
	union xfs_btree_key	*key,	/* key of new block */
	int			*stat)
{
	int			error = 0;

	if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
	    level == cur->bc_nlevels - 1) {
		struct xfs_inode *ip = cur->bc_private.b.ip;

		if (numrecs < cur->bc_ops->get_dmaxrecs(cur, level)) {
			/* A root block that can be made bigger. */
			xfs_iroot_realloc(ip, 1, cur->bc_private.b.whichfork);
			*stat = 1;
		} else {
			/* A root block that needs replacing */
			int	logflags = 0;

			error = xfs_btree_new_iroot(cur, &logflags, stat);
			if (error || *stat == 0)
				return error;

			xfs_trans_log_inode(cur->bc_tp, ip, logflags);
		}

		return 0;
	}

	/* First, try shifting an entry to the right neighbor. */
	error = xfs_btree_rshift(cur, level, stat);
	if (error || *stat)
		return error;

	/* Next, try shifting an entry to the left neighbor. */
	error = xfs_btree_lshift(cur, level, stat);
	if (error)
		return error;

	if (*stat) {
		/* lshift moved the cursor; re-read its position. */
		*oindex = *index = cur->bc_ptrs[level];
		return 0;
	}

	/*
	 * Next, try splitting the current block in half.
	 *
	 * If this works we have to re-set our variables because we
	 * could be in a different block now.
	 */
	error = xfs_btree_split(cur, level, nptr, key, ncur, stat);
	if (error || *stat == 0)
		return error;


	*index = cur->bc_ptrs[level];
	return 0;
}

/*
 * Insert one record/level.  Return information to the caller
 * allowing the next level up to proceed if necessary.
 */
STATIC int
xfs_btree_insrec(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	int			level,	/* level to insert record at */
	union xfs_btree_ptr	*ptrp,	/* i/o: block number inserted */
	union xfs_btree_rec	*rec,	/* record to insert */
	union xfs_btree_key	*key,	/* i/o: block key for ptrp */
	struct xfs_btree_cur	**curp,	/* output: new cursor replacing cur */
	int			*stat)	/* success/failure */
{
	struct xfs_btree_block	*block;	/* btree block */
	struct xfs_buf		*bp;	/* buffer for block */
	union xfs_btree_ptr	nptr;	/* new block ptr */
	struct xfs_btree_cur	*ncur;	/* new btree cursor */
	union xfs_btree_key	nkey;	/* new block key */
	union xfs_btree_key	*lkey;
	int			optr;	/* old key/record index */
	int			ptr;	/* key/record index */
	int			numrecs;/* number of records */
	int			error;	/* error return value */
#ifdef DEBUG
	int			i;
#endif
	xfs_daddr_t		old_bn;

	ncur = NULL;
	lkey = &nkey;

	/*
	 * If we have an external root pointer, and we've made it to the
	 * root level, allocate a new root block and we're done.
	 */
	if (!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
	    (level >= cur->bc_nlevels)) {
		error = xfs_btree_new_root(cur, stat);
		xfs_btree_set_ptr_null(cur, ptrp);

		return error;
	}

	/* If we're off the left edge, return failure. */
	ptr = cur->bc_ptrs[level];
	if (ptr == 0) {
		*stat = 0;
		return 0;
	}

	optr = ptr;

	XFS_BTREE_STATS_INC(cur, insrec);

	/* Get pointers to the btree buffer and block. */
	block = xfs_btree_get_block(cur, level, &bp);
	/* Remember the block we started in so we can detect a split below. */
	old_bn = bp ? bp->b_bn : XFS_BUF_DADDR_NULL;
	numrecs = xfs_btree_get_numrecs(block);

#ifdef DEBUG
	error = xfs_btree_check_block(cur, block, level, bp);
	if (error)
		goto error0;

	/* Check that the new entry is being inserted in the right place. */
	if (ptr <= numrecs) {
		if (level == 0) {
			ASSERT(cur->bc_ops->recs_inorder(cur, rec,
				xfs_btree_rec_addr(cur, ptr, block)));
		} else {
			ASSERT(cur->bc_ops->keys_inorder(cur, key,
				xfs_btree_key_addr(cur, ptr, block)));
		}
	}
#endif

	/*
	 * If the block is full, we can't insert the new entry until we
	 * make the block un-full.
	 */
	xfs_btree_set_ptr_null(cur, &nptr);
	if (numrecs == cur->bc_ops->get_maxrecs(cur, level)) {
		error = xfs_btree_make_block_unfull(cur, level, numrecs,
					&optr, &ptr, &nptr, &ncur, lkey, stat);
		if (error || *stat == 0)
			goto error0;
	}

	/*
	 * The current block may have changed if the block was
	 * previously full and we have just made space in it.
	 */
	block = xfs_btree_get_block(cur, level, &bp);
	numrecs = xfs_btree_get_numrecs(block);

#ifdef DEBUG
	error = xfs_btree_check_block(cur, block, level, bp);
	if (error)
		return error;
#endif

	/*
	 * At this point we know there's room for our new entry in the block
	 * we're pointing at.
	 */
	XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr + 1);

	if (level > 0) {
		/* It's a nonleaf. make a hole in the keys and ptrs */
		union xfs_btree_key	*kp;
		union xfs_btree_ptr	*pp;

		kp = xfs_btree_key_addr(cur, ptr, block);
		pp = xfs_btree_ptr_addr(cur, ptr, block);

#ifdef DEBUG
		for (i = numrecs - ptr; i >= 0; i--) {
			error = xfs_btree_check_ptr(cur, pp, i, level);
			if (error)
				return error;
		}
#endif

		xfs_btree_shift_keys(cur, kp, 1, numrecs - ptr + 1);
		xfs_btree_shift_ptrs(cur, pp, 1, numrecs - ptr + 1);

#ifdef DEBUG
		error = xfs_btree_check_ptr(cur, ptrp, 0, level);
		if (error)
			goto error0;
#endif

		/* Now put the new data in, bump numrecs and log it. */
		xfs_btree_copy_keys(cur, kp, key, 1);
		xfs_btree_copy_ptrs(cur, pp, ptrp, 1);
		numrecs++;
		xfs_btree_set_numrecs(block, numrecs);
		xfs_btree_log_ptrs(cur, bp, ptr, numrecs);
		xfs_btree_log_keys(cur, bp, ptr, numrecs);
#ifdef DEBUG
		if (ptr < numrecs) {
			ASSERT(cur->bc_ops->keys_inorder(cur, kp,
				xfs_btree_key_addr(cur, ptr + 1, block)));
		}
#endif
	} else {
		/* It's a leaf. make a hole in the records */
		union xfs_btree_rec             *rp;

		rp = xfs_btree_rec_addr(cur, ptr, block);

		xfs_btree_shift_recs(cur, rp, 1, numrecs - ptr + 1);

		/* Now put the new data in, bump numrecs and log it. */
		xfs_btree_copy_recs(cur, rp, rec, 1);
		xfs_btree_set_numrecs(block, ++numrecs);
		xfs_btree_log_recs(cur, bp, ptr, numrecs);
#ifdef DEBUG
		if (ptr < numrecs) {
			ASSERT(cur->bc_ops->recs_inorder(cur, rp,
				xfs_btree_rec_addr(cur, ptr + 1, block)));
		}
#endif
	}

	/* Log the new number of records in the btree header. */
	xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);

	/*
	 * If we just inserted into a new tree block, we have to
	 * recalculate nkey here because nkey is out of date.
	 *
	 * Otherwise we're just updating an existing block (having shoved
	 * some records into the new tree block), so use the regular key
	 * update mechanism.
	 */
	if (bp && bp->b_bn != old_bn) {
		xfs_btree_get_keys(cur, block, lkey);
	} else if (xfs_btree_needs_key_update(cur, optr)) {
		error = xfs_btree_update_keys(cur, level);
		if (error)
			goto error0;
	}

	/*
	 * If we are tracking the last record in the tree and
	 * we are at the far right edge of the tree, update it.
	 */
	if (xfs_btree_is_lastrec(cur, block, level)) {
		cur->bc_ops->update_lastrec(cur, block, rec,
					    ptr, LASTREC_INSREC);
	}

	/*
	 * Return the new block number, if any.
	 * If there is one, give back a record value and a cursor too.
	 */
	*ptrp = nptr;
	if (!xfs_btree_ptr_is_null(cur, &nptr)) {
		xfs_btree_copy_keys(cur, key, lkey, 1);
		*curp = ncur;
	}

	*stat = 1;
	return 0;

error0:
	return error;
}

/*
 * Insert the record at the point referenced by cur.
 *
 * A multi-level split of the tree on insert will invalidate the original
 * cursor.  All callers of this function should assume that the cursor is
 * no longer valid and revalidate it.
3425 */ 3426 int 3427 xfs_btree_insert( 3428 struct xfs_btree_cur *cur, 3429 int *stat) 3430 { 3431 int error; /* error return value */ 3432 int i; /* result value, 0 for failure */ 3433 int level; /* current level number in btree */ 3434 union xfs_btree_ptr nptr; /* new block number (split result) */ 3435 struct xfs_btree_cur *ncur; /* new cursor (split result) */ 3436 struct xfs_btree_cur *pcur; /* previous level's cursor */ 3437 union xfs_btree_key bkey; /* key of block to insert */ 3438 union xfs_btree_key *key; 3439 union xfs_btree_rec rec; /* record to insert */ 3440 3441 level = 0; 3442 ncur = NULL; 3443 pcur = cur; 3444 key = &bkey; 3445 3446 xfs_btree_set_ptr_null(cur, &nptr); 3447 3448 /* Make a key out of the record data to be inserted, and save it. */ 3449 cur->bc_ops->init_rec_from_cur(cur, &rec); 3450 cur->bc_ops->init_key_from_rec(key, &rec); 3451 3452 /* 3453 * Loop going up the tree, starting at the leaf level. 3454 * Stop when we don't get a split block, that must mean that 3455 * the insert is finished with this level. 3456 */ 3457 do { 3458 /* 3459 * Insert nrec/nptr into this level of the tree. 3460 * Note if we fail, nptr will be null. 3461 */ 3462 error = xfs_btree_insrec(pcur, level, &nptr, &rec, key, 3463 &ncur, &i); 3464 if (error) { 3465 if (pcur != cur) 3466 xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR); 3467 goto error0; 3468 } 3469 3470 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); 3471 level++; 3472 3473 /* 3474 * See if the cursor we just used is trash. 3475 * Can't trash the caller's cursor, but otherwise we should 3476 * if ncur is a new cursor or we're about to be done. 
3477 */ 3478 if (pcur != cur && 3479 (ncur || xfs_btree_ptr_is_null(cur, &nptr))) { 3480 /* Save the state from the cursor before we trash it */ 3481 if (cur->bc_ops->update_cursor) 3482 cur->bc_ops->update_cursor(pcur, cur); 3483 cur->bc_nlevels = pcur->bc_nlevels; 3484 xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR); 3485 } 3486 /* If we got a new cursor, switch to it. */ 3487 if (ncur) { 3488 pcur = ncur; 3489 ncur = NULL; 3490 } 3491 } while (!xfs_btree_ptr_is_null(cur, &nptr)); 3492 3493 *stat = i; 3494 return 0; 3495 error0: 3496 return error; 3497 } 3498 3499 /* 3500 * Try to merge a non-leaf block back into the inode root. 3501 * 3502 * Note: the killroot names comes from the fact that we're effectively 3503 * killing the old root block. But because we can't just delete the 3504 * inode we have to copy the single block it was pointing to into the 3505 * inode. 3506 */ 3507 STATIC int 3508 xfs_btree_kill_iroot( 3509 struct xfs_btree_cur *cur) 3510 { 3511 int whichfork = cur->bc_private.b.whichfork; 3512 struct xfs_inode *ip = cur->bc_private.b.ip; 3513 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); 3514 struct xfs_btree_block *block; 3515 struct xfs_btree_block *cblock; 3516 union xfs_btree_key *kp; 3517 union xfs_btree_key *ckp; 3518 union xfs_btree_ptr *pp; 3519 union xfs_btree_ptr *cpp; 3520 struct xfs_buf *cbp; 3521 int level; 3522 int index; 3523 int numrecs; 3524 int error; 3525 #ifdef DEBUG 3526 union xfs_btree_ptr ptr; 3527 int i; 3528 #endif 3529 3530 ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE); 3531 ASSERT(cur->bc_nlevels > 1); 3532 3533 /* 3534 * Don't deal with the root block needs to be a leaf case. 3535 * We're just going to turn the thing back into extents anyway. 3536 */ 3537 level = cur->bc_nlevels - 1; 3538 if (level == 1) 3539 goto out0; 3540 3541 /* 3542 * Give up if the root has multiple children. 
3543 */ 3544 block = xfs_btree_get_iroot(cur); 3545 if (xfs_btree_get_numrecs(block) != 1) 3546 goto out0; 3547 3548 cblock = xfs_btree_get_block(cur, level - 1, &cbp); 3549 numrecs = xfs_btree_get_numrecs(cblock); 3550 3551 /* 3552 * Only do this if the next level will fit. 3553 * Then the data must be copied up to the inode, 3554 * instead of freeing the root you free the next level. 3555 */ 3556 if (numrecs > cur->bc_ops->get_dmaxrecs(cur, level)) 3557 goto out0; 3558 3559 XFS_BTREE_STATS_INC(cur, killroot); 3560 3561 #ifdef DEBUG 3562 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB); 3563 ASSERT(xfs_btree_ptr_is_null(cur, &ptr)); 3564 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB); 3565 ASSERT(xfs_btree_ptr_is_null(cur, &ptr)); 3566 #endif 3567 3568 index = numrecs - cur->bc_ops->get_maxrecs(cur, level); 3569 if (index) { 3570 xfs_iroot_realloc(cur->bc_private.b.ip, index, 3571 cur->bc_private.b.whichfork); 3572 block = ifp->if_broot; 3573 } 3574 3575 be16_add_cpu(&block->bb_numrecs, index); 3576 ASSERT(block->bb_numrecs == cblock->bb_numrecs); 3577 3578 kp = xfs_btree_key_addr(cur, 1, block); 3579 ckp = xfs_btree_key_addr(cur, 1, cblock); 3580 xfs_btree_copy_keys(cur, kp, ckp, numrecs); 3581 3582 pp = xfs_btree_ptr_addr(cur, 1, block); 3583 cpp = xfs_btree_ptr_addr(cur, 1, cblock); 3584 #ifdef DEBUG 3585 for (i = 0; i < numrecs; i++) { 3586 error = xfs_btree_check_ptr(cur, cpp, i, level - 1); 3587 if (error) 3588 return error; 3589 } 3590 #endif 3591 xfs_btree_copy_ptrs(cur, pp, cpp, numrecs); 3592 3593 error = xfs_btree_free_block(cur, cbp); 3594 if (error) 3595 return error; 3596 3597 cur->bc_bufs[level - 1] = NULL; 3598 be16_add_cpu(&block->bb_level, -1); 3599 xfs_trans_log_inode(cur->bc_tp, ip, 3600 XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_private.b.whichfork)); 3601 cur->bc_nlevels--; 3602 out0: 3603 return 0; 3604 } 3605 3606 /* 3607 * Kill the current root node, and replace it with it's only child node. 
3608 */ 3609 STATIC int 3610 xfs_btree_kill_root( 3611 struct xfs_btree_cur *cur, 3612 struct xfs_buf *bp, 3613 int level, 3614 union xfs_btree_ptr *newroot) 3615 { 3616 int error; 3617 3618 XFS_BTREE_STATS_INC(cur, killroot); 3619 3620 /* 3621 * Update the root pointer, decreasing the level by 1 and then 3622 * free the old root. 3623 */ 3624 cur->bc_ops->set_root(cur, newroot, -1); 3625 3626 error = xfs_btree_free_block(cur, bp); 3627 if (error) 3628 return error; 3629 3630 cur->bc_bufs[level] = NULL; 3631 cur->bc_ra[level] = 0; 3632 cur->bc_nlevels--; 3633 3634 return 0; 3635 } 3636 3637 STATIC int 3638 xfs_btree_dec_cursor( 3639 struct xfs_btree_cur *cur, 3640 int level, 3641 int *stat) 3642 { 3643 int error; 3644 int i; 3645 3646 if (level > 0) { 3647 error = xfs_btree_decrement(cur, level, &i); 3648 if (error) 3649 return error; 3650 } 3651 3652 *stat = 1; 3653 return 0; 3654 } 3655 3656 /* 3657 * Single level of the btree record deletion routine. 3658 * Delete record pointed to by cur/level. 3659 * Remove the record from its block then rebalance the tree. 3660 * Return 0 for error, 1 for done, 2 to go on to the next level. 
3661 */ 3662 STATIC int /* error */ 3663 xfs_btree_delrec( 3664 struct xfs_btree_cur *cur, /* btree cursor */ 3665 int level, /* level removing record from */ 3666 int *stat) /* fail/done/go-on */ 3667 { 3668 struct xfs_btree_block *block; /* btree block */ 3669 union xfs_btree_ptr cptr; /* current block ptr */ 3670 struct xfs_buf *bp; /* buffer for block */ 3671 int error; /* error return value */ 3672 int i; /* loop counter */ 3673 union xfs_btree_ptr lptr; /* left sibling block ptr */ 3674 struct xfs_buf *lbp; /* left buffer pointer */ 3675 struct xfs_btree_block *left; /* left btree block */ 3676 int lrecs = 0; /* left record count */ 3677 int ptr; /* key/record index */ 3678 union xfs_btree_ptr rptr; /* right sibling block ptr */ 3679 struct xfs_buf *rbp; /* right buffer pointer */ 3680 struct xfs_btree_block *right; /* right btree block */ 3681 struct xfs_btree_block *rrblock; /* right-right btree block */ 3682 struct xfs_buf *rrbp; /* right-right buffer pointer */ 3683 int rrecs = 0; /* right record count */ 3684 struct xfs_btree_cur *tcur; /* temporary btree cursor */ 3685 int numrecs; /* temporary numrec count */ 3686 3687 tcur = NULL; 3688 3689 /* Get the index of the entry being deleted, check for nothing there. */ 3690 ptr = cur->bc_ptrs[level]; 3691 if (ptr == 0) { 3692 *stat = 0; 3693 return 0; 3694 } 3695 3696 /* Get the buffer & block containing the record or key/ptr. */ 3697 block = xfs_btree_get_block(cur, level, &bp); 3698 numrecs = xfs_btree_get_numrecs(block); 3699 3700 #ifdef DEBUG 3701 error = xfs_btree_check_block(cur, block, level, bp); 3702 if (error) 3703 goto error0; 3704 #endif 3705 3706 /* Fail if we're off the end of the block. */ 3707 if (ptr > numrecs) { 3708 *stat = 0; 3709 return 0; 3710 } 3711 3712 XFS_BTREE_STATS_INC(cur, delrec); 3713 XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr); 3714 3715 /* Excise the entries being deleted. */ 3716 if (level > 0) { 3717 /* It's a nonleaf. 
operate on keys and ptrs */ 3718 union xfs_btree_key *lkp; 3719 union xfs_btree_ptr *lpp; 3720 3721 lkp = xfs_btree_key_addr(cur, ptr + 1, block); 3722 lpp = xfs_btree_ptr_addr(cur, ptr + 1, block); 3723 3724 #ifdef DEBUG 3725 for (i = 0; i < numrecs - ptr; i++) { 3726 error = xfs_btree_check_ptr(cur, lpp, i, level); 3727 if (error) 3728 goto error0; 3729 } 3730 #endif 3731 3732 if (ptr < numrecs) { 3733 xfs_btree_shift_keys(cur, lkp, -1, numrecs - ptr); 3734 xfs_btree_shift_ptrs(cur, lpp, -1, numrecs - ptr); 3735 xfs_btree_log_keys(cur, bp, ptr, numrecs - 1); 3736 xfs_btree_log_ptrs(cur, bp, ptr, numrecs - 1); 3737 } 3738 } else { 3739 /* It's a leaf. operate on records */ 3740 if (ptr < numrecs) { 3741 xfs_btree_shift_recs(cur, 3742 xfs_btree_rec_addr(cur, ptr + 1, block), 3743 -1, numrecs - ptr); 3744 xfs_btree_log_recs(cur, bp, ptr, numrecs - 1); 3745 } 3746 } 3747 3748 /* 3749 * Decrement and log the number of entries in the block. 3750 */ 3751 xfs_btree_set_numrecs(block, --numrecs); 3752 xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS); 3753 3754 /* 3755 * If we are tracking the last record in the tree and 3756 * we are at the far right edge of the tree, update it. 3757 */ 3758 if (xfs_btree_is_lastrec(cur, block, level)) { 3759 cur->bc_ops->update_lastrec(cur, block, NULL, 3760 ptr, LASTREC_DELREC); 3761 } 3762 3763 /* 3764 * We're at the root level. First, shrink the root block in-memory. 3765 * Try to get rid of the next level down. If we can't then there's 3766 * nothing left to do. 
3767 */ 3768 if (level == cur->bc_nlevels - 1) { 3769 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) { 3770 xfs_iroot_realloc(cur->bc_private.b.ip, -1, 3771 cur->bc_private.b.whichfork); 3772 3773 error = xfs_btree_kill_iroot(cur); 3774 if (error) 3775 goto error0; 3776 3777 error = xfs_btree_dec_cursor(cur, level, stat); 3778 if (error) 3779 goto error0; 3780 *stat = 1; 3781 return 0; 3782 } 3783 3784 /* 3785 * If this is the root level, and there's only one entry left, 3786 * and it's NOT the leaf level, then we can get rid of this 3787 * level. 3788 */ 3789 if (numrecs == 1 && level > 0) { 3790 union xfs_btree_ptr *pp; 3791 /* 3792 * pp is still set to the first pointer in the block. 3793 * Make it the new root of the btree. 3794 */ 3795 pp = xfs_btree_ptr_addr(cur, 1, block); 3796 error = xfs_btree_kill_root(cur, bp, level, pp); 3797 if (error) 3798 goto error0; 3799 } else if (level > 0) { 3800 error = xfs_btree_dec_cursor(cur, level, stat); 3801 if (error) 3802 goto error0; 3803 } 3804 *stat = 1; 3805 return 0; 3806 } 3807 3808 /* 3809 * If we deleted the leftmost entry in the block, update the 3810 * key values above us in the tree. 3811 */ 3812 if (xfs_btree_needs_key_update(cur, ptr)) { 3813 error = xfs_btree_update_keys(cur, level); 3814 if (error) 3815 goto error0; 3816 } 3817 3818 /* 3819 * If the number of records remaining in the block is at least 3820 * the minimum, we're done. 3821 */ 3822 if (numrecs >= cur->bc_ops->get_minrecs(cur, level)) { 3823 error = xfs_btree_dec_cursor(cur, level, stat); 3824 if (error) 3825 goto error0; 3826 return 0; 3827 } 3828 3829 /* 3830 * Otherwise, we have to move some records around to keep the 3831 * tree balanced. Look at the left and right sibling blocks to 3832 * see if we can re-balance by moving only one record. 
3833 */ 3834 xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB); 3835 xfs_btree_get_sibling(cur, block, &lptr, XFS_BB_LEFTSIB); 3836 3837 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) { 3838 /* 3839 * One child of root, need to get a chance to copy its contents 3840 * into the root and delete it. Can't go up to next level, 3841 * there's nothing to delete there. 3842 */ 3843 if (xfs_btree_ptr_is_null(cur, &rptr) && 3844 xfs_btree_ptr_is_null(cur, &lptr) && 3845 level == cur->bc_nlevels - 2) { 3846 error = xfs_btree_kill_iroot(cur); 3847 if (!error) 3848 error = xfs_btree_dec_cursor(cur, level, stat); 3849 if (error) 3850 goto error0; 3851 return 0; 3852 } 3853 } 3854 3855 ASSERT(!xfs_btree_ptr_is_null(cur, &rptr) || 3856 !xfs_btree_ptr_is_null(cur, &lptr)); 3857 3858 /* 3859 * Duplicate the cursor so our btree manipulations here won't 3860 * disrupt the next level up. 3861 */ 3862 error = xfs_btree_dup_cursor(cur, &tcur); 3863 if (error) 3864 goto error0; 3865 3866 /* 3867 * If there's a right sibling, see if it's ok to shift an entry 3868 * out of it. 3869 */ 3870 if (!xfs_btree_ptr_is_null(cur, &rptr)) { 3871 /* 3872 * Move the temp cursor to the last entry in the next block. 3873 * Actually any entry but the first would suffice. 3874 */ 3875 i = xfs_btree_lastrec(tcur, level); 3876 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); 3877 3878 error = xfs_btree_increment(tcur, level, &i); 3879 if (error) 3880 goto error0; 3881 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); 3882 3883 i = xfs_btree_lastrec(tcur, level); 3884 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); 3885 3886 /* Grab a pointer to the block. */ 3887 right = xfs_btree_get_block(tcur, level, &rbp); 3888 #ifdef DEBUG 3889 error = xfs_btree_check_block(tcur, right, level, rbp); 3890 if (error) 3891 goto error0; 3892 #endif 3893 /* Grab the current block number, for future use. 
*/ 3894 xfs_btree_get_sibling(tcur, right, &cptr, XFS_BB_LEFTSIB); 3895 3896 /* 3897 * If right block is full enough so that removing one entry 3898 * won't make it too empty, and left-shifting an entry out 3899 * of right to us works, we're done. 3900 */ 3901 if (xfs_btree_get_numrecs(right) - 1 >= 3902 cur->bc_ops->get_minrecs(tcur, level)) { 3903 error = xfs_btree_lshift(tcur, level, &i); 3904 if (error) 3905 goto error0; 3906 if (i) { 3907 ASSERT(xfs_btree_get_numrecs(block) >= 3908 cur->bc_ops->get_minrecs(tcur, level)); 3909 3910 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); 3911 tcur = NULL; 3912 3913 error = xfs_btree_dec_cursor(cur, level, stat); 3914 if (error) 3915 goto error0; 3916 return 0; 3917 } 3918 } 3919 3920 /* 3921 * Otherwise, grab the number of records in right for 3922 * future reference, and fix up the temp cursor to point 3923 * to our block again (last record). 3924 */ 3925 rrecs = xfs_btree_get_numrecs(right); 3926 if (!xfs_btree_ptr_is_null(cur, &lptr)) { 3927 i = xfs_btree_firstrec(tcur, level); 3928 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); 3929 3930 error = xfs_btree_decrement(tcur, level, &i); 3931 if (error) 3932 goto error0; 3933 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); 3934 } 3935 } 3936 3937 /* 3938 * If there's a left sibling, see if it's ok to shift an entry 3939 * out of it. 3940 */ 3941 if (!xfs_btree_ptr_is_null(cur, &lptr)) { 3942 /* 3943 * Move the temp cursor to the first entry in the 3944 * previous block. 3945 */ 3946 i = xfs_btree_firstrec(tcur, level); 3947 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); 3948 3949 error = xfs_btree_decrement(tcur, level, &i); 3950 if (error) 3951 goto error0; 3952 i = xfs_btree_firstrec(tcur, level); 3953 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); 3954 3955 /* Grab a pointer to the block. 
*/ 3956 left = xfs_btree_get_block(tcur, level, &lbp); 3957 #ifdef DEBUG 3958 error = xfs_btree_check_block(cur, left, level, lbp); 3959 if (error) 3960 goto error0; 3961 #endif 3962 /* Grab the current block number, for future use. */ 3963 xfs_btree_get_sibling(tcur, left, &cptr, XFS_BB_RIGHTSIB); 3964 3965 /* 3966 * If left block is full enough so that removing one entry 3967 * won't make it too empty, and right-shifting an entry out 3968 * of left to us works, we're done. 3969 */ 3970 if (xfs_btree_get_numrecs(left) - 1 >= 3971 cur->bc_ops->get_minrecs(tcur, level)) { 3972 error = xfs_btree_rshift(tcur, level, &i); 3973 if (error) 3974 goto error0; 3975 if (i) { 3976 ASSERT(xfs_btree_get_numrecs(block) >= 3977 cur->bc_ops->get_minrecs(tcur, level)); 3978 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); 3979 tcur = NULL; 3980 if (level == 0) 3981 cur->bc_ptrs[0]++; 3982 3983 *stat = 1; 3984 return 0; 3985 } 3986 } 3987 3988 /* 3989 * Otherwise, grab the number of records in right for 3990 * future reference. 3991 */ 3992 lrecs = xfs_btree_get_numrecs(left); 3993 } 3994 3995 /* Delete the temp cursor, we're done with it. */ 3996 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); 3997 tcur = NULL; 3998 3999 /* If here, we need to do a join to keep the tree balanced. */ 4000 ASSERT(!xfs_btree_ptr_is_null(cur, &cptr)); 4001 4002 if (!xfs_btree_ptr_is_null(cur, &lptr) && 4003 lrecs + xfs_btree_get_numrecs(block) <= 4004 cur->bc_ops->get_maxrecs(cur, level)) { 4005 /* 4006 * Set "right" to be the starting block, 4007 * "left" to be the left neighbor. 4008 */ 4009 rptr = cptr; 4010 right = block; 4011 rbp = bp; 4012 error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp); 4013 if (error) 4014 goto error0; 4015 4016 /* 4017 * If that won't work, see if we can join with the right neighbor block. 
4018 */ 4019 } else if (!xfs_btree_ptr_is_null(cur, &rptr) && 4020 rrecs + xfs_btree_get_numrecs(block) <= 4021 cur->bc_ops->get_maxrecs(cur, level)) { 4022 /* 4023 * Set "left" to be the starting block, 4024 * "right" to be the right neighbor. 4025 */ 4026 lptr = cptr; 4027 left = block; 4028 lbp = bp; 4029 error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp); 4030 if (error) 4031 goto error0; 4032 4033 /* 4034 * Otherwise, we can't fix the imbalance. 4035 * Just return. This is probably a logic error, but it's not fatal. 4036 */ 4037 } else { 4038 error = xfs_btree_dec_cursor(cur, level, stat); 4039 if (error) 4040 goto error0; 4041 return 0; 4042 } 4043 4044 rrecs = xfs_btree_get_numrecs(right); 4045 lrecs = xfs_btree_get_numrecs(left); 4046 4047 /* 4048 * We're now going to join "left" and "right" by moving all the stuff 4049 * in "right" to "left" and deleting "right". 4050 */ 4051 XFS_BTREE_STATS_ADD(cur, moves, rrecs); 4052 if (level > 0) { 4053 /* It's a non-leaf. Move keys and pointers. */ 4054 union xfs_btree_key *lkp; /* left btree key */ 4055 union xfs_btree_ptr *lpp; /* left address pointer */ 4056 union xfs_btree_key *rkp; /* right btree key */ 4057 union xfs_btree_ptr *rpp; /* right address pointer */ 4058 4059 lkp = xfs_btree_key_addr(cur, lrecs + 1, left); 4060 lpp = xfs_btree_ptr_addr(cur, lrecs + 1, left); 4061 rkp = xfs_btree_key_addr(cur, 1, right); 4062 rpp = xfs_btree_ptr_addr(cur, 1, right); 4063 #ifdef DEBUG 4064 for (i = 1; i < rrecs; i++) { 4065 error = xfs_btree_check_ptr(cur, rpp, i, level); 4066 if (error) 4067 goto error0; 4068 } 4069 #endif 4070 xfs_btree_copy_keys(cur, lkp, rkp, rrecs); 4071 xfs_btree_copy_ptrs(cur, lpp, rpp, rrecs); 4072 4073 xfs_btree_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs); 4074 xfs_btree_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs); 4075 } else { 4076 /* It's a leaf. Move records. 
*/ 4077 union xfs_btree_rec *lrp; /* left record pointer */ 4078 union xfs_btree_rec *rrp; /* right record pointer */ 4079 4080 lrp = xfs_btree_rec_addr(cur, lrecs + 1, left); 4081 rrp = xfs_btree_rec_addr(cur, 1, right); 4082 4083 xfs_btree_copy_recs(cur, lrp, rrp, rrecs); 4084 xfs_btree_log_recs(cur, lbp, lrecs + 1, lrecs + rrecs); 4085 } 4086 4087 XFS_BTREE_STATS_INC(cur, join); 4088 4089 /* 4090 * Fix up the number of records and right block pointer in the 4091 * surviving block, and log it. 4092 */ 4093 xfs_btree_set_numrecs(left, lrecs + rrecs); 4094 xfs_btree_get_sibling(cur, right, &cptr, XFS_BB_RIGHTSIB), 4095 xfs_btree_set_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB); 4096 xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB); 4097 4098 /* If there is a right sibling, point it to the remaining block. */ 4099 xfs_btree_get_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB); 4100 if (!xfs_btree_ptr_is_null(cur, &cptr)) { 4101 error = xfs_btree_read_buf_block(cur, &cptr, 0, &rrblock, &rrbp); 4102 if (error) 4103 goto error0; 4104 xfs_btree_set_sibling(cur, rrblock, &lptr, XFS_BB_LEFTSIB); 4105 xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB); 4106 } 4107 4108 /* Free the deleted block. */ 4109 error = xfs_btree_free_block(cur, rbp); 4110 if (error) 4111 goto error0; 4112 4113 /* 4114 * If we joined with the left neighbor, set the buffer in the 4115 * cursor to the left block, and fix up the index. 4116 */ 4117 if (bp != lbp) { 4118 cur->bc_bufs[level] = lbp; 4119 cur->bc_ptrs[level] += lrecs; 4120 cur->bc_ra[level] = 0; 4121 } 4122 /* 4123 * If we joined with the right neighbor and there's a level above 4124 * us, increment the cursor at that level. 
4125 */ 4126 else if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) || 4127 (level + 1 < cur->bc_nlevels)) { 4128 error = xfs_btree_increment(cur, level + 1, &i); 4129 if (error) 4130 goto error0; 4131 } 4132 4133 /* 4134 * Readjust the ptr at this level if it's not a leaf, since it's 4135 * still pointing at the deletion point, which makes the cursor 4136 * inconsistent. If this makes the ptr 0, the caller fixes it up. 4137 * We can't use decrement because it would change the next level up. 4138 */ 4139 if (level > 0) 4140 cur->bc_ptrs[level]--; 4141 4142 /* 4143 * We combined blocks, so we have to update the parent keys if the 4144 * btree supports overlapped intervals. However, bc_ptrs[level + 1] 4145 * points to the old block so that the caller knows which record to 4146 * delete. Therefore, the caller must be savvy enough to call updkeys 4147 * for us if we return stat == 2. The other exit points from this 4148 * function don't require deletions further up the tree, so they can 4149 * call updkeys directly. 4150 */ 4151 4152 /* Return value means the next level up has something to do. */ 4153 *stat = 2; 4154 return 0; 4155 4156 error0: 4157 if (tcur) 4158 xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); 4159 return error; 4160 } 4161 4162 /* 4163 * Delete the record pointed to by cur. 4164 * The cursor refers to the place where the record was (could be inserted) 4165 * when the operation returns. 4166 */ 4167 int /* error */ 4168 xfs_btree_delete( 4169 struct xfs_btree_cur *cur, 4170 int *stat) /* success/failure */ 4171 { 4172 int error; /* error return value */ 4173 int level; 4174 int i; 4175 bool joined = false; 4176 4177 /* 4178 * Go up the tree, starting at leaf level. 4179 * 4180 * If 2 is returned then a join was done; go to the next level. 4181 * Otherwise we are done. 
4182 */ 4183 for (level = 0, i = 2; i == 2; level++) { 4184 error = xfs_btree_delrec(cur, level, &i); 4185 if (error) 4186 goto error0; 4187 if (i == 2) 4188 joined = true; 4189 } 4190 4191 /* 4192 * If we combined blocks as part of deleting the record, delrec won't 4193 * have updated the parent high keys so we have to do that here. 4194 */ 4195 if (joined && (cur->bc_flags & XFS_BTREE_OVERLAPPING)) { 4196 error = xfs_btree_updkeys_force(cur, 0); 4197 if (error) 4198 goto error0; 4199 } 4200 4201 if (i == 0) { 4202 for (level = 1; level < cur->bc_nlevels; level++) { 4203 if (cur->bc_ptrs[level] == 0) { 4204 error = xfs_btree_decrement(cur, level, &i); 4205 if (error) 4206 goto error0; 4207 break; 4208 } 4209 } 4210 } 4211 4212 *stat = i; 4213 return 0; 4214 error0: 4215 return error; 4216 } 4217 4218 /* 4219 * Get the data from the pointed-to record. 4220 */ 4221 int /* error */ 4222 xfs_btree_get_rec( 4223 struct xfs_btree_cur *cur, /* btree cursor */ 4224 union xfs_btree_rec **recp, /* output: btree record */ 4225 int *stat) /* output: success/failure */ 4226 { 4227 struct xfs_btree_block *block; /* btree block */ 4228 struct xfs_buf *bp; /* buffer pointer */ 4229 int ptr; /* record number */ 4230 #ifdef DEBUG 4231 int error; /* error return value */ 4232 #endif 4233 4234 ptr = cur->bc_ptrs[0]; 4235 block = xfs_btree_get_block(cur, 0, &bp); 4236 4237 #ifdef DEBUG 4238 error = xfs_btree_check_block(cur, block, 0, bp); 4239 if (error) 4240 return error; 4241 #endif 4242 4243 /* 4244 * Off the right end or left end, return failure. 4245 */ 4246 if (ptr > xfs_btree_get_numrecs(block) || ptr <= 0) { 4247 *stat = 0; 4248 return 0; 4249 } 4250 4251 /* 4252 * Point to the record and extract its data. 4253 */ 4254 *recp = xfs_btree_rec_addr(cur, ptr, block); 4255 *stat = 1; 4256 return 0; 4257 } 4258 4259 /* Visit a block in a btree. 
 */
STATIC int
xfs_btree_visit_block(
	struct xfs_btree_cur		*cur,
	int				level,
	xfs_btree_visit_blocks_fn	fn,
	void				*data)
{
	struct xfs_btree_block		*block;
	struct xfs_buf			*bp;
	union xfs_btree_ptr		rptr;
	int				error;

	/* do right sibling readahead */
	xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
	block = xfs_btree_get_block(cur, level, &bp);

	/* process the block */
	error = fn(cur, level, data);
	if (error)
		return error;

	/* now read rh sibling block for next iteration */
	xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
	if (xfs_btree_ptr_is_null(cur, &rptr))
		return -ENOENT;	/* no more blocks at this level */

	return xfs_btree_lookup_get_block(cur, level, &rptr, &block);
}


/*
 * Visit every block in a btree.
 *
 * Walks each level top-down; within a level, walks left to right via the
 * right-sibling pointers until xfs_btree_visit_block() reports -ENOENT
 * (end of the level).  @fn is invoked once per block.
 */
int
xfs_btree_visit_blocks(
	struct xfs_btree_cur		*cur,
	xfs_btree_visit_blocks_fn	fn,
	void				*data)
{
	union xfs_btree_ptr		lptr;
	int				level;
	struct xfs_btree_block		*block = NULL;
	int				error = 0;

	cur->bc_ops->init_ptr_from_cur(cur, &lptr);

	/* for each level */
	for (level = cur->bc_nlevels - 1; level >= 0; level--) {
		/* grab the left hand block */
		error = xfs_btree_lookup_get_block(cur, level, &lptr, &block);
		if (error)
			return error;

		/* readahead the left most block for the next level down */
		if (level > 0) {
			union xfs_btree_ptr	*ptr;

			ptr = xfs_btree_ptr_addr(cur, 1, block);
			xfs_btree_readahead_ptr(cur, ptr, 1);

			/* save for the next iteration of the loop */
			xfs_btree_copy_ptrs(cur, &lptr, ptr, 1);
		}

		/* for each buffer in the level */
		do {
			error = xfs_btree_visit_block(cur, level, fn, data);
		} while (!error);

		/* -ENOENT just means we walked off the end of the level */
		if (error != -ENOENT)
			return error;
	}

	return 0;
}

/*
 * Change the owner of a btree.
 *
 * The mechanism we use here is ordered buffer logging. Because we don't know
 * how many buffers we are going to need to modify, we don't really want to
 * have to make transaction reservations for the worst case of every buffer in a
 * full size btree as that may be more space than we can fit in the log....
 *
 * We do the btree walk in the most optimal manner possible - we have sibling
 * pointers so we can just walk all the blocks on each level from left to right
 * in a single pass, and then move to the next level and do the same. We can
 * also do readahead on the sibling pointers to get IO moving more quickly,
 * though for slow disks this is unlikely to make much difference to performance
 * as the amount of CPU work we have to do before moving to the next block is
 * relatively small.
 *
 * For each btree block that we load, modify the owner appropriately, set the
 * buffer as an ordered buffer and log it appropriately. We need to ensure that
 * we mark the region we change dirty so that if the buffer is relogged in
 * a subsequent transaction the changes we make here as an ordered buffer are
 * correctly relogged in that transaction. If we are in recovery context, then
 * just queue the modified buffer as a delayed write buffer so the transaction
 * recovery completion writes the changes to disk.
4357 */ 4358 struct xfs_btree_block_change_owner_info { 4359 uint64_t new_owner; 4360 struct list_head *buffer_list; 4361 }; 4362 4363 static int 4364 xfs_btree_block_change_owner( 4365 struct xfs_btree_cur *cur, 4366 int level, 4367 void *data) 4368 { 4369 struct xfs_btree_block_change_owner_info *bbcoi = data; 4370 struct xfs_btree_block *block; 4371 struct xfs_buf *bp; 4372 4373 /* modify the owner */ 4374 block = xfs_btree_get_block(cur, level, &bp); 4375 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { 4376 if (block->bb_u.l.bb_owner == cpu_to_be64(bbcoi->new_owner)) 4377 return 0; 4378 block->bb_u.l.bb_owner = cpu_to_be64(bbcoi->new_owner); 4379 } else { 4380 if (block->bb_u.s.bb_owner == cpu_to_be32(bbcoi->new_owner)) 4381 return 0; 4382 block->bb_u.s.bb_owner = cpu_to_be32(bbcoi->new_owner); 4383 } 4384 4385 /* 4386 * If the block is a root block hosted in an inode, we might not have a 4387 * buffer pointer here and we shouldn't attempt to log the change as the 4388 * information is already held in the inode and discarded when the root 4389 * block is formatted into the on-disk inode fork. We still change it, 4390 * though, so everything is consistent in memory. 
4391 */ 4392 if (!bp) { 4393 ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE); 4394 ASSERT(level == cur->bc_nlevels - 1); 4395 return 0; 4396 } 4397 4398 if (cur->bc_tp) { 4399 if (!xfs_trans_ordered_buf(cur->bc_tp, bp)) { 4400 xfs_btree_log_block(cur, bp, XFS_BB_OWNER); 4401 return -EAGAIN; 4402 } 4403 } else { 4404 xfs_buf_delwri_queue(bp, bbcoi->buffer_list); 4405 } 4406 4407 return 0; 4408 } 4409 4410 int 4411 xfs_btree_change_owner( 4412 struct xfs_btree_cur *cur, 4413 uint64_t new_owner, 4414 struct list_head *buffer_list) 4415 { 4416 struct xfs_btree_block_change_owner_info bbcoi; 4417 4418 bbcoi.new_owner = new_owner; 4419 bbcoi.buffer_list = buffer_list; 4420 4421 return xfs_btree_visit_blocks(cur, xfs_btree_block_change_owner, 4422 &bbcoi); 4423 } 4424 4425 /* Verify the v5 fields of a long-format btree block. */ 4426 xfs_failaddr_t 4427 xfs_btree_lblock_v5hdr_verify( 4428 struct xfs_buf *bp, 4429 uint64_t owner) 4430 { 4431 struct xfs_mount *mp = bp->b_target->bt_mount; 4432 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 4433 4434 if (!xfs_sb_version_hascrc(&mp->m_sb)) 4435 return __this_address; 4436 if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid)) 4437 return __this_address; 4438 if (block->bb_u.l.bb_blkno != cpu_to_be64(bp->b_bn)) 4439 return __this_address; 4440 if (owner != XFS_RMAP_OWN_UNKNOWN && 4441 be64_to_cpu(block->bb_u.l.bb_owner) != owner) 4442 return __this_address; 4443 return NULL; 4444 } 4445 4446 /* Verify a long-format btree block. 
 */
xfs_failaddr_t
xfs_btree_lblock_verify(
	struct xfs_buf		*bp,
	unsigned int		max_recs)
{
	struct xfs_mount	*mp = bp->b_target->bt_mount;
	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);

	/* numrecs verification */
	if (be16_to_cpu(block->bb_numrecs) > max_recs)
		return __this_address;

	/* sibling pointer verification: either NULLFSBLOCK or a valid fsbno */
	if (block->bb_u.l.bb_leftsib != cpu_to_be64(NULLFSBLOCK) &&
	    !xfs_verify_fsbno(mp, be64_to_cpu(block->bb_u.l.bb_leftsib)))
		return __this_address;
	if (block->bb_u.l.bb_rightsib != cpu_to_be64(NULLFSBLOCK) &&
	    !xfs_verify_fsbno(mp, be64_to_cpu(block->bb_u.l.bb_rightsib)))
		return __this_address;

	return NULL;
}

/**
 * xfs_btree_sblock_v5hdr_verify() -- verify the v5 fields of a short-format
 *				      btree block
 *
 * @bp: buffer containing the btree block
 *
 * Returns the address of the failing check, or NULL if everything is ok.
 */
xfs_failaddr_t
xfs_btree_sblock_v5hdr_verify(
	struct xfs_buf		*bp)
{
	struct xfs_mount	*mp = bp->b_target->bt_mount;
	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
	struct xfs_perag	*pag = bp->b_pag;

	/* v5 header fields only exist on CRC-enabled filesystems */
	if (!xfs_sb_version_hascrc(&mp->m_sb))
		return __this_address;
	if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
		return __this_address;
	if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
		return __this_address;
	/* short-format blocks are owned by the AG they live in */
	if (pag && be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
		return __this_address;
	return NULL;
}

/**
 * xfs_btree_sblock_verify() -- verify a short-format btree block
 *
 * @bp: buffer containing the btree block
 * @max_recs: maximum records allowed in this btree node
 */
xfs_failaddr_t
xfs_btree_sblock_verify(
	struct xfs_buf		*bp,
	unsigned int		max_recs)
{
	struct xfs_mount	*mp = bp->b_target->bt_mount;
	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
	xfs_agblock_t		agno;

	/* numrecs verification */
	if (be16_to_cpu(block->bb_numrecs) > max_recs)
		return __this_address;

	/* sibling pointer verification: siblings must live in this block's AG */
	agno = xfs_daddr_to_agno(mp, XFS_BUF_ADDR(bp));
	if (block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK) &&
	    !xfs_verify_agbno(mp, agno, be32_to_cpu(block->bb_u.s.bb_leftsib)))
		return __this_address;
	if (block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK) &&
	    !xfs_verify_agbno(mp, agno, be32_to_cpu(block->bb_u.s.bb_rightsib)))
		return __this_address;

	return NULL;
}

/*
 * Calculate the number of btree levels needed to store a given number of
 * records in a short-format btree.
 *
 * limits[0] is the maximum record count in a leaf block; limits[1] is the
 * maximum record count in an interior node block.
 */
uint
xfs_btree_compute_maxlevels(
	uint			*limits,
	unsigned long		len)
{
	uint			level;
	unsigned long		maxblocks;

	/* leaf blocks needed to hold len records, rounded up */
	maxblocks = (len + limits[0] - 1) / limits[0];
	/* add interior levels until everything fits under a single root */
	for (level = 1; maxblocks > 1; level++)
		maxblocks = (maxblocks + limits[1] - 1) / limits[1];
	return level;
}

/*
 * Query a regular btree for all records overlapping a given interval.
 * Start with a LE lookup of the key of low_rec and return all records
 * until we find a record with a key greater than the key of high_rec.
 */
STATIC int
xfs_btree_simple_query_range(
	struct xfs_btree_cur		*cur,
	union xfs_btree_key		*low_key,
	union xfs_btree_key		*high_key,
	xfs_btree_query_range_fn	fn,
	void				*priv)
{
	union xfs_btree_rec		*recp;
	union xfs_btree_key		rec_key;
	int64_t				diff;
	int				stat;
	bool				firstrec = true;	/* at the first record visited? */
	int				error;

	/* both ops are required for the range walk below */
	ASSERT(cur->bc_ops->init_high_key_from_rec);
	ASSERT(cur->bc_ops->diff_two_keys);

	/*
	 * Find the leftmost record. The btree cursor must be set
	 * to the low record used to generate low_key.
	 */
	stat = 0;
	error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, &stat);
	if (error)
		goto out;

	/* Nothing? See if there's anything to the right. */
	if (!stat) {
		error = xfs_btree_increment(cur, 0, &stat);
		if (error)
			goto out;
	}

	while (stat) {
		/* Find the record. */
		error = xfs_btree_get_rec(cur, &recp, &stat);
		if (error || !stat)
			break;

		/*
		 * Skip if high_key(rec) < low_key.  Only the first record
		 * returned by the LE lookup can lie entirely left of the
		 * query range, so this check is only needed once.
		 */
		if (firstrec) {
			cur->bc_ops->init_high_key_from_rec(&rec_key, recp);
			firstrec = false;
			diff = cur->bc_ops->diff_two_keys(cur, low_key,
					&rec_key);
			if (diff > 0)
				goto advloop;
		}

		/* Stop if high_key < low_key(rec). */
		cur->bc_ops->init_key_from_rec(&rec_key, recp);
		diff = cur->bc_ops->diff_two_keys(cur, &rec_key, high_key);
		if (diff > 0)
			break;

		/* Callback; negative errors and ABORT end the walk. */
		error = fn(cur, recp, priv);
		if (error < 0 || error == XFS_BTREE_QUERY_RANGE_ABORT)
			break;

advloop:
		/* Move on to the next record. */
		error = xfs_btree_increment(cur, 0, &stat);
		if (error)
			break;
	}

out:
	return error;
}

/*
 * Query an overlapped interval btree for all records overlapping a given
 * interval. This function roughly follows the algorithm given in
 * "Interval Trees" of _Introduction to Algorithms_, which is section
 * 14.3 in the 2nd and 3rd editions.
 *
 * First, generate keys for the low and high records passed in.
 *
 * For any leaf node, generate the high and low keys for the record.
 * If the record keys overlap with the query low/high keys, pass the
 * record to the function iterator.
 *
 * For any internal node, compare the low and high keys of each
 * pointer against the query low/high keys. If there's an overlap,
 * follow the pointer.
 *
 * As an optimization, we stop scanning a block when we find a low key
 * that is greater than the query's high key.
 */
STATIC int
xfs_btree_overlapped_query_range(
	struct xfs_btree_cur		*cur,
	union xfs_btree_key		*low_key,
	union xfs_btree_key		*high_key,
	xfs_btree_query_range_fn	fn,
	void				*priv)
{
	union xfs_btree_ptr		ptr;
	union xfs_btree_ptr		*pp;
	union xfs_btree_key		rec_key;
	union xfs_btree_key		rec_hkey;
	union xfs_btree_key		*lkp;
	union xfs_btree_key		*hkp;
	union xfs_btree_rec		*recp;
	struct xfs_btree_block		*block;
	int64_t				ldiff;
	int64_t				hdiff;
	int				level;
	struct xfs_buf			*bp;
	int				i;
	int				error;

	/* Load the root of the btree. */
	level = cur->bc_nlevels - 1;
	cur->bc_ops->init_ptr_from_cur(cur, &ptr);
	error = xfs_btree_lookup_get_block(cur, level, &ptr, &block);
	if (error)
		return error;
	xfs_btree_get_block(cur, level, &bp);
	trace_xfs_btree_overlapped_query_range(cur, level, bp);
#ifdef DEBUG
	error = xfs_btree_check_block(cur, block, level, bp);
	if (error)
		goto out;
#endif
	cur->bc_ptrs[level] = 1;

	/*
	 * Iterative depth-first walk; bc_ptrs[level] tracks the 1-based
	 * position within the block held at each cursor level.
	 */
	while (level < cur->bc_nlevels) {
		block = xfs_btree_get_block(cur, level, &bp);

		/* End of node, pop back towards the root. */
		if (cur->bc_ptrs[level] > be16_to_cpu(block->bb_numrecs)) {
pop_up:
			if (level < cur->bc_nlevels - 1)
				cur->bc_ptrs[level + 1]++;
			level++;
			continue;
		}

		if (level == 0) {
			/* Handle a leaf node. */
			recp = xfs_btree_rec_addr(cur, cur->bc_ptrs[0], block);

			cur->bc_ops->init_high_key_from_rec(&rec_hkey, recp);
			ldiff = cur->bc_ops->diff_two_keys(cur, &rec_hkey,
					low_key);

			cur->bc_ops->init_key_from_rec(&rec_key, recp);
			hdiff = cur->bc_ops->diff_two_keys(cur, high_key,
					&rec_key);

			/*
			 * If (record's high key >= query's low key) and
			 * (query's high key >= record's low key), then
			 * this record overlaps the query range; callback.
			 */
			if (ldiff >= 0 && hdiff >= 0) {
				error = fn(cur, recp, priv);
				if (error < 0 ||
				    error == XFS_BTREE_QUERY_RANGE_ABORT)
					break;
			} else if (hdiff < 0) {
				/* Record is larger than high key; pop. */
				goto pop_up;
			}
			cur->bc_ptrs[level]++;
			continue;
		}

		/* Handle an internal node. */
		lkp = xfs_btree_key_addr(cur, cur->bc_ptrs[level], block);
		hkp = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level], block);
		pp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[level], block);

		ldiff = cur->bc_ops->diff_two_keys(cur, hkp, low_key);
		hdiff = cur->bc_ops->diff_two_keys(cur, high_key, lkp);

		/*
		 * If (pointer's high key >= query's low key) and
		 * (query's high key >= pointer's low key), then
		 * this record overlaps the query range; follow pointer.
		 */
		if (ldiff >= 0 && hdiff >= 0) {
			/* descend one level into the child block */
			level--;
			error = xfs_btree_lookup_get_block(cur, level, pp,
					&block);
			if (error)
				goto out;
			xfs_btree_get_block(cur, level, &bp);
			trace_xfs_btree_overlapped_query_range(cur, level, bp);
#ifdef DEBUG
			error = xfs_btree_check_block(cur, block, level, bp);
			if (error)
				goto out;
#endif
			cur->bc_ptrs[level] = 1;
			continue;
		} else if (hdiff < 0) {
			/* The low key is larger than the upper range; pop. */
			goto pop_up;
		}
		cur->bc_ptrs[level]++;
	}

out:
	/*
	 * If we don't end this function with the cursor pointing at a record
	 * block, a subsequent non-error cursor deletion will not release
	 * node-level buffers, causing a buffer leak. This is quite possible
	 * with a zero-results range query, so release the buffers if we
	 * failed to return any results.
	 */
	if (cur->bc_bufs[0] == NULL) {
		for (i = 0; i < cur->bc_nlevels; i++) {
			if (cur->bc_bufs[i]) {
				xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[i]);
				cur->bc_bufs[i] = NULL;
				cur->bc_ptrs[i] = 0;
				cur->bc_ra[i] = 0;
			}
		}
	}

	return error;
}

/*
 * Query a btree for all records overlapping a given interval of keys. The
 * supplied function will be called with each record found; return one of the
 * XFS_BTREE_QUERY_RANGE_{CONTINUE,ABORT} values or the usual negative error
 * code. This function returns XFS_BTREE_QUERY_RANGE_ABORT, zero, or a
 * negative error code.
 */
int
xfs_btree_query_range(
	struct xfs_btree_cur		*cur,
	union xfs_btree_irec		*low_rec,
	union xfs_btree_irec		*high_rec,
	xfs_btree_query_range_fn	fn,
	void				*priv)
{
	union xfs_btree_rec		rec;
	union xfs_btree_key		low_key;
	union xfs_btree_key		high_key;

	/* Find the keys of both ends of the interval. */
	cur->bc_rec = *high_rec;
	cur->bc_ops->init_rec_from_cur(cur, &rec);
	cur->bc_ops->init_key_from_rec(&high_key, &rec);

	cur->bc_rec = *low_rec;
	cur->bc_ops->init_rec_from_cur(cur, &rec);
	cur->bc_ops->init_key_from_rec(&low_key, &rec);

	/* Enforce low key < high key.
	 */
	if (cur->bc_ops->diff_two_keys(cur, &low_key, &high_key) > 0)
		return -EINVAL;

	/* overlapping btrees need the interval-tree style walk */
	if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
		return xfs_btree_simple_query_range(cur, &low_key,
				&high_key, fn, priv);
	return xfs_btree_overlapped_query_range(cur, &low_key, &high_key,
			fn, priv);
}

/* Query a btree for all records. */
int
xfs_btree_query_all(
	struct xfs_btree_cur		*cur,
	xfs_btree_query_range_fn	fn,
	void				*priv)
{
	union xfs_btree_key		low_key;
	union xfs_btree_key		high_key;

	/* an all-zeroes key sorts before every record, all-ones after */
	memset(&cur->bc_rec, 0, sizeof(cur->bc_rec));
	memset(&low_key, 0, sizeof(low_key));
	memset(&high_key, 0xFF, sizeof(high_key));

	return xfs_btree_simple_query_range(cur, &low_key, &high_key, fn, priv);
}

/*
 * Calculate the number of blocks needed to store a given number of records
 * in a short-format (per-AG metadata) btree.
 *
 * limits[0] is the maximum record count in a leaf block; limits[1] is the
 * maximum record count in an interior node block.
 */
xfs_extlen_t
xfs_btree_calc_size(
	uint			*limits,
	unsigned long long	len)
{
	int			level;
	int			maxrecs;
	xfs_extlen_t		rval;

	maxrecs = limits[0];
	for (level = 0, rval = 0; len > 1; level++) {
		/* blocks needed at this level, rounded up (do_div mutates len) */
		len += maxrecs - 1;
		do_div(len, maxrecs);
		/* levels above the leaves use the node maxrecs */
		maxrecs = limits[1];
		rval += len;
	}
	return rval;
}

/* Visitor callback for xfs_btree_count_blocks(): bump the running tally. */
static int
xfs_btree_count_blocks_helper(
	struct xfs_btree_cur	*cur,
	int			level,
	void			*data)
{
	xfs_extlen_t		*blocks = data;
	(*blocks)++;

	return 0;
}

/* Count the blocks in a btree and return the result in *blocks. */
int
xfs_btree_count_blocks(
	struct xfs_btree_cur	*cur,
	xfs_extlen_t		*blocks)
{
	*blocks = 0;
	return xfs_btree_visit_blocks(cur, xfs_btree_count_blocks_helper,
			blocks);
}

/*
 * Compare two btree pointers.  Returns a negative, zero, or positive value
 * in the manner of a comparator, using the pointer width selected by
 * XFS_BTREE_LONG_PTRS.
 */
int64_t
xfs_btree_diff_two_ptrs(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_ptr	*a,
	const union xfs_btree_ptr	*b)
{
	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
		return (int64_t)be64_to_cpu(a->l) - be64_to_cpu(b->l);
	return (int64_t)be32_to_cpu(a->s) - be32_to_cpu(b->s);
}

/* If there's an extent, we're done. */
STATIC int
xfs_btree_has_record_helper(
	struct xfs_btree_cur		*cur,
	union xfs_btree_rec		*rec,
	void				*priv)
{
	/* abort the query on the first record found */
	return XFS_BTREE_QUERY_RANGE_ABORT;
}

/* Is there a record covering a given range of keys? */
int
xfs_btree_has_record(
	struct xfs_btree_cur	*cur,
	union xfs_btree_irec	*low,
	union xfs_btree_irec	*high,
	bool			*exists)
{
	int			error;

	error = xfs_btree_query_range(cur, low, high,
			&xfs_btree_has_record_helper, NULL);
	/* ABORT from the helper means at least one record was found */
	if (error == XFS_BTREE_QUERY_RANGE_ABORT) {
		*exists = true;
		return 0;
	}
	*exists = false;
	return error;
}