1 /* 2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. 3 * All Rights Reserved. 4 * 5 * This program is free software; you can redistribute it and/or 6 * modify it under the terms of the GNU General Public License as 7 * published by the Free Software Foundation. 8 * 9 * This program is distributed in the hope that it would be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, write the Free Software Foundation, 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 17 */ 18 #include "xfs.h" 19 #include "xfs_fs.h" 20 #include "xfs_shared.h" 21 #include "xfs_format.h" 22 #include "xfs_log_format.h" 23 #include "xfs_trans_resv.h" 24 #include "xfs_bit.h" 25 #include "xfs_mount.h" 26 #include "xfs_inode.h" 27 #include "xfs_trans.h" 28 #include "xfs_inode_item.h" 29 #include "xfs_buf_item.h" 30 #include "xfs_btree.h" 31 #include "xfs_error.h" 32 #include "xfs_trace.h" 33 #include "xfs_cksum.h" 34 #include "xfs_alloc.h" 35 #include "xfs_log.h" 36 37 /* 38 * Cursor allocation zone. 39 */ 40 kmem_zone_t *xfs_btree_cur_zone; 41 42 /* 43 * Btree magic numbers. 44 */ 45 static const __uint32_t xfs_magics[2][XFS_BTNUM_MAX] = { 46 { XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC, 47 XFS_FIBT_MAGIC }, 48 { XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC, 49 XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC, XFS_FIBT_CRC_MAGIC } 50 }; 51 #define xfs_btree_magic(cur) \ 52 xfs_magics[!!((cur)->bc_flags & XFS_BTREE_CRC_BLOCKS)][cur->bc_btnum] 53 54 55 STATIC int /* error (0 or EFSCORRUPTED) */ 56 xfs_btree_check_lblock( 57 struct xfs_btree_cur *cur, /* btree cursor */ 58 struct xfs_btree_block *block, /* btree long form block pointer */ 59 int level, /* level of the btree block */ 60 struct xfs_buf *bp) /* buffer for block, if any */ 61 { 62 int lblock_ok = 1; /* block passes checks */ 63 struct xfs_mount *mp; /* file system mount point */ 64 65 mp = cur->bc_mp; 66 67 if (xfs_sb_version_hascrc(&mp->m_sb)) { 68 lblock_ok = lblock_ok && 69 uuid_equal(&block->bb_u.l.bb_uuid, 70 &mp->m_sb.sb_meta_uuid) && 71 block->bb_u.l.bb_blkno == cpu_to_be64( 72 bp ? 
bp->b_bn : XFS_BUF_DADDR_NULL); 73 } 74 75 lblock_ok = lblock_ok && 76 be32_to_cpu(block->bb_magic) == xfs_btree_magic(cur) && 77 be16_to_cpu(block->bb_level) == level && 78 be16_to_cpu(block->bb_numrecs) <= 79 cur->bc_ops->get_maxrecs(cur, level) && 80 block->bb_u.l.bb_leftsib && 81 (block->bb_u.l.bb_leftsib == cpu_to_be64(NULLFSBLOCK) || 82 XFS_FSB_SANITY_CHECK(mp, 83 be64_to_cpu(block->bb_u.l.bb_leftsib))) && 84 block->bb_u.l.bb_rightsib && 85 (block->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK) || 86 XFS_FSB_SANITY_CHECK(mp, 87 be64_to_cpu(block->bb_u.l.bb_rightsib))); 88 89 if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp, 90 XFS_ERRTAG_BTREE_CHECK_LBLOCK, 91 XFS_RANDOM_BTREE_CHECK_LBLOCK))) { 92 if (bp) 93 trace_xfs_btree_corrupt(bp, _RET_IP_); 94 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); 95 return -EFSCORRUPTED; 96 } 97 return 0; 98 } 99 100 STATIC int /* error (0 or EFSCORRUPTED) */ 101 xfs_btree_check_sblock( 102 struct xfs_btree_cur *cur, /* btree cursor */ 103 struct xfs_btree_block *block, /* btree short form block pointer */ 104 int level, /* level of the btree block */ 105 struct xfs_buf *bp) /* buffer containing block */ 106 { 107 struct xfs_mount *mp; /* file system mount point */ 108 struct xfs_buf *agbp; /* buffer for ag. freespace struct */ 109 struct xfs_agf *agf; /* ag. freespace structure */ 110 xfs_agblock_t agflen; /* native ag. freespace length */ 111 int sblock_ok = 1; /* block passes checks */ 112 113 mp = cur->bc_mp; 114 agbp = cur->bc_private.a.agbp; 115 agf = XFS_BUF_TO_AGF(agbp); 116 agflen = be32_to_cpu(agf->agf_length); 117 118 if (xfs_sb_version_hascrc(&mp->m_sb)) { 119 sblock_ok = sblock_ok && 120 uuid_equal(&block->bb_u.s.bb_uuid, 121 &mp->m_sb.sb_meta_uuid) && 122 block->bb_u.s.bb_blkno == cpu_to_be64( 123 bp ? bp->b_bn : XFS_BUF_DADDR_NULL); 124 } 125 126 sblock_ok = sblock_ok && 127 be32_to_cpu(block->bb_magic) == xfs_btree_magic(cur) && 128 be16_to_cpu(block->bb_level) == level && 129 be16_to_cpu(block->bb_numrecs) <= 130 cur->bc_ops->get_maxrecs(cur, level) && 131 (block->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK) || 132 be32_to_cpu(block->bb_u.s.bb_leftsib) < agflen) && 133 block->bb_u.s.bb_leftsib && 134 (block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK) || 135 be32_to_cpu(block->bb_u.s.bb_rightsib) < agflen) && 136 block->bb_u.s.bb_rightsib; 137 138 if (unlikely(XFS_TEST_ERROR(!sblock_ok, mp, 139 XFS_ERRTAG_BTREE_CHECK_SBLOCK, 140 XFS_RANDOM_BTREE_CHECK_SBLOCK))) { 141 if (bp) 142 trace_xfs_btree_corrupt(bp, _RET_IP_); 143 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); 144 return -EFSCORRUPTED; 145 } 146 return 0; 147 } 148 149 /* 150 * Debug routine: check that block header is ok. 151 */ 152 int 153 xfs_btree_check_block( 154 struct xfs_btree_cur *cur, /* btree cursor */ 155 struct xfs_btree_block *block, /* generic btree block pointer */ 156 int level, /* level of the btree block */ 157 struct xfs_buf *bp) /* buffer containing block, if any */ 158 { 159 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) 160 return xfs_btree_check_lblock(cur, block, level, bp); 161 else 162 return xfs_btree_check_sblock(cur, block, level, bp); 163 } 164 165 /* 166 * Check that (long) pointer is ok. 
167 */ 168 int /* error (0 or EFSCORRUPTED) */ 169 xfs_btree_check_lptr( 170 struct xfs_btree_cur *cur, /* btree cursor */ 171 xfs_fsblock_t bno, /* btree block disk address */ 172 int level) /* btree block level */ 173 { 174 XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, 175 level > 0 && 176 bno != NULLFSBLOCK && 177 XFS_FSB_SANITY_CHECK(cur->bc_mp, bno)); 178 return 0; 179 } 180 181 #ifdef DEBUG 182 /* 183 * Check that (short) pointer is ok. 184 */ 185 STATIC int /* error (0 or EFSCORRUPTED) */ 186 xfs_btree_check_sptr( 187 struct xfs_btree_cur *cur, /* btree cursor */ 188 xfs_agblock_t bno, /* btree block disk address */ 189 int level) /* btree block level */ 190 { 191 xfs_agblock_t agblocks = cur->bc_mp->m_sb.sb_agblocks; 192 193 XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, 194 level > 0 && 195 bno != NULLAGBLOCK && 196 bno != 0 && 197 bno < agblocks); 198 return 0; 199 } 200 201 /* 202 * Check that block ptr is ok. 203 */ 204 STATIC int /* error (0 or EFSCORRUPTED) */ 205 xfs_btree_check_ptr( 206 struct xfs_btree_cur *cur, /* btree cursor */ 207 union xfs_btree_ptr *ptr, /* btree block disk address */ 208 int index, /* offset from ptr to check */ 209 int level) /* btree block level */ 210 { 211 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { 212 return xfs_btree_check_lptr(cur, 213 be64_to_cpu((&ptr->l)[index]), level); 214 } else { 215 return xfs_btree_check_sptr(cur, 216 be32_to_cpu((&ptr->s)[index]), level); 217 } 218 } 219 #endif 220 221 /* 222 * Calculate CRC on the whole btree block and stuff it into the 223 * long-form btree header. 224 * 225 * Prior to calculating the CRC, pull the LSN out of the buffer log item and put 226 * it into the buffer so recovery knows what the last modification was that made 227 * it to disk. 228 */ 229 void 230 xfs_btree_lblock_calc_crc( 231 struct xfs_buf *bp) 232 { 233 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 234 struct xfs_buf_log_item *bip = bp->b_fspriv; 235 236 if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) 237 return; 238 if (bip) 239 block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn); 240 xfs_buf_update_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF); 241 } 242 243 bool 244 xfs_btree_lblock_verify_crc( 245 struct xfs_buf *bp) 246 { 247 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 248 struct xfs_mount *mp = bp->b_target->bt_mount; 249 250 if (xfs_sb_version_hascrc(&mp->m_sb)) { 251 if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.l.bb_lsn))) 252 return false; 253 return xfs_buf_verify_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF); 254 } 255 256 return true; 257 } 258 259 /* 260 * Calculate CRC on the whole btree block and stuff it into the 261 * short-form btree header. 262 * 263 * Prior to calculating the CRC, pull the LSN out of the buffer log item and put 264 * it into the buffer so recovery knows what the last modification was that made 265 * it to disk.
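 *
 * For illustration only (a hedged sketch; the verifier names below are made
 * up and not defined in this file), a short-form btree write verifier is
 * expected to finish with this call once its structural checks pass:
 *
 *	static void example_sbt_write_verify(struct xfs_buf *bp)
 *	{
 *		if (!example_sbt_verify(bp))
 *			return;
 *		xfs_btree_sblock_calc_crc(bp);
 *	}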
266 */ 267 void 268 xfs_btree_sblock_calc_crc( 269 struct xfs_buf *bp) 270 { 271 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 272 struct xfs_buf_log_item *bip = bp->b_fspriv; 273 274 if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) 275 return; 276 if (bip) 277 block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn); 278 xfs_buf_update_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF); 279 } 280 281 bool 282 xfs_btree_sblock_verify_crc( 283 struct xfs_buf *bp) 284 { 285 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 286 struct xfs_mount *mp = bp->b_target->bt_mount; 287 288 if (xfs_sb_version_hascrc(&mp->m_sb)) { 289 if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.s.bb_lsn))) 290 return false; 291 return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF); 292 } 293 294 return true; 295 } 296 297 /* 298 * Delete the btree cursor. 299 */ 300 void 301 xfs_btree_del_cursor( 302 xfs_btree_cur_t *cur, /* btree cursor */ 303 int error) /* del because of error */ 304 { 305 int i; /* btree level */ 306 307 /* 308 * Clear the buffer pointers, and release the buffers. 309 * If we're doing this in the face of an error, we 310 * need to make sure to inspect all of the entries 311 * in the bc_bufs array for buffers to be unlocked. 312 * This is because some of the btree code works from 313 * level n down to 0, and if we get an error along 314 * the way we won't have initialized all the entries 315 * down to 0. 316 */ 317 for (i = 0; i < cur->bc_nlevels; i++) { 318 if (cur->bc_bufs[i]) 319 xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[i]); 320 else if (!error) 321 break; 322 } 323 /* 324 * Can't free a bmap cursor without having dealt with the 325 * allocated indirect blocks' accounting. 326 */ 327 ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP || 328 cur->bc_private.b.allocated == 0); 329 /* 330 * Free the cursor. 331 */ 332 kmem_zone_free(xfs_btree_cur_zone, cur); 333 } 334 335 /* 336 * Duplicate the btree cursor. 337 * Allocate a new one, copy the record, re-get the buffers. 338 */ 339 int /* error */ 340 xfs_btree_dup_cursor( 341 xfs_btree_cur_t *cur, /* input cursor */ 342 xfs_btree_cur_t **ncur) /* output cursor */ 343 { 344 xfs_buf_t *bp; /* btree block's buffer pointer */ 345 int error; /* error return value */ 346 int i; /* level number of btree block */ 347 xfs_mount_t *mp; /* mount structure for filesystem */ 348 xfs_btree_cur_t *new; /* new cursor value */ 349 xfs_trans_t *tp; /* transaction pointer, can be NULL */ 350 351 tp = cur->bc_tp; 352 mp = cur->bc_mp; 353 354 /* 355 * Allocate a new cursor like the old one. 356 */ 357 new = cur->bc_ops->dup_cursor(cur); 358 359 /* 360 * Copy the record currently in the cursor. 361 */ 362 new->bc_rec = cur->bc_rec; 363 364 /* 365 * For each level in the cursor, re-get the buffer and copy the ptr value. 366 */ 367 for (i = 0; i < new->bc_nlevels; i++) { 368 new->bc_ptrs[i] = cur->bc_ptrs[i]; 369 new->bc_ra[i] = cur->bc_ra[i]; 370 bp = cur->bc_bufs[i]; 371 if (bp) { 372 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, 373 XFS_BUF_ADDR(bp), mp->m_bsize, 374 0, &bp, 375 cur->bc_ops->buf_ops); 376 if (error) { 377 xfs_btree_del_cursor(new, error); 378 *ncur = NULL; 379 return error; 380 } 381 } 382 new->bc_bufs[i] = bp; 383 } 384 *ncur = new; 385 return 0; 386 } 387 388 /* 389 * XFS btree block layout and addressing: 390 * 391 * There are two types of blocks in the btree: leaf and non-leaf blocks. 392 * 393 * A leaf block starts with a header, followed by records containing 394 * the values.
A non-leaf block also starts with the same header, and 395 * then first contains lookup keys followed by an equal number of pointers 396 * to the btree blocks at the previous level. 397 * 398 * +--------+-------+-------+-------+-------+-------+-------+ 399 * Leaf: | header | rec 1 | rec 2 | rec 3 | rec 4 | rec 5 | rec N | 400 * +--------+-------+-------+-------+-------+-------+-------+ 401 * 402 * +--------+-------+-------+-------+-------+-------+-------+ 403 * Non-Leaf: | header | key 1 | key 2 | key N | ptr 1 | ptr 2 | ptr N | 404 * +--------+-------+-------+-------+-------+-------+-------+ 405 * 406 * The header is called struct xfs_btree_block for reasons better left unknown 407 * and comes in different versions for short (32bit) and long (64bit) block 408 * pointers. The record and key structures are defined by the btree instances 409 * and opaque to the btree core. The block pointers are simple disk endian 410 * integers, available in a short (32bit) and long (64bit) variant. 411 * 412 * The helpers below calculate the offset of a given record, key or pointer 413 * into a btree block (xfs_btree_*_offset) or return a pointer to the given 414 * record, key or pointer (xfs_btree_*_addr). Note that all addressing 415 * inside the btree block is done using indices starting at one, not zero! 416 */ 417 418 /* 419 * Return size of the btree block header for this btree instance. 420 */ 421 static inline size_t xfs_btree_block_len(struct xfs_btree_cur *cur) 422 { 423 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { 424 if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) 425 return XFS_BTREE_LBLOCK_CRC_LEN; 426 return XFS_BTREE_LBLOCK_LEN; 427 } 428 if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) 429 return XFS_BTREE_SBLOCK_CRC_LEN; 430 return XFS_BTREE_SBLOCK_LEN; 431 } 432 433 /* 434 * Return size of btree block pointers for this btree instance. 435 */ 436 static inline size_t xfs_btree_ptr_len(struct xfs_btree_cur *cur) 437 { 438 return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ? 439 sizeof(__be64) : sizeof(__be32); 440 } 441 442 /* 443 * Calculate offset of the n-th record in a btree block. 444 */ 445 STATIC size_t 446 xfs_btree_rec_offset( 447 struct xfs_btree_cur *cur, 448 int n) 449 { 450 return xfs_btree_block_len(cur) + 451 (n - 1) * cur->bc_ops->rec_len; 452 } 453 454 /* 455 * Calculate offset of the n-th key in a btree block. 456 */ 457 STATIC size_t 458 xfs_btree_key_offset( 459 struct xfs_btree_cur *cur, 460 int n) 461 { 462 return xfs_btree_block_len(cur) + 463 (n - 1) * cur->bc_ops->key_len; 464 } 465 466 /* 467 * Calculate offset of the n-th block pointer in a btree block. 468 */ 469 STATIC size_t 470 xfs_btree_ptr_offset( 471 struct xfs_btree_cur *cur, 472 int n, 473 int level) 474 { 475 return xfs_btree_block_len(cur) + 476 cur->bc_ops->get_maxrecs(cur, level) * cur->bc_ops->key_len + 477 (n - 1) * xfs_btree_ptr_len(cur); 478 } 479 480 /* 481 * Return a pointer to the n-th record in the btree block. 482 */ 483 STATIC union xfs_btree_rec * 484 xfs_btree_rec_addr( 485 struct xfs_btree_cur *cur, 486 int n, 487 struct xfs_btree_block *block) 488 { 489 return (union xfs_btree_rec *) 490 ((char *)block + xfs_btree_rec_offset(cur, n)); 491 } 492 493 /* 494 * Return a pointer to the n-th key in the btree block. 
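 *
 * Worked example (the key size here is an assumption for illustration): in a
 * short-form block without CRCs, xfs_btree_block_len() above returns
 * XFS_BTREE_SBLOCK_LEN, so with an 8-byte key_len the 3rd key begins
 * XFS_BTREE_SBLOCK_LEN + 2 * 8 bytes into the block, which is exactly what
 * xfs_btree_key_offset(cur, 3) computes.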
495 */ 496 STATIC union xfs_btree_key * 497 xfs_btree_key_addr( 498 struct xfs_btree_cur *cur, 499 int n, 500 struct xfs_btree_block *block) 501 { 502 return (union xfs_btree_key *) 503 ((char *)block + xfs_btree_key_offset(cur, n)); 504 } 505 506 /* 507 * Return a pointer to the n-th block pointer in the btree block. 508 */ 509 STATIC union xfs_btree_ptr * 510 xfs_btree_ptr_addr( 511 struct xfs_btree_cur *cur, 512 int n, 513 struct xfs_btree_block *block) 514 { 515 int level = xfs_btree_get_level(block); 516 517 ASSERT(block->bb_level != 0); 518 519 return (union xfs_btree_ptr *) 520 ((char *)block + xfs_btree_ptr_offset(cur, n, level)); 521 } 522 523 /* 524 * Get the root block which is stored in the inode. 525 * 526 * For now this btree implementation assumes the btree root is always 527 * stored in the if_broot field of an inode fork. 528 */ 529 STATIC struct xfs_btree_block * 530 xfs_btree_get_iroot( 531 struct xfs_btree_cur *cur) 532 { 533 struct xfs_ifork *ifp; 534 535 ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, cur->bc_private.b.whichfork); 536 return (struct xfs_btree_block *)ifp->if_broot; 537 } 538 539 /* 540 * Retrieve the block pointer from the cursor at the given level. 541 * This may be an inode btree root or from a buffer. 542 */ 543 STATIC struct xfs_btree_block * /* generic btree block pointer */ 544 xfs_btree_get_block( 545 struct xfs_btree_cur *cur, /* btree cursor */ 546 int level, /* level in btree */ 547 struct xfs_buf **bpp) /* buffer containing the block */ 548 { 549 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && 550 (level == cur->bc_nlevels - 1)) { 551 *bpp = NULL; 552 return xfs_btree_get_iroot(cur); 553 } 554 555 *bpp = cur->bc_bufs[level]; 556 return XFS_BUF_TO_BLOCK(*bpp); 557 } 558 559 /* 560 * Get a buffer for the block, return it with no data read. 561 * Long-form addressing. 562 */ 563 xfs_buf_t * /* buffer for fsbno */ 564 xfs_btree_get_bufl( 565 xfs_mount_t *mp, /* file system mount point */ 566 xfs_trans_t *tp, /* transaction pointer */ 567 xfs_fsblock_t fsbno, /* file system block number */ 568 uint lock) /* lock flags for get_buf */ 569 { 570 xfs_daddr_t d; /* real disk block address */ 571 572 ASSERT(fsbno != NULLFSBLOCK); 573 d = XFS_FSB_TO_DADDR(mp, fsbno); 574 return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock); 575 } 576 577 /* 578 * Get a buffer for the block, return it with no data read. 579 * Short-form addressing. 580 */ 581 xfs_buf_t * /* buffer for agno/agbno */ 582 xfs_btree_get_bufs( 583 xfs_mount_t *mp, /* file system mount point */ 584 xfs_trans_t *tp, /* transaction pointer */ 585 xfs_agnumber_t agno, /* allocation group number */ 586 xfs_agblock_t agbno, /* allocation group block number */ 587 uint lock) /* lock flags for get_buf */ 588 { 589 xfs_daddr_t d; /* real disk block address */ 590 591 ASSERT(agno != NULLAGNUMBER); 592 ASSERT(agbno != NULLAGBLOCK); 593 d = XFS_AGB_TO_DADDR(mp, agno, agbno); 594 return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock); 595 } 596 597 /* 598 * Check for the cursor referring to the last block at the given level. 
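 *
 * For example, the rightmost block at a level of a short-form btree has
 * bb_u.s.bb_rightsib set to NULLAGBLOCK, so the test below returns 1 for
 * it and 0 for every other block at that level.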
599 */ 600 int /* 1=is last block, 0=not last block */ 601 xfs_btree_islastblock( 602 xfs_btree_cur_t *cur, /* btree cursor */ 603 int level) /* level to check */ 604 { 605 struct xfs_btree_block *block; /* generic btree block pointer */ 606 xfs_buf_t *bp; /* buffer containing block */ 607 608 block = xfs_btree_get_block(cur, level, &bp); 609 xfs_btree_check_block(cur, block, level, bp); 610 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) 611 return block->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK); 612 else 613 return block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK); 614 } 615 616 /* 617 * Change the cursor to point to the first record at the given level. 618 * Other levels are unaffected. 619 */ 620 STATIC int /* success=1, failure=0 */ 621 xfs_btree_firstrec( 622 xfs_btree_cur_t *cur, /* btree cursor */ 623 int level) /* level to change */ 624 { 625 struct xfs_btree_block *block; /* generic btree block pointer */ 626 xfs_buf_t *bp; /* buffer containing block */ 627 628 /* 629 * Get the block pointer for this level. 630 */ 631 block = xfs_btree_get_block(cur, level, &bp); 632 xfs_btree_check_block(cur, block, level, bp); 633 /* 634 * It's empty, there is no such record. 635 */ 636 if (!block->bb_numrecs) 637 return 0; 638 /* 639 * Set the ptr value to 1, that's the first record/key. 640 */ 641 cur->bc_ptrs[level] = 1; 642 return 1; 643 } 644 645 /* 646 * Change the cursor to point to the last record in the current block 647 * at the given level. Other levels are unaffected. 648 */ 649 STATIC int /* success=1, failure=0 */ 650 xfs_btree_lastrec( 651 xfs_btree_cur_t *cur, /* btree cursor */ 652 int level) /* level to change */ 653 { 654 struct xfs_btree_block *block; /* generic btree block pointer */ 655 xfs_buf_t *bp; /* buffer containing block */ 656 657 /* 658 * Get the block pointer for this level. 659 */ 660 block = xfs_btree_get_block(cur, level, &bp); 661 xfs_btree_check_block(cur, block, level, bp); 662 /* 663 * It's empty, there is no such record. 664 */ 665 if (!block->bb_numrecs) 666 return 0; 667 /* 668 * Set the ptr value to numrecs, that's the last record/key. 669 */ 670 cur->bc_ptrs[level] = be16_to_cpu(block->bb_numrecs); 671 return 1; 672 } 673 674 /* 675 * Compute first and last byte offsets for the fields given. 676 * Interprets the offsets table, which contains struct field offsets. 677 */ 678 void 679 xfs_btree_offsets( 680 __int64_t fields, /* bitmask of fields */ 681 const short *offsets, /* table of field offsets */ 682 int nbits, /* number of bits to inspect */ 683 int *first, /* output: first byte offset */ 684 int *last) /* output: last byte offset */ 685 { 686 int i; /* current bit number */ 687 __int64_t imask; /* mask for current bit number */ 688 689 ASSERT(fields != 0); 690 /* 691 * Find the lowest bit, so the first byte offset. 692 */ 693 for (i = 0, imask = 1LL; ; i++, imask <<= 1) { 694 if (imask & fields) { 695 *first = offsets[i]; 696 break; 697 } 698 } 699 /* 700 * Find the highest bit, so the last byte offset. 701 */ 702 for (i = nbits - 1, imask = 1LL << i; ; i--, imask >>= 1) { 703 if (imask & fields) { 704 *last = offsets[i + 1] - 1; 705 break; 706 } 707 } 708 } 709 710 /* 711 * Get a buffer for the block, return it read in. 712 * Long-form addressing. 
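 *
 * Illustrative call (a sketch; the bmap btree buffer ops and reference hint
 * are used here only as an example of the refval/ops pair a caller passes):
 *
 *	error = xfs_btree_read_bufl(mp, tp, fsbno, 0, &bp,
 *				    XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops);
 *	if (error)
 *		return error;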
713 */ 714 int 715 xfs_btree_read_bufl( 716 struct xfs_mount *mp, /* file system mount point */ 717 struct xfs_trans *tp, /* transaction pointer */ 718 xfs_fsblock_t fsbno, /* file system block number */ 719 uint lock, /* lock flags for read_buf */ 720 struct xfs_buf **bpp, /* buffer for fsbno */ 721 int refval, /* ref count value for buffer */ 722 const struct xfs_buf_ops *ops) 723 { 724 struct xfs_buf *bp; /* return value */ 725 xfs_daddr_t d; /* real disk block address */ 726 int error; 727 728 ASSERT(fsbno != NULLFSBLOCK); 729 d = XFS_FSB_TO_DADDR(mp, fsbno); 730 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d, 731 mp->m_bsize, lock, &bp, ops); 732 if (error) 733 return error; 734 if (bp) 735 xfs_buf_set_ref(bp, refval); 736 *bpp = bp; 737 return 0; 738 } 739 740 /* 741 * Read-ahead the block, don't wait for it, don't return a buffer. 742 * Long-form addressing. 743 */ 744 /* ARGSUSED */ 745 void 746 xfs_btree_reada_bufl( 747 struct xfs_mount *mp, /* file system mount point */ 748 xfs_fsblock_t fsbno, /* file system block number */ 749 xfs_extlen_t count, /* count of filesystem blocks */ 750 const struct xfs_buf_ops *ops) 751 { 752 xfs_daddr_t d; 753 754 ASSERT(fsbno != NULLFSBLOCK); 755 d = XFS_FSB_TO_DADDR(mp, fsbno); 756 xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops); 757 } 758 759 /* 760 * Read-ahead the block, don't wait for it, don't return a buffer. 761 * Short-form addressing. 762 */ 763 /* ARGSUSED */ 764 void 765 xfs_btree_reada_bufs( 766 struct xfs_mount *mp, /* file system mount point */ 767 xfs_agnumber_t agno, /* allocation group number */ 768 xfs_agblock_t agbno, /* allocation group block number */ 769 xfs_extlen_t count, /* count of filesystem blocks */ 770 const struct xfs_buf_ops *ops) 771 { 772 xfs_daddr_t d; 773 774 ASSERT(agno != NULLAGNUMBER); 775 ASSERT(agbno != NULLAGBLOCK); 776 d = XFS_AGB_TO_DADDR(mp, agno, agbno); 777 xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops); 778 } 779 780 STATIC int 781 xfs_btree_readahead_lblock( 782 struct xfs_btree_cur *cur, 783 int lr, 784 struct xfs_btree_block *block) 785 { 786 int rval = 0; 787 xfs_fsblock_t left = be64_to_cpu(block->bb_u.l.bb_leftsib); 788 xfs_fsblock_t right = be64_to_cpu(block->bb_u.l.bb_rightsib); 789 790 if ((lr & XFS_BTCUR_LEFTRA) && left != NULLFSBLOCK) { 791 xfs_btree_reada_bufl(cur->bc_mp, left, 1, 792 cur->bc_ops->buf_ops); 793 rval++; 794 } 795 796 if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLFSBLOCK) { 797 xfs_btree_reada_bufl(cur->bc_mp, right, 1, 798 cur->bc_ops->buf_ops); 799 rval++; 800 } 801 802 return rval; 803 } 804 805 STATIC int 806 xfs_btree_readahead_sblock( 807 struct xfs_btree_cur *cur, 808 int lr, 809 struct xfs_btree_block *block) 810 { 811 int rval = 0; 812 xfs_agblock_t left = be32_to_cpu(block->bb_u.s.bb_leftsib); 813 xfs_agblock_t right = be32_to_cpu(block->bb_u.s.bb_rightsib); 814 815 816 if ((lr & XFS_BTCUR_LEFTRA) && left != NULLAGBLOCK) { 817 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno, 818 left, 1, cur->bc_ops->buf_ops); 819 rval++; 820 } 821 822 if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLAGBLOCK) { 823 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno, 824 right, 1, cur->bc_ops->buf_ops); 825 rval++; 826 } 827 828 return rval; 829 } 830 831 /* 832 * Read-ahead btree blocks, at the given level. 833 * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA. 
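 *
 * For example, xfs_btree_increment() below primes the right sibling with
 *
 *	xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
 *
 * before stepping off the current block, and xfs_btree_decrement() does the
 * same with XFS_BTCUR_LEFTRA.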
834 */ 835 STATIC int 836 xfs_btree_readahead( 837 struct xfs_btree_cur *cur, /* btree cursor */ 838 int lev, /* level in btree */ 839 int lr) /* left/right bits */ 840 { 841 struct xfs_btree_block *block; 842 843 /* 844 * No readahead needed if we are at the root level and the 845 * btree root is stored in the inode. 846 */ 847 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && 848 (lev == cur->bc_nlevels - 1)) 849 return 0; 850 851 if ((cur->bc_ra[lev] | lr) == cur->bc_ra[lev]) 852 return 0; 853 854 cur->bc_ra[lev] |= lr; 855 block = XFS_BUF_TO_BLOCK(cur->bc_bufs[lev]); 856 857 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) 858 return xfs_btree_readahead_lblock(cur, lr, block); 859 return xfs_btree_readahead_sblock(cur, lr, block); 860 } 861 862 STATIC xfs_daddr_t 863 xfs_btree_ptr_to_daddr( 864 struct xfs_btree_cur *cur, 865 union xfs_btree_ptr *ptr) 866 { 867 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { 868 ASSERT(ptr->l != cpu_to_be64(NULLFSBLOCK)); 869 870 return XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l)); 871 } else { 872 ASSERT(cur->bc_private.a.agno != NULLAGNUMBER); 873 ASSERT(ptr->s != cpu_to_be32(NULLAGBLOCK)); 874 875 return XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno, 876 be32_to_cpu(ptr->s)); 877 } 878 } 879 880 /* 881 * Readahead @count btree blocks at the given @ptr location. 882 * 883 * We don't need to care about long or short form btrees here as we have a 884 * method of converting the ptr directly to a daddr available to us. 885 */ 886 STATIC void 887 xfs_btree_readahead_ptr( 888 struct xfs_btree_cur *cur, 889 union xfs_btree_ptr *ptr, 890 xfs_extlen_t count) 891 { 892 xfs_buf_readahead(cur->bc_mp->m_ddev_targp, 893 xfs_btree_ptr_to_daddr(cur, ptr), 894 cur->bc_mp->m_bsize * count, cur->bc_ops->buf_ops); 895 } 896 897 /* 898 * Set the buffer for level "lev" in the cursor to bp, releasing 899 * any previous buffer. 
900 */ 901 STATIC void 902 xfs_btree_setbuf( 903 xfs_btree_cur_t *cur, /* btree cursor */ 904 int lev, /* level in btree */ 905 xfs_buf_t *bp) /* new buffer to set */ 906 { 907 struct xfs_btree_block *b; /* btree block */ 908 909 if (cur->bc_bufs[lev]) 910 xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[lev]); 911 cur->bc_bufs[lev] = bp; 912 cur->bc_ra[lev] = 0; 913 914 b = XFS_BUF_TO_BLOCK(bp); 915 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { 916 if (b->bb_u.l.bb_leftsib == cpu_to_be64(NULLFSBLOCK)) 917 cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA; 918 if (b->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK)) 919 cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA; 920 } else { 921 if (b->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK)) 922 cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA; 923 if (b->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK)) 924 cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA; 925 } 926 } 927 928 STATIC int 929 xfs_btree_ptr_is_null( 930 struct xfs_btree_cur *cur, 931 union xfs_btree_ptr *ptr) 932 { 933 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) 934 return ptr->l == cpu_to_be64(NULLFSBLOCK); 935 else 936 return ptr->s == cpu_to_be32(NULLAGBLOCK); 937 } 938 939 STATIC void 940 xfs_btree_set_ptr_null( 941 struct xfs_btree_cur *cur, 942 union xfs_btree_ptr *ptr) 943 { 944 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) 945 ptr->l = cpu_to_be64(NULLFSBLOCK); 946 else 947 ptr->s = cpu_to_be32(NULLAGBLOCK); 948 } 949 950 /* 951 * Get/set/init sibling pointers 952 */ 953 STATIC void 954 xfs_btree_get_sibling( 955 struct xfs_btree_cur *cur, 956 struct xfs_btree_block *block, 957 union xfs_btree_ptr *ptr, 958 int lr) 959 { 960 ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB); 961 962 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { 963 if (lr == XFS_BB_RIGHTSIB) 964 ptr->l = block->bb_u.l.bb_rightsib; 965 else 966 ptr->l = block->bb_u.l.bb_leftsib; 967 } else { 968 if (lr == XFS_BB_RIGHTSIB) 969 ptr->s = block->bb_u.s.bb_rightsib; 970 else 971 ptr->s = block->bb_u.s.bb_leftsib; 972 } 973 } 974 975 STATIC void 976 xfs_btree_set_sibling( 977 struct xfs_btree_cur *cur, 978 struct xfs_btree_block *block, 979 union xfs_btree_ptr *ptr, 980 int lr) 981 { 982 ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB); 983 984 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { 985 if (lr == XFS_BB_RIGHTSIB) 986 block->bb_u.l.bb_rightsib = ptr->l; 987 else 988 block->bb_u.l.bb_leftsib = ptr->l; 989 } else { 990 if (lr == XFS_BB_RIGHTSIB) 991 block->bb_u.s.bb_rightsib = ptr->s; 992 else 993 block->bb_u.s.bb_leftsib = ptr->s; 994 } 995 } 996 997 void 998 xfs_btree_init_block_int( 999 struct xfs_mount *mp, 1000 struct xfs_btree_block *buf, 1001 xfs_daddr_t blkno, 1002 __u32 magic, 1003 __u16 level, 1004 __u16 numrecs, 1005 __u64 owner, 1006 unsigned int flags) 1007 { 1008 buf->bb_magic = cpu_to_be32(magic); 1009 buf->bb_level = cpu_to_be16(level); 1010 buf->bb_numrecs = cpu_to_be16(numrecs); 1011 1012 if (flags & XFS_BTREE_LONG_PTRS) { 1013 buf->bb_u.l.bb_leftsib = cpu_to_be64(NULLFSBLOCK); 1014 buf->bb_u.l.bb_rightsib = cpu_to_be64(NULLFSBLOCK); 1015 if (flags & XFS_BTREE_CRC_BLOCKS) { 1016 buf->bb_u.l.bb_blkno = cpu_to_be64(blkno); 1017 buf->bb_u.l.bb_owner = cpu_to_be64(owner); 1018 uuid_copy(&buf->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid); 1019 buf->bb_u.l.bb_pad = 0; 1020 buf->bb_u.l.bb_lsn = 0; 1021 } 1022 } else { 1023 /* owner is a 32 bit value on short blocks */ 1024 __u32 __owner = (__u32)owner; 1025 1026 buf->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK); 1027 buf->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK); 1028 if (flags & XFS_BTREE_CRC_BLOCKS) { 1029 
buf->bb_u.s.bb_blkno = cpu_to_be64(blkno); 1030 buf->bb_u.s.bb_owner = cpu_to_be32(__owner); 1031 uuid_copy(&buf->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid); 1032 buf->bb_u.s.bb_lsn = 0; 1033 } 1034 } 1035 } 1036 1037 void 1038 xfs_btree_init_block( 1039 struct xfs_mount *mp, 1040 struct xfs_buf *bp, 1041 __u32 magic, 1042 __u16 level, 1043 __u16 numrecs, 1044 __u64 owner, 1045 unsigned int flags) 1046 { 1047 xfs_btree_init_block_int(mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn, 1048 magic, level, numrecs, owner, flags); 1049 } 1050 1051 STATIC void 1052 xfs_btree_init_block_cur( 1053 struct xfs_btree_cur *cur, 1054 struct xfs_buf *bp, 1055 int level, 1056 int numrecs) 1057 { 1058 __u64 owner; 1059 1060 /* 1061 * we can pull the owner from the cursor right now as the different 1062 * owners align directly with the pointer size of the btree. This may 1063 * change in future, but is safe for current users of the generic btree 1064 * code. 1065 */ 1066 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) 1067 owner = cur->bc_private.b.ip->i_ino; 1068 else 1069 owner = cur->bc_private.a.agno; 1070 1071 xfs_btree_init_block_int(cur->bc_mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn, 1072 xfs_btree_magic(cur), level, numrecs, 1073 owner, cur->bc_flags); 1074 } 1075 1076 /* 1077 * Return true if ptr is the last record in the btree and 1078 * we need to track updates to this record. The decision 1079 * will be further refined in the update_lastrec method. 1080 */ 1081 STATIC int 1082 xfs_btree_is_lastrec( 1083 struct xfs_btree_cur *cur, 1084 struct xfs_btree_block *block, 1085 int level) 1086 { 1087 union xfs_btree_ptr ptr; 1088 1089 if (level > 0) 1090 return 0; 1091 if (!(cur->bc_flags & XFS_BTREE_LASTREC_UPDATE)) 1092 return 0; 1093 1094 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB); 1095 if (!xfs_btree_ptr_is_null(cur, &ptr)) 1096 return 0; 1097 return 1; 1098 } 1099 1100 STATIC void 1101 xfs_btree_buf_to_ptr( 1102 struct xfs_btree_cur *cur, 1103 struct xfs_buf *bp, 1104 union xfs_btree_ptr *ptr) 1105 { 1106 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) 1107 ptr->l = cpu_to_be64(XFS_DADDR_TO_FSB(cur->bc_mp, 1108 XFS_BUF_ADDR(bp))); 1109 else { 1110 ptr->s = cpu_to_be32(xfs_daddr_to_agbno(cur->bc_mp, 1111 XFS_BUF_ADDR(bp))); 1112 } 1113 } 1114 1115 STATIC void 1116 xfs_btree_set_refs( 1117 struct xfs_btree_cur *cur, 1118 struct xfs_buf *bp) 1119 { 1120 switch (cur->bc_btnum) { 1121 case XFS_BTNUM_BNO: 1122 case XFS_BTNUM_CNT: 1123 xfs_buf_set_ref(bp, XFS_ALLOC_BTREE_REF); 1124 break; 1125 case XFS_BTNUM_INO: 1126 case XFS_BTNUM_FINO: 1127 xfs_buf_set_ref(bp, XFS_INO_BTREE_REF); 1128 break; 1129 case XFS_BTNUM_BMAP: 1130 xfs_buf_set_ref(bp, XFS_BMAP_BTREE_REF); 1131 break; 1132 default: 1133 ASSERT(0); 1134 } 1135 } 1136 1137 STATIC int 1138 xfs_btree_get_buf_block( 1139 struct xfs_btree_cur *cur, 1140 union xfs_btree_ptr *ptr, 1141 int flags, 1142 struct xfs_btree_block **block, 1143 struct xfs_buf **bpp) 1144 { 1145 struct xfs_mount *mp = cur->bc_mp; 1146 xfs_daddr_t d; 1147 1148 /* need to sort out how callers deal with failures first */ 1149 ASSERT(!(flags & XBF_TRYLOCK)); 1150 1151 d = xfs_btree_ptr_to_daddr(cur, ptr); 1152 *bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d, 1153 mp->m_bsize, flags); 1154 1155 if (!*bpp) 1156 return -ENOMEM; 1157 1158 (*bpp)->b_ops = cur->bc_ops->buf_ops; 1159 *block = XFS_BUF_TO_BLOCK(*bpp); 1160 return 0; 1161 } 1162 1163 /* 1164 * Read in the buffer at the given ptr and return the buffer and 1165 * the block pointer within the buffer. 
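 *
 * A typical use, as in xfs_btree_rshift() further down, is to follow a
 * sibling pointer obtained from xfs_btree_get_sibling():
 *
 *	error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
 *	if (error)
 *		goto error0;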
1166 */ 1167 STATIC int 1168 xfs_btree_read_buf_block( 1169 struct xfs_btree_cur *cur, 1170 union xfs_btree_ptr *ptr, 1171 int flags, 1172 struct xfs_btree_block **block, 1173 struct xfs_buf **bpp) 1174 { 1175 struct xfs_mount *mp = cur->bc_mp; 1176 xfs_daddr_t d; 1177 int error; 1178 1179 /* need to sort out how callers deal with failures first */ 1180 ASSERT(!(flags & XBF_TRYLOCK)); 1181 1182 d = xfs_btree_ptr_to_daddr(cur, ptr); 1183 error = xfs_trans_read_buf(mp, cur->bc_tp, mp->m_ddev_targp, d, 1184 mp->m_bsize, flags, bpp, 1185 cur->bc_ops->buf_ops); 1186 if (error) 1187 return error; 1188 1189 xfs_btree_set_refs(cur, *bpp); 1190 *block = XFS_BUF_TO_BLOCK(*bpp); 1191 return 0; 1192 } 1193 1194 /* 1195 * Copy keys from one btree block to another. 1196 */ 1197 STATIC void 1198 xfs_btree_copy_keys( 1199 struct xfs_btree_cur *cur, 1200 union xfs_btree_key *dst_key, 1201 union xfs_btree_key *src_key, 1202 int numkeys) 1203 { 1204 ASSERT(numkeys >= 0); 1205 memcpy(dst_key, src_key, numkeys * cur->bc_ops->key_len); 1206 } 1207 1208 /* 1209 * Copy records from one btree block to another. 1210 */ 1211 STATIC void 1212 xfs_btree_copy_recs( 1213 struct xfs_btree_cur *cur, 1214 union xfs_btree_rec *dst_rec, 1215 union xfs_btree_rec *src_rec, 1216 int numrecs) 1217 { 1218 ASSERT(numrecs >= 0); 1219 memcpy(dst_rec, src_rec, numrecs * cur->bc_ops->rec_len); 1220 } 1221 1222 /* 1223 * Copy block pointers from one btree block to another. 1224 */ 1225 STATIC void 1226 xfs_btree_copy_ptrs( 1227 struct xfs_btree_cur *cur, 1228 union xfs_btree_ptr *dst_ptr, 1229 union xfs_btree_ptr *src_ptr, 1230 int numptrs) 1231 { 1232 ASSERT(numptrs >= 0); 1233 memcpy(dst_ptr, src_ptr, numptrs * xfs_btree_ptr_len(cur)); 1234 } 1235 1236 /* 1237 * Shift keys one index left/right inside a single btree block. 1238 */ 1239 STATIC void 1240 xfs_btree_shift_keys( 1241 struct xfs_btree_cur *cur, 1242 union xfs_btree_key *key, 1243 int dir, 1244 int numkeys) 1245 { 1246 char *dst_key; 1247 1248 ASSERT(numkeys >= 0); 1249 ASSERT(dir == 1 || dir == -1); 1250 1251 dst_key = (char *)key + (dir * cur->bc_ops->key_len); 1252 memmove(dst_key, key, numkeys * cur->bc_ops->key_len); 1253 } 1254 1255 /* 1256 * Shift records one index left/right inside a single btree block. 1257 */ 1258 STATIC void 1259 xfs_btree_shift_recs( 1260 struct xfs_btree_cur *cur, 1261 union xfs_btree_rec *rec, 1262 int dir, 1263 int numrecs) 1264 { 1265 char *dst_rec; 1266 1267 ASSERT(numrecs >= 0); 1268 ASSERT(dir == 1 || dir == -1); 1269 1270 dst_rec = (char *)rec + (dir * cur->bc_ops->rec_len); 1271 memmove(dst_rec, rec, numrecs * cur->bc_ops->rec_len); 1272 } 1273 1274 /* 1275 * Shift block pointers one index left/right inside a single btree block. 1276 */ 1277 STATIC void 1278 xfs_btree_shift_ptrs( 1279 struct xfs_btree_cur *cur, 1280 union xfs_btree_ptr *ptr, 1281 int dir, 1282 int numptrs) 1283 { 1284 char *dst_ptr; 1285 1286 ASSERT(numptrs >= 0); 1287 ASSERT(dir == 1 || dir == -1); 1288 1289 dst_ptr = (char *)ptr + (dir * xfs_btree_ptr_len(cur)); 1290 memmove(dst_ptr, ptr, numptrs * xfs_btree_ptr_len(cur)); 1291 } 1292 1293 /* 1294 * Log key values from the btree block. 
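 *
 * first and last are 1-based key indices, so for example
 *
 *	xfs_btree_log_keys(cur, bp, 1, numkeys);
 *
 * logs every key in a block holding numkeys keys, from the start of key 1
 * up to the byte just before where key numkeys + 1 would begin.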
1295 */ 1296 STATIC void 1297 xfs_btree_log_keys( 1298 struct xfs_btree_cur *cur, 1299 struct xfs_buf *bp, 1300 int first, 1301 int last) 1302 { 1303 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); 1304 XFS_BTREE_TRACE_ARGBII(cur, bp, first, last); 1305 1306 if (bp) { 1307 xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF); 1308 xfs_trans_log_buf(cur->bc_tp, bp, 1309 xfs_btree_key_offset(cur, first), 1310 xfs_btree_key_offset(cur, last + 1) - 1); 1311 } else { 1312 xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip, 1313 xfs_ilog_fbroot(cur->bc_private.b.whichfork)); 1314 } 1315 1316 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 1317 } 1318 1319 /* 1320 * Log record values from the btree block. 1321 */ 1322 void 1323 xfs_btree_log_recs( 1324 struct xfs_btree_cur *cur, 1325 struct xfs_buf *bp, 1326 int first, 1327 int last) 1328 { 1329 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); 1330 XFS_BTREE_TRACE_ARGBII(cur, bp, first, last); 1331 1332 xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF); 1333 xfs_trans_log_buf(cur->bc_tp, bp, 1334 xfs_btree_rec_offset(cur, first), 1335 xfs_btree_rec_offset(cur, last + 1) - 1); 1336 1337 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 1338 } 1339 1340 /* 1341 * Log block pointer fields from a btree block (nonleaf). 1342 */ 1343 STATIC void 1344 xfs_btree_log_ptrs( 1345 struct xfs_btree_cur *cur, /* btree cursor */ 1346 struct xfs_buf *bp, /* buffer containing btree block */ 1347 int first, /* index of first pointer to log */ 1348 int last) /* index of last pointer to log */ 1349 { 1350 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); 1351 XFS_BTREE_TRACE_ARGBII(cur, bp, first, last); 1352 1353 if (bp) { 1354 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 1355 int level = xfs_btree_get_level(block); 1356 1357 xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF); 1358 xfs_trans_log_buf(cur->bc_tp, bp, 1359 xfs_btree_ptr_offset(cur, first, level), 1360 xfs_btree_ptr_offset(cur, last + 1, level) - 1); 1361 } else { 1362 xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip, 1363 xfs_ilog_fbroot(cur->bc_private.b.whichfork)); 1364 } 1365 1366 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 1367 } 1368 1369 /* 1370 * Log fields from a btree block header. 1371 */ 1372 void 1373 xfs_btree_log_block( 1374 struct xfs_btree_cur *cur, /* btree cursor */ 1375 struct xfs_buf *bp, /* buffer containing btree block */ 1376 int fields) /* mask of fields: XFS_BB_... 
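 * e.g. XFS_BB_NUMRECS, or several XFS_BB_* bits OR'ed together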
*/ 1377 { 1378 int first; /* first byte offset logged */ 1379 int last; /* last byte offset logged */ 1380 static const short soffsets[] = { /* table of offsets (short) */ 1381 offsetof(struct xfs_btree_block, bb_magic), 1382 offsetof(struct xfs_btree_block, bb_level), 1383 offsetof(struct xfs_btree_block, bb_numrecs), 1384 offsetof(struct xfs_btree_block, bb_u.s.bb_leftsib), 1385 offsetof(struct xfs_btree_block, bb_u.s.bb_rightsib), 1386 offsetof(struct xfs_btree_block, bb_u.s.bb_blkno), 1387 offsetof(struct xfs_btree_block, bb_u.s.bb_lsn), 1388 offsetof(struct xfs_btree_block, bb_u.s.bb_uuid), 1389 offsetof(struct xfs_btree_block, bb_u.s.bb_owner), 1390 offsetof(struct xfs_btree_block, bb_u.s.bb_crc), 1391 XFS_BTREE_SBLOCK_CRC_LEN 1392 }; 1393 static const short loffsets[] = { /* table of offsets (long) */ 1394 offsetof(struct xfs_btree_block, bb_magic), 1395 offsetof(struct xfs_btree_block, bb_level), 1396 offsetof(struct xfs_btree_block, bb_numrecs), 1397 offsetof(struct xfs_btree_block, bb_u.l.bb_leftsib), 1398 offsetof(struct xfs_btree_block, bb_u.l.bb_rightsib), 1399 offsetof(struct xfs_btree_block, bb_u.l.bb_blkno), 1400 offsetof(struct xfs_btree_block, bb_u.l.bb_lsn), 1401 offsetof(struct xfs_btree_block, bb_u.l.bb_uuid), 1402 offsetof(struct xfs_btree_block, bb_u.l.bb_owner), 1403 offsetof(struct xfs_btree_block, bb_u.l.bb_crc), 1404 offsetof(struct xfs_btree_block, bb_u.l.bb_pad), 1405 XFS_BTREE_LBLOCK_CRC_LEN 1406 }; 1407 1408 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); 1409 XFS_BTREE_TRACE_ARGBI(cur, bp, fields); 1410 1411 if (bp) { 1412 int nbits; 1413 1414 if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) { 1415 /* 1416 * We don't log the CRC when updating a btree 1417 * block but instead recreate it during log 1418 * recovery. As the log buffers have checksums 1419 * of their own this is safe and avoids logging a crc 1420 * update in a lot of places. 1421 */ 1422 if (fields == XFS_BB_ALL_BITS) 1423 fields = XFS_BB_ALL_BITS_CRC; 1424 nbits = XFS_BB_NUM_BITS_CRC; 1425 } else { 1426 nbits = XFS_BB_NUM_BITS; 1427 } 1428 xfs_btree_offsets(fields, 1429 (cur->bc_flags & XFS_BTREE_LONG_PTRS) ? 1430 loffsets : soffsets, 1431 nbits, &first, &last); 1432 xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF); 1433 xfs_trans_log_buf(cur->bc_tp, bp, first, last); 1434 } else { 1435 xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip, 1436 xfs_ilog_fbroot(cur->bc_private.b.whichfork)); 1437 } 1438 1439 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 1440 } 1441 1442 /* 1443 * Increment cursor by one record at the level. 1444 * For nonzero levels the leaf-ward information is untouched. 1445 */ 1446 int /* error */ 1447 xfs_btree_increment( 1448 struct xfs_btree_cur *cur, 1449 int level, 1450 int *stat) /* success/failure */ 1451 { 1452 struct xfs_btree_block *block; 1453 union xfs_btree_ptr ptr; 1454 struct xfs_buf *bp; 1455 int error; /* error return value */ 1456 int lev; 1457 1458 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); 1459 XFS_BTREE_TRACE_ARGI(cur, level); 1460 1461 ASSERT(level < cur->bc_nlevels); 1462 1463 /* Read-ahead to the right at this level. */ 1464 xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA); 1465 1466 /* Get a pointer to the btree block. */ 1467 block = xfs_btree_get_block(cur, level, &bp); 1468 1469 #ifdef DEBUG 1470 error = xfs_btree_check_block(cur, block, level, bp); 1471 if (error) 1472 goto error0; 1473 #endif 1474 1475 /* We're done if we remain in the block after the increment. 
*/ 1476 if (++cur->bc_ptrs[level] <= xfs_btree_get_numrecs(block)) 1477 goto out1; 1478 1479 /* Fail if we just went off the right edge of the tree. */ 1480 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB); 1481 if (xfs_btree_ptr_is_null(cur, &ptr)) 1482 goto out0; 1483 1484 XFS_BTREE_STATS_INC(cur, increment); 1485 1486 /* 1487 * March up the tree incrementing pointers. 1488 * Stop when we don't go off the right edge of a block. 1489 */ 1490 for (lev = level + 1; lev < cur->bc_nlevels; lev++) { 1491 block = xfs_btree_get_block(cur, lev, &bp); 1492 1493 #ifdef DEBUG 1494 error = xfs_btree_check_block(cur, block, lev, bp); 1495 if (error) 1496 goto error0; 1497 #endif 1498 1499 if (++cur->bc_ptrs[lev] <= xfs_btree_get_numrecs(block)) 1500 break; 1501 1502 /* Read-ahead the right block for the next loop. */ 1503 xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA); 1504 } 1505 1506 /* 1507 * If we went off the root then we are either seriously 1508 * confused or have the tree root in an inode. 1509 */ 1510 if (lev == cur->bc_nlevels) { 1511 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) 1512 goto out0; 1513 ASSERT(0); 1514 error = -EFSCORRUPTED; 1515 goto error0; 1516 } 1517 ASSERT(lev < cur->bc_nlevels); 1518 1519 /* 1520 * Now walk back down the tree, fixing up the cursor's buffer 1521 * pointers and key numbers. 1522 */ 1523 for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) { 1524 union xfs_btree_ptr *ptrp; 1525 1526 ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block); 1527 --lev; 1528 error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp); 1529 if (error) 1530 goto error0; 1531 1532 xfs_btree_setbuf(cur, lev, bp); 1533 cur->bc_ptrs[lev] = 1; 1534 } 1535 out1: 1536 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 1537 *stat = 1; 1538 return 0; 1539 1540 out0: 1541 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 1542 *stat = 0; 1543 return 0; 1544 1545 error0: 1546 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); 1547 return error; 1548 } 1549 1550 /* 1551 * Decrement cursor by one record at the level. 1552 * For nonzero levels the leaf-ward information is untouched. 1553 */ 1554 int /* error */ 1555 xfs_btree_decrement( 1556 struct xfs_btree_cur *cur, 1557 int level, 1558 int *stat) /* success/failure */ 1559 { 1560 struct xfs_btree_block *block; 1561 xfs_buf_t *bp; 1562 int error; /* error return value */ 1563 int lev; 1564 union xfs_btree_ptr ptr; 1565 1566 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); 1567 XFS_BTREE_TRACE_ARGI(cur, level); 1568 1569 ASSERT(level < cur->bc_nlevels); 1570 1571 /* Read-ahead to the left at this level. */ 1572 xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA); 1573 1574 /* We're done if we remain in the block after the decrement. */ 1575 if (--cur->bc_ptrs[level] > 0) 1576 goto out1; 1577 1578 /* Get a pointer to the btree block. */ 1579 block = xfs_btree_get_block(cur, level, &bp); 1580 1581 #ifdef DEBUG 1582 error = xfs_btree_check_block(cur, block, level, bp); 1583 if (error) 1584 goto error0; 1585 #endif 1586 1587 /* Fail if we just went off the left edge of the tree. */ 1588 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB); 1589 if (xfs_btree_ptr_is_null(cur, &ptr)) 1590 goto out0; 1591 1592 XFS_BTREE_STATS_INC(cur, decrement); 1593 1594 /* 1595 * March up the tree decrementing pointers. 1596 * Stop when we don't go off the left edge of a block. 1597 */ 1598 for (lev = level + 1; lev < cur->bc_nlevels; lev++) { 1599 if (--cur->bc_ptrs[lev] > 0) 1600 break; 1601 /* Read-ahead the left block for the next loop. 
*/ 1602 xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA); 1603 } 1604 1605 /* 1606 * If we went off the root then we are seriously confused. 1607 * or the root of the tree is in an inode. 1608 */ 1609 if (lev == cur->bc_nlevels) { 1610 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) 1611 goto out0; 1612 ASSERT(0); 1613 error = -EFSCORRUPTED; 1614 goto error0; 1615 } 1616 ASSERT(lev < cur->bc_nlevels); 1617 1618 /* 1619 * Now walk back down the tree, fixing up the cursor's buffer 1620 * pointers and key numbers. 1621 */ 1622 for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) { 1623 union xfs_btree_ptr *ptrp; 1624 1625 ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block); 1626 --lev; 1627 error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp); 1628 if (error) 1629 goto error0; 1630 xfs_btree_setbuf(cur, lev, bp); 1631 cur->bc_ptrs[lev] = xfs_btree_get_numrecs(block); 1632 } 1633 out1: 1634 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 1635 *stat = 1; 1636 return 0; 1637 1638 out0: 1639 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 1640 *stat = 0; 1641 return 0; 1642 1643 error0: 1644 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); 1645 return error; 1646 } 1647 1648 STATIC int 1649 xfs_btree_lookup_get_block( 1650 struct xfs_btree_cur *cur, /* btree cursor */ 1651 int level, /* level in the btree */ 1652 union xfs_btree_ptr *pp, /* ptr to btree block */ 1653 struct xfs_btree_block **blkp) /* return btree block */ 1654 { 1655 struct xfs_buf *bp; /* buffer pointer for btree block */ 1656 int error = 0; 1657 1658 /* special case the root block if in an inode */ 1659 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && 1660 (level == cur->bc_nlevels - 1)) { 1661 *blkp = xfs_btree_get_iroot(cur); 1662 return 0; 1663 } 1664 1665 /* 1666 * If the old buffer at this level for the disk address we are 1667 * looking for re-use it. 1668 * 1669 * Otherwise throw it away and get a new one. 1670 */ 1671 bp = cur->bc_bufs[level]; 1672 if (bp && XFS_BUF_ADDR(bp) == xfs_btree_ptr_to_daddr(cur, pp)) { 1673 *blkp = XFS_BUF_TO_BLOCK(bp); 1674 return 0; 1675 } 1676 1677 error = xfs_btree_read_buf_block(cur, pp, 0, blkp, &bp); 1678 if (error) 1679 return error; 1680 1681 xfs_btree_setbuf(cur, level, bp); 1682 return 0; 1683 } 1684 1685 /* 1686 * Get current search key. For level 0 we don't actually have a key 1687 * structure so we make one up from the record. For all other levels 1688 * we just return the right key. 1689 */ 1690 STATIC union xfs_btree_key * 1691 xfs_lookup_get_search_key( 1692 struct xfs_btree_cur *cur, 1693 int level, 1694 int keyno, 1695 struct xfs_btree_block *block, 1696 union xfs_btree_key *kp) 1697 { 1698 if (level == 0) { 1699 cur->bc_ops->init_key_from_rec(kp, 1700 xfs_btree_rec_addr(cur, keyno, block)); 1701 return kp; 1702 } 1703 1704 return xfs_btree_key_addr(cur, keyno, block); 1705 } 1706 1707 /* 1708 * Lookup the record. The cursor is made to point to it, based on dir. 1709 * stat is set to 0 if can't find any such record, 1 for success. 
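 *
 * dir is one of XFS_LOOKUP_LE, XFS_LOOKUP_EQ or XFS_LOOKUP_GE. As a hedged
 * sketch of the calling convention (using the allocation btree member of the
 * bc_rec union as the example), a caller fills in the lookup record first
 * and then asks for an exact match:
 *
 *	cur->bc_rec.a.ar_startblock = bno;
 *	cur->bc_rec.a.ar_blockcount = len;
 *	error = xfs_btree_lookup(cur, XFS_LOOKUP_EQ, &stat);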
1710 */ 1711 int /* error */ 1712 xfs_btree_lookup( 1713 struct xfs_btree_cur *cur, /* btree cursor */ 1714 xfs_lookup_t dir, /* <=, ==, or >= */ 1715 int *stat) /* success/failure */ 1716 { 1717 struct xfs_btree_block *block; /* current btree block */ 1718 __int64_t diff; /* difference for the current key */ 1719 int error; /* error return value */ 1720 int keyno; /* current key number */ 1721 int level; /* level in the btree */ 1722 union xfs_btree_ptr *pp; /* ptr to btree block */ 1723 union xfs_btree_ptr ptr; /* ptr to btree block */ 1724 1725 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); 1726 XFS_BTREE_TRACE_ARGI(cur, dir); 1727 1728 XFS_BTREE_STATS_INC(cur, lookup); 1729 1730 block = NULL; 1731 keyno = 0; 1732 1733 /* initialise start pointer from cursor */ 1734 cur->bc_ops->init_ptr_from_cur(cur, &ptr); 1735 pp = &ptr; 1736 1737 /* 1738 * Iterate over each level in the btree, starting at the root. 1739 * For each level above the leaves, find the key we need, based 1740 * on the lookup record, then follow the corresponding block 1741 * pointer down to the next level. 1742 */ 1743 for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) { 1744 /* Get the block we need to do the lookup on. */ 1745 error = xfs_btree_lookup_get_block(cur, level, pp, &block); 1746 if (error) 1747 goto error0; 1748 1749 if (diff == 0) { 1750 /* 1751 * If we already had a key match at a higher level, we 1752 * know we need to use the first entry in this block. 1753 */ 1754 keyno = 1; 1755 } else { 1756 /* Otherwise search this block. Do a binary search. */ 1757 1758 int high; /* high entry number */ 1759 int low; /* low entry number */ 1760 1761 /* Set low and high entry numbers, 1-based. */ 1762 low = 1; 1763 high = xfs_btree_get_numrecs(block); 1764 if (!high) { 1765 /* Block is empty, must be an empty leaf. */ 1766 ASSERT(level == 0 && cur->bc_nlevels == 1); 1767 1768 cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE; 1769 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 1770 *stat = 0; 1771 return 0; 1772 } 1773 1774 /* Binary search the block. */ 1775 while (low <= high) { 1776 union xfs_btree_key key; 1777 union xfs_btree_key *kp; 1778 1779 XFS_BTREE_STATS_INC(cur, compare); 1780 1781 /* keyno is average of low and high. */ 1782 keyno = (low + high) >> 1; 1783 1784 /* Get current search key */ 1785 kp = xfs_lookup_get_search_key(cur, level, 1786 keyno, block, &key); 1787 1788 /* 1789 * Compute difference to get next direction: 1790 * - less than, move right 1791 * - greater than, move left 1792 * - equal, we're done 1793 */ 1794 diff = cur->bc_ops->key_diff(cur, kp); 1795 if (diff < 0) 1796 low = keyno + 1; 1797 else if (diff > 0) 1798 high = keyno - 1; 1799 else 1800 break; 1801 } 1802 } 1803 1804 /* 1805 * If there are more levels, set up for the next level 1806 * by getting the block number and filling in the cursor. 1807 */ 1808 if (level > 0) { 1809 /* 1810 * If we moved left, need the previous key number, 1811 * unless there isn't one. 1812 */ 1813 if (diff > 0 && --keyno < 1) 1814 keyno = 1; 1815 pp = xfs_btree_ptr_addr(cur, keyno, block); 1816 1817 #ifdef DEBUG 1818 error = xfs_btree_check_ptr(cur, pp, 0, level); 1819 if (error) 1820 goto error0; 1821 #endif 1822 cur->bc_ptrs[level] = keyno; 1823 } 1824 } 1825 1826 /* Done with the search. See if we need to adjust the results. */ 1827 if (dir != XFS_LOOKUP_LE && diff < 0) { 1828 keyno++; 1829 /* 1830 * If ge search and we went off the end of the block, but it's 1831 * not the last block, we're in the wrong block. 
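 * For example, a GE lookup whose key is greater than every entry in this
 * leaf ends up with keyno == numrecs + 1; stepping the cursor with
 * xfs_btree_increment() below moves it to the first record of the right
 * sibling, which is the record the caller wanted.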
1832 */ 1833 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB); 1834 if (dir == XFS_LOOKUP_GE && 1835 keyno > xfs_btree_get_numrecs(block) && 1836 !xfs_btree_ptr_is_null(cur, &ptr)) { 1837 int i; 1838 1839 cur->bc_ptrs[0] = keyno; 1840 error = xfs_btree_increment(cur, 0, &i); 1841 if (error) 1842 goto error0; 1843 XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1); 1844 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 1845 *stat = 1; 1846 return 0; 1847 } 1848 } else if (dir == XFS_LOOKUP_LE && diff > 0) 1849 keyno--; 1850 cur->bc_ptrs[0] = keyno; 1851 1852 /* Return if we succeeded or not. */ 1853 if (keyno == 0 || keyno > xfs_btree_get_numrecs(block)) 1854 *stat = 0; 1855 else if (dir != XFS_LOOKUP_EQ || diff == 0) 1856 *stat = 1; 1857 else 1858 *stat = 0; 1859 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 1860 return 0; 1861 1862 error0: 1863 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); 1864 return error; 1865 } 1866 1867 /* 1868 * Update keys at all levels from here to the root along the cursor's path. 1869 */ 1870 STATIC int 1871 xfs_btree_updkey( 1872 struct xfs_btree_cur *cur, 1873 union xfs_btree_key *keyp, 1874 int level) 1875 { 1876 struct xfs_btree_block *block; 1877 struct xfs_buf *bp; 1878 union xfs_btree_key *kp; 1879 int ptr; 1880 1881 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); 1882 XFS_BTREE_TRACE_ARGIK(cur, level, keyp); 1883 1884 ASSERT(!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) || level >= 1); 1885 1886 /* 1887 * Go up the tree from this level toward the root. 1888 * At each level, update the key value to the value input. 1889 * Stop when we reach a level where the cursor isn't pointing 1890 * at the first entry in the block. 1891 */ 1892 for (ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) { 1893 #ifdef DEBUG 1894 int error; 1895 #endif 1896 block = xfs_btree_get_block(cur, level, &bp); 1897 #ifdef DEBUG 1898 error = xfs_btree_check_block(cur, block, level, bp); 1899 if (error) { 1900 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); 1901 return error; 1902 } 1903 #endif 1904 ptr = cur->bc_ptrs[level]; 1905 kp = xfs_btree_key_addr(cur, ptr, block); 1906 xfs_btree_copy_keys(cur, kp, keyp, 1); 1907 xfs_btree_log_keys(cur, bp, ptr, ptr); 1908 } 1909 1910 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 1911 return 0; 1912 } 1913 1914 /* 1915 * Update the record referred to by cur to the value in the 1916 * given record. This either works (return 0) or gets an 1917 * EFSCORRUPTED error. 1918 */ 1919 int 1920 xfs_btree_update( 1921 struct xfs_btree_cur *cur, 1922 union xfs_btree_rec *rec) 1923 { 1924 struct xfs_btree_block *block; 1925 struct xfs_buf *bp; 1926 int error; 1927 int ptr; 1928 union xfs_btree_rec *rp; 1929 1930 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); 1931 XFS_BTREE_TRACE_ARGR(cur, rec); 1932 1933 /* Pick up the current block. */ 1934 block = xfs_btree_get_block(cur, 0, &bp); 1935 1936 #ifdef DEBUG 1937 error = xfs_btree_check_block(cur, block, 0, bp); 1938 if (error) 1939 goto error0; 1940 #endif 1941 /* Get the address of the rec to be updated. */ 1942 ptr = cur->bc_ptrs[0]; 1943 rp = xfs_btree_rec_addr(cur, ptr, block); 1944 1945 /* Fill in the new contents and log them. */ 1946 xfs_btree_copy_recs(cur, rp, rec, 1); 1947 xfs_btree_log_recs(cur, bp, ptr, ptr); 1948 1949 /* 1950 * If we are tracking the last record in the tree and 1951 * we are at the far right edge of the tree, update it. 1952 */ 1953 if (xfs_btree_is_lastrec(cur, block, 0)) { 1954 cur->bc_ops->update_lastrec(cur, block, rec, 1955 ptr, LASTREC_UPDATE); 1956 } 1957 1958 /* Updating first rec in leaf. 
Pass new key value up to our parent. */ 1959 if (ptr == 1) { 1960 union xfs_btree_key key; 1961 1962 cur->bc_ops->init_key_from_rec(&key, rec); 1963 error = xfs_btree_updkey(cur, &key, 1); 1964 if (error) 1965 goto error0; 1966 } 1967 1968 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 1969 return 0; 1970 1971 error0: 1972 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); 1973 return error; 1974 } 1975 1976 /* 1977 * Move 1 record left from cur/level if possible. 1978 * Update cur to reflect the new path. 1979 */ 1980 STATIC int /* error */ 1981 xfs_btree_lshift( 1982 struct xfs_btree_cur *cur, 1983 int level, 1984 int *stat) /* success/failure */ 1985 { 1986 union xfs_btree_key key; /* btree key */ 1987 struct xfs_buf *lbp; /* left buffer pointer */ 1988 struct xfs_btree_block *left; /* left btree block */ 1989 int lrecs; /* left record count */ 1990 struct xfs_buf *rbp; /* right buffer pointer */ 1991 struct xfs_btree_block *right; /* right btree block */ 1992 int rrecs; /* right record count */ 1993 union xfs_btree_ptr lptr; /* left btree pointer */ 1994 union xfs_btree_key *rkp = NULL; /* right btree key */ 1995 union xfs_btree_ptr *rpp = NULL; /* right address pointer */ 1996 union xfs_btree_rec *rrp = NULL; /* right record pointer */ 1997 int error; /* error return value */ 1998 1999 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); 2000 XFS_BTREE_TRACE_ARGI(cur, level); 2001 2002 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && 2003 level == cur->bc_nlevels - 1) 2004 goto out0; 2005 2006 /* Set up variables for this block as "right". */ 2007 right = xfs_btree_get_block(cur, level, &rbp); 2008 2009 #ifdef DEBUG 2010 error = xfs_btree_check_block(cur, right, level, rbp); 2011 if (error) 2012 goto error0; 2013 #endif 2014 2015 /* If we've got no left sibling then we can't shift an entry left. */ 2016 xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB); 2017 if (xfs_btree_ptr_is_null(cur, &lptr)) 2018 goto out0; 2019 2020 /* 2021 * If the cursor entry is the one that would be moved, don't 2022 * do it... it's too complicated. 2023 */ 2024 if (cur->bc_ptrs[level] <= 1) 2025 goto out0; 2026 2027 /* Set up the left neighbor as "left". */ 2028 error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp); 2029 if (error) 2030 goto error0; 2031 2032 /* If it's full, it can't take another entry. */ 2033 lrecs = xfs_btree_get_numrecs(left); 2034 if (lrecs == cur->bc_ops->get_maxrecs(cur, level)) 2035 goto out0; 2036 2037 rrecs = xfs_btree_get_numrecs(right); 2038 2039 /* 2040 * We add one entry to the left side and remove one for the right side. 2041 * Account for it here, the changes will be updated on disk and logged 2042 * later. 2043 */ 2044 lrecs++; 2045 rrecs--; 2046 2047 XFS_BTREE_STATS_INC(cur, lshift); 2048 XFS_BTREE_STATS_ADD(cur, moves, 1); 2049 2050 /* 2051 * If non-leaf, copy a key and a ptr to the left block. 2052 * Log the changes to the left block. 2053 */ 2054 if (level > 0) { 2055 /* It's a non-leaf. Move keys and pointers. 
*/ 2056 union xfs_btree_key *lkp; /* left btree key */ 2057 union xfs_btree_ptr *lpp; /* left address pointer */ 2058 2059 lkp = xfs_btree_key_addr(cur, lrecs, left); 2060 rkp = xfs_btree_key_addr(cur, 1, right); 2061 2062 lpp = xfs_btree_ptr_addr(cur, lrecs, left); 2063 rpp = xfs_btree_ptr_addr(cur, 1, right); 2064 #ifdef DEBUG 2065 error = xfs_btree_check_ptr(cur, rpp, 0, level); 2066 if (error) 2067 goto error0; 2068 #endif 2069 xfs_btree_copy_keys(cur, lkp, rkp, 1); 2070 xfs_btree_copy_ptrs(cur, lpp, rpp, 1); 2071 2072 xfs_btree_log_keys(cur, lbp, lrecs, lrecs); 2073 xfs_btree_log_ptrs(cur, lbp, lrecs, lrecs); 2074 2075 ASSERT(cur->bc_ops->keys_inorder(cur, 2076 xfs_btree_key_addr(cur, lrecs - 1, left), lkp)); 2077 } else { 2078 /* It's a leaf. Move records. */ 2079 union xfs_btree_rec *lrp; /* left record pointer */ 2080 2081 lrp = xfs_btree_rec_addr(cur, lrecs, left); 2082 rrp = xfs_btree_rec_addr(cur, 1, right); 2083 2084 xfs_btree_copy_recs(cur, lrp, rrp, 1); 2085 xfs_btree_log_recs(cur, lbp, lrecs, lrecs); 2086 2087 ASSERT(cur->bc_ops->recs_inorder(cur, 2088 xfs_btree_rec_addr(cur, lrecs - 1, left), lrp)); 2089 } 2090 2091 xfs_btree_set_numrecs(left, lrecs); 2092 xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS); 2093 2094 xfs_btree_set_numrecs(right, rrecs); 2095 xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS); 2096 2097 /* 2098 * Slide the contents of right down one entry. 2099 */ 2100 XFS_BTREE_STATS_ADD(cur, moves, rrecs - 1); 2101 if (level > 0) { 2102 /* It's a nonleaf. operate on keys and ptrs */ 2103 #ifdef DEBUG 2104 int i; /* loop index */ 2105 2106 for (i = 0; i < rrecs; i++) { 2107 error = xfs_btree_check_ptr(cur, rpp, i + 1, level); 2108 if (error) 2109 goto error0; 2110 } 2111 #endif 2112 xfs_btree_shift_keys(cur, 2113 xfs_btree_key_addr(cur, 2, right), 2114 -1, rrecs); 2115 xfs_btree_shift_ptrs(cur, 2116 xfs_btree_ptr_addr(cur, 2, right), 2117 -1, rrecs); 2118 2119 xfs_btree_log_keys(cur, rbp, 1, rrecs); 2120 xfs_btree_log_ptrs(cur, rbp, 1, rrecs); 2121 } else { 2122 /* It's a leaf. operate on records */ 2123 xfs_btree_shift_recs(cur, 2124 xfs_btree_rec_addr(cur, 2, right), 2125 -1, rrecs); 2126 xfs_btree_log_recs(cur, rbp, 1, rrecs); 2127 2128 /* 2129 * If it's the first record in the block, we'll need a key 2130 * structure to pass up to the next level (updkey). 2131 */ 2132 cur->bc_ops->init_key_from_rec(&key, 2133 xfs_btree_rec_addr(cur, 1, right)); 2134 rkp = &key; 2135 } 2136 2137 /* Update the parent key values of right. */ 2138 error = xfs_btree_updkey(cur, rkp, level + 1); 2139 if (error) 2140 goto error0; 2141 2142 /* Slide the cursor value left one. */ 2143 cur->bc_ptrs[level]--; 2144 2145 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 2146 *stat = 1; 2147 return 0; 2148 2149 out0: 2150 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 2151 *stat = 0; 2152 return 0; 2153 2154 error0: 2155 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); 2156 return error; 2157 } 2158 2159 /* 2160 * Move 1 record right from cur/level if possible. 2161 * Update cur to reflect the new path. 
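* This is the mirror image of xfs_btree_lshift(): it needs a right sibling with spare room, and it updates that sibling's parent key via a temporary cursor.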
2162 */ 2163 STATIC int /* error */ 2164 xfs_btree_rshift( 2165 struct xfs_btree_cur *cur, 2166 int level, 2167 int *stat) /* success/failure */ 2168 { 2169 union xfs_btree_key key; /* btree key */ 2170 struct xfs_buf *lbp; /* left buffer pointer */ 2171 struct xfs_btree_block *left; /* left btree block */ 2172 struct xfs_buf *rbp; /* right buffer pointer */ 2173 struct xfs_btree_block *right; /* right btree block */ 2174 struct xfs_btree_cur *tcur; /* temporary btree cursor */ 2175 union xfs_btree_ptr rptr; /* right block pointer */ 2176 union xfs_btree_key *rkp; /* right btree key */ 2177 int rrecs; /* right record count */ 2178 int lrecs; /* left record count */ 2179 int error; /* error return value */ 2180 int i; /* loop counter */ 2181 2182 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); 2183 XFS_BTREE_TRACE_ARGI(cur, level); 2184 2185 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && 2186 (level == cur->bc_nlevels - 1)) 2187 goto out0; 2188 2189 /* Set up variables for this block as "left". */ 2190 left = xfs_btree_get_block(cur, level, &lbp); 2191 2192 #ifdef DEBUG 2193 error = xfs_btree_check_block(cur, left, level, lbp); 2194 if (error) 2195 goto error0; 2196 #endif 2197 2198 /* If we've got no right sibling then we can't shift an entry right. */ 2199 xfs_btree_get_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB); 2200 if (xfs_btree_ptr_is_null(cur, &rptr)) 2201 goto out0; 2202 2203 /* 2204 * If the cursor entry is the one that would be moved, don't 2205 * do it... it's too complicated. 2206 */ 2207 lrecs = xfs_btree_get_numrecs(left); 2208 if (cur->bc_ptrs[level] >= lrecs) 2209 goto out0; 2210 2211 /* Set up the right neighbor as "right". */ 2212 error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp); 2213 if (error) 2214 goto error0; 2215 2216 /* If it's full, it can't take another entry. */ 2217 rrecs = xfs_btree_get_numrecs(right); 2218 if (rrecs == cur->bc_ops->get_maxrecs(cur, level)) 2219 goto out0; 2220 2221 XFS_BTREE_STATS_INC(cur, rshift); 2222 XFS_BTREE_STATS_ADD(cur, moves, rrecs); 2223 2224 /* 2225 * Make a hole at the start of the right neighbor block, then 2226 * copy the last left block entry to the hole. 2227 */ 2228 if (level > 0) { 2229 /* It's a nonleaf. make a hole in the keys and ptrs */ 2230 union xfs_btree_key *lkp; 2231 union xfs_btree_ptr *lpp; 2232 union xfs_btree_ptr *rpp; 2233 2234 lkp = xfs_btree_key_addr(cur, lrecs, left); 2235 lpp = xfs_btree_ptr_addr(cur, lrecs, left); 2236 rkp = xfs_btree_key_addr(cur, 1, right); 2237 rpp = xfs_btree_ptr_addr(cur, 1, right); 2238 2239 #ifdef DEBUG 2240 for (i = rrecs - 1; i >= 0; i--) { 2241 error = xfs_btree_check_ptr(cur, rpp, i, level); 2242 if (error) 2243 goto error0; 2244 } 2245 #endif 2246 2247 xfs_btree_shift_keys(cur, rkp, 1, rrecs); 2248 xfs_btree_shift_ptrs(cur, rpp, 1, rrecs); 2249 2250 #ifdef DEBUG 2251 error = xfs_btree_check_ptr(cur, lpp, 0, level); 2252 if (error) 2253 goto error0; 2254 #endif 2255 2256 /* Now put the new data in, and log it. */ 2257 xfs_btree_copy_keys(cur, rkp, lkp, 1); 2258 xfs_btree_copy_ptrs(cur, rpp, lpp, 1); 2259 2260 xfs_btree_log_keys(cur, rbp, 1, rrecs + 1); 2261 xfs_btree_log_ptrs(cur, rbp, 1, rrecs + 1); 2262 2263 ASSERT(cur->bc_ops->keys_inorder(cur, rkp, 2264 xfs_btree_key_addr(cur, 2, right))); 2265 } else { 2266 /* It's a leaf. 
make a hole in the records */ 2267 union xfs_btree_rec *lrp; 2268 union xfs_btree_rec *rrp; 2269 2270 lrp = xfs_btree_rec_addr(cur, lrecs, left); 2271 rrp = xfs_btree_rec_addr(cur, 1, right); 2272 2273 xfs_btree_shift_recs(cur, rrp, 1, rrecs); 2274 2275 /* Now put the new data in, and log it. */ 2276 xfs_btree_copy_recs(cur, rrp, lrp, 1); 2277 xfs_btree_log_recs(cur, rbp, 1, rrecs + 1); 2278 2279 cur->bc_ops->init_key_from_rec(&key, rrp); 2280 rkp = &key; 2281 2282 ASSERT(cur->bc_ops->recs_inorder(cur, rrp, 2283 xfs_btree_rec_addr(cur, 2, right))); 2284 } 2285 2286 /* 2287 * Decrement and log left's numrecs, bump and log right's numrecs. 2288 */ 2289 xfs_btree_set_numrecs(left, --lrecs); 2290 xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS); 2291 2292 xfs_btree_set_numrecs(right, ++rrecs); 2293 xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS); 2294 2295 /* 2296 * Using a temporary cursor, update the parent key values of the 2297 * block on the right. 2298 */ 2299 error = xfs_btree_dup_cursor(cur, &tcur); 2300 if (error) 2301 goto error0; 2302 i = xfs_btree_lastrec(tcur, level); 2303 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); 2304 2305 error = xfs_btree_increment(tcur, level, &i); 2306 if (error) 2307 goto error1; 2308 2309 error = xfs_btree_updkey(tcur, rkp, level + 1); 2310 if (error) 2311 goto error1; 2312 2313 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); 2314 2315 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 2316 *stat = 1; 2317 return 0; 2318 2319 out0: 2320 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 2321 *stat = 0; 2322 return 0; 2323 2324 error0: 2325 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); 2326 return error; 2327 2328 error1: 2329 XFS_BTREE_TRACE_CURSOR(tcur, XBT_ERROR); 2330 xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); 2331 return error; 2332 } 2333 2334 /* 2335 * Split cur/level block in half. 2336 * Return new block number and the key to its first 2337 * record (to be inserted into parent). 2338 */ 2339 STATIC int /* error */ 2340 __xfs_btree_split( 2341 struct xfs_btree_cur *cur, 2342 int level, 2343 union xfs_btree_ptr *ptrp, 2344 union xfs_btree_key *key, 2345 struct xfs_btree_cur **curp, 2346 int *stat) /* success/failure */ 2347 { 2348 union xfs_btree_ptr lptr; /* left sibling block ptr */ 2349 struct xfs_buf *lbp; /* left buffer pointer */ 2350 struct xfs_btree_block *left; /* left btree block */ 2351 union xfs_btree_ptr rptr; /* right sibling block ptr */ 2352 struct xfs_buf *rbp; /* right buffer pointer */ 2353 struct xfs_btree_block *right; /* right btree block */ 2354 union xfs_btree_ptr rrptr; /* right-right sibling ptr */ 2355 struct xfs_buf *rrbp; /* right-right buffer pointer */ 2356 struct xfs_btree_block *rrblock; /* right-right btree block */ 2357 int lrecs; 2358 int rrecs; 2359 int src_index; 2360 int error; /* error return value */ 2361 #ifdef DEBUG 2362 int i; 2363 #endif 2364 2365 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); 2366 XFS_BTREE_TRACE_ARGIPK(cur, level, *ptrp, key); 2367 2368 XFS_BTREE_STATS_INC(cur, split); 2369 2370 /* Set up left block (current one). */ 2371 left = xfs_btree_get_block(cur, level, &lbp); 2372 2373 #ifdef DEBUG 2374 error = xfs_btree_check_block(cur, left, level, lbp); 2375 if (error) 2376 goto error0; 2377 #endif 2378 2379 xfs_btree_buf_to_ptr(cur, lbp, &lptr); 2380 2381 /* Allocate the new block. If we can't do it, we're toast. Give up. 
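A zero *stat from alloc_block means no space was available, so the split is abandoned.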
*/ 2382 error = cur->bc_ops->alloc_block(cur, &lptr, &rptr, stat); 2383 if (error) 2384 goto error0; 2385 if (*stat == 0) 2386 goto out0; 2387 XFS_BTREE_STATS_INC(cur, alloc); 2388 2389 /* Set up the new block as "right". */ 2390 error = xfs_btree_get_buf_block(cur, &rptr, 0, &right, &rbp); 2391 if (error) 2392 goto error0; 2393 2394 /* Fill in the btree header for the new right block. */ 2395 xfs_btree_init_block_cur(cur, rbp, xfs_btree_get_level(left), 0); 2396 2397 /* 2398 * Split the entries between the old and the new block evenly. 2399 * Make sure that if there's an odd number of entries now, that 2400 * each new block will have the same number of entries. 2401 */ 2402 lrecs = xfs_btree_get_numrecs(left); 2403 rrecs = lrecs / 2; 2404 if ((lrecs & 1) && cur->bc_ptrs[level] <= rrecs + 1) 2405 rrecs++; 2406 src_index = (lrecs - rrecs + 1); 2407 2408 XFS_BTREE_STATS_ADD(cur, moves, rrecs); 2409 2410 /* 2411 * Copy btree block entries from the left block over to the 2412 * new block, the right. Update the right block and log the 2413 * changes. 2414 */ 2415 if (level > 0) { 2416 /* It's a non-leaf. Move keys and pointers. */ 2417 union xfs_btree_key *lkp; /* left btree key */ 2418 union xfs_btree_ptr *lpp; /* left address pointer */ 2419 union xfs_btree_key *rkp; /* right btree key */ 2420 union xfs_btree_ptr *rpp; /* right address pointer */ 2421 2422 lkp = xfs_btree_key_addr(cur, src_index, left); 2423 lpp = xfs_btree_ptr_addr(cur, src_index, left); 2424 rkp = xfs_btree_key_addr(cur, 1, right); 2425 rpp = xfs_btree_ptr_addr(cur, 1, right); 2426 2427 #ifdef DEBUG 2428 for (i = src_index; i < rrecs; i++) { 2429 error = xfs_btree_check_ptr(cur, lpp, i, level); 2430 if (error) 2431 goto error0; 2432 } 2433 #endif 2434 2435 xfs_btree_copy_keys(cur, rkp, lkp, rrecs); 2436 xfs_btree_copy_ptrs(cur, rpp, lpp, rrecs); 2437 2438 xfs_btree_log_keys(cur, rbp, 1, rrecs); 2439 xfs_btree_log_ptrs(cur, rbp, 1, rrecs); 2440 2441 /* Grab the keys to the entries moved to the right block */ 2442 xfs_btree_copy_keys(cur, key, rkp, 1); 2443 } else { 2444 /* It's a leaf. Move records. */ 2445 union xfs_btree_rec *lrp; /* left record pointer */ 2446 union xfs_btree_rec *rrp; /* right record pointer */ 2447 2448 lrp = xfs_btree_rec_addr(cur, src_index, left); 2449 rrp = xfs_btree_rec_addr(cur, 1, right); 2450 2451 xfs_btree_copy_recs(cur, rrp, lrp, rrecs); 2452 xfs_btree_log_recs(cur, rbp, 1, rrecs); 2453 2454 cur->bc_ops->init_key_from_rec(key, 2455 xfs_btree_rec_addr(cur, 1, right)); 2456 } 2457 2458 2459 /* 2460 * Find the left block number by looking in the buffer. 2461 * Adjust numrecs, sibling pointers. 2462 */ 2463 xfs_btree_get_sibling(cur, left, &rrptr, XFS_BB_RIGHTSIB); 2464 xfs_btree_set_sibling(cur, right, &rrptr, XFS_BB_RIGHTSIB); 2465 xfs_btree_set_sibling(cur, right, &lptr, XFS_BB_LEFTSIB); 2466 xfs_btree_set_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB); 2467 2468 lrecs -= rrecs; 2469 xfs_btree_set_numrecs(left, lrecs); 2470 xfs_btree_set_numrecs(right, xfs_btree_get_numrecs(right) + rrecs); 2471 2472 xfs_btree_log_block(cur, rbp, XFS_BB_ALL_BITS); 2473 xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB); 2474 2475 /* 2476 * If there's a block to the new block's right, make that block 2477 * point back to right instead of to left. 
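* That is, the old right sibling's left-sibling pointer must now reference the new block.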
2478 */ 2479 if (!xfs_btree_ptr_is_null(cur, &rrptr)) { 2480 error = xfs_btree_read_buf_block(cur, &rrptr, 2481 0, &rrblock, &rrbp); 2482 if (error) 2483 goto error0; 2484 xfs_btree_set_sibling(cur, rrblock, &rptr, XFS_BB_LEFTSIB); 2485 xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB); 2486 } 2487 /* 2488 * If the cursor is really in the right block, move it there. 2489 * If it's just pointing past the last entry in left, then we'll 2490 * insert there, so don't change anything in that case. 2491 */ 2492 if (cur->bc_ptrs[level] > lrecs + 1) { 2493 xfs_btree_setbuf(cur, level, rbp); 2494 cur->bc_ptrs[level] -= lrecs; 2495 } 2496 /* 2497 * If there are more levels, we'll need another cursor which refers 2498 * the right block, no matter where this cursor was. 2499 */ 2500 if (level + 1 < cur->bc_nlevels) { 2501 error = xfs_btree_dup_cursor(cur, curp); 2502 if (error) 2503 goto error0; 2504 (*curp)->bc_ptrs[level + 1]++; 2505 } 2506 *ptrp = rptr; 2507 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 2508 *stat = 1; 2509 return 0; 2510 out0: 2511 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 2512 *stat = 0; 2513 return 0; 2514 2515 error0: 2516 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); 2517 return error; 2518 } 2519 2520 struct xfs_btree_split_args { 2521 struct xfs_btree_cur *cur; 2522 int level; 2523 union xfs_btree_ptr *ptrp; 2524 union xfs_btree_key *key; 2525 struct xfs_btree_cur **curp; 2526 int *stat; /* success/failure */ 2527 int result; 2528 bool kswapd; /* allocation in kswapd context */ 2529 struct completion *done; 2530 struct work_struct work; 2531 }; 2532 2533 /* 2534 * Stack switching interfaces for allocation 2535 */ 2536 static void 2537 xfs_btree_split_worker( 2538 struct work_struct *work) 2539 { 2540 struct xfs_btree_split_args *args = container_of(work, 2541 struct xfs_btree_split_args, work); 2542 unsigned long pflags; 2543 unsigned long new_pflags = PF_FSTRANS; 2544 2545 /* 2546 * we are in a transaction context here, but may also be doing work 2547 * in kswapd context, and hence we may need to inherit that state 2548 * temporarily to ensure that we don't block waiting for memory reclaim 2549 * in any way. 2550 */ 2551 if (args->kswapd) 2552 new_pflags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD; 2553 2554 current_set_flags_nested(&pflags, new_pflags); 2555 2556 args->result = __xfs_btree_split(args->cur, args->level, args->ptrp, 2557 args->key, args->curp, args->stat); 2558 complete(args->done); 2559 2560 current_restore_flags_nested(&pflags, new_pflags); 2561 } 2562 2563 /* 2564 * BMBT split requests often come in with little stack to work on. Push 2565 * them off to a worker thread so there is lots of stack to use. For the other 2566 * btree types, just call directly to avoid the context switch overhead here. 
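* The worker runs __xfs_btree_split() from the xfs_alloc_wq workqueue while the caller blocks on a completion.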
2567 */ 2568 STATIC int /* error */ 2569 xfs_btree_split( 2570 struct xfs_btree_cur *cur, 2571 int level, 2572 union xfs_btree_ptr *ptrp, 2573 union xfs_btree_key *key, 2574 struct xfs_btree_cur **curp, 2575 int *stat) /* success/failure */ 2576 { 2577 struct xfs_btree_split_args args; 2578 DECLARE_COMPLETION_ONSTACK(done); 2579 2580 if (cur->bc_btnum != XFS_BTNUM_BMAP) 2581 return __xfs_btree_split(cur, level, ptrp, key, curp, stat); 2582 2583 args.cur = cur; 2584 args.level = level; 2585 args.ptrp = ptrp; 2586 args.key = key; 2587 args.curp = curp; 2588 args.stat = stat; 2589 args.done = &done; 2590 args.kswapd = current_is_kswapd(); 2591 INIT_WORK_ONSTACK(&args.work, xfs_btree_split_worker); 2592 queue_work(xfs_alloc_wq, &args.work); 2593 wait_for_completion(&done); 2594 destroy_work_on_stack(&args.work); 2595 return args.result; 2596 } 2597 2598 2599 /* 2600 * Copy the old inode root contents into a real block and make the 2601 * broot point to it. 2602 */ 2603 int /* error */ 2604 xfs_btree_new_iroot( 2605 struct xfs_btree_cur *cur, /* btree cursor */ 2606 int *logflags, /* logging flags for inode */ 2607 int *stat) /* return status - 0 fail */ 2608 { 2609 struct xfs_buf *cbp; /* buffer for cblock */ 2610 struct xfs_btree_block *block; /* btree block */ 2611 struct xfs_btree_block *cblock; /* child btree block */ 2612 union xfs_btree_key *ckp; /* child key pointer */ 2613 union xfs_btree_ptr *cpp; /* child ptr pointer */ 2614 union xfs_btree_key *kp; /* pointer to btree key */ 2615 union xfs_btree_ptr *pp; /* pointer to block addr */ 2616 union xfs_btree_ptr nptr; /* new block addr */ 2617 int level; /* btree level */ 2618 int error; /* error return code */ 2619 #ifdef DEBUG 2620 int i; /* loop counter */ 2621 #endif 2622 2623 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); 2624 XFS_BTREE_STATS_INC(cur, newroot); 2625 2626 ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE); 2627 2628 level = cur->bc_nlevels - 1; 2629 2630 block = xfs_btree_get_iroot(cur); 2631 pp = xfs_btree_ptr_addr(cur, 1, block); 2632 2633 /* Allocate the new block. If we can't do it, we're toast. Give up. */ 2634 error = cur->bc_ops->alloc_block(cur, pp, &nptr, stat); 2635 if (error) 2636 goto error0; 2637 if (*stat == 0) { 2638 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 2639 return 0; 2640 } 2641 XFS_BTREE_STATS_INC(cur, alloc); 2642 2643 /* Copy the root into a real block. */ 2644 error = xfs_btree_get_buf_block(cur, &nptr, 0, &cblock, &cbp); 2645 if (error) 2646 goto error0; 2647 2648 /* 2649 * we can't just memcpy() the root in for CRC enabled btree blocks. 
2650 * In that case have to also ensure the blkno remains correct 2651 */ 2652 memcpy(cblock, block, xfs_btree_block_len(cur)); 2653 if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) { 2654 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) 2655 cblock->bb_u.l.bb_blkno = cpu_to_be64(cbp->b_bn); 2656 else 2657 cblock->bb_u.s.bb_blkno = cpu_to_be64(cbp->b_bn); 2658 } 2659 2660 be16_add_cpu(&block->bb_level, 1); 2661 xfs_btree_set_numrecs(block, 1); 2662 cur->bc_nlevels++; 2663 cur->bc_ptrs[level + 1] = 1; 2664 2665 kp = xfs_btree_key_addr(cur, 1, block); 2666 ckp = xfs_btree_key_addr(cur, 1, cblock); 2667 xfs_btree_copy_keys(cur, ckp, kp, xfs_btree_get_numrecs(cblock)); 2668 2669 cpp = xfs_btree_ptr_addr(cur, 1, cblock); 2670 #ifdef DEBUG 2671 for (i = 0; i < be16_to_cpu(cblock->bb_numrecs); i++) { 2672 error = xfs_btree_check_ptr(cur, pp, i, level); 2673 if (error) 2674 goto error0; 2675 } 2676 #endif 2677 xfs_btree_copy_ptrs(cur, cpp, pp, xfs_btree_get_numrecs(cblock)); 2678 2679 #ifdef DEBUG 2680 error = xfs_btree_check_ptr(cur, &nptr, 0, level); 2681 if (error) 2682 goto error0; 2683 #endif 2684 xfs_btree_copy_ptrs(cur, pp, &nptr, 1); 2685 2686 xfs_iroot_realloc(cur->bc_private.b.ip, 2687 1 - xfs_btree_get_numrecs(cblock), 2688 cur->bc_private.b.whichfork); 2689 2690 xfs_btree_setbuf(cur, level, cbp); 2691 2692 /* 2693 * Do all this logging at the end so that 2694 * the root is at the right level. 2695 */ 2696 xfs_btree_log_block(cur, cbp, XFS_BB_ALL_BITS); 2697 xfs_btree_log_keys(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs)); 2698 xfs_btree_log_ptrs(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs)); 2699 2700 *logflags |= 2701 XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_private.b.whichfork); 2702 *stat = 1; 2703 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 2704 return 0; 2705 error0: 2706 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); 2707 return error; 2708 } 2709 2710 /* 2711 * Allocate a new root block, fill it in. 2712 */ 2713 STATIC int /* error */ 2714 xfs_btree_new_root( 2715 struct xfs_btree_cur *cur, /* btree cursor */ 2716 int *stat) /* success/failure */ 2717 { 2718 struct xfs_btree_block *block; /* one half of the old root block */ 2719 struct xfs_buf *bp; /* buffer containing block */ 2720 int error; /* error return value */ 2721 struct xfs_buf *lbp; /* left buffer pointer */ 2722 struct xfs_btree_block *left; /* left btree block */ 2723 struct xfs_buf *nbp; /* new (root) buffer */ 2724 struct xfs_btree_block *new; /* new (root) btree block */ 2725 int nptr; /* new value for key index, 1 or 2 */ 2726 struct xfs_buf *rbp; /* right buffer pointer */ 2727 struct xfs_btree_block *right; /* right btree block */ 2728 union xfs_btree_ptr rptr; 2729 union xfs_btree_ptr lptr; 2730 2731 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); 2732 XFS_BTREE_STATS_INC(cur, newroot); 2733 2734 /* initialise our start point from the cursor */ 2735 cur->bc_ops->init_ptr_from_cur(cur, &rptr); 2736 2737 /* Allocate the new block. If we can't do it, we're toast. Give up. */ 2738 error = cur->bc_ops->alloc_block(cur, &rptr, &lptr, stat); 2739 if (error) 2740 goto error0; 2741 if (*stat == 0) 2742 goto out0; 2743 XFS_BTREE_STATS_INC(cur, alloc); 2744 2745 /* Set up the new block. */ 2746 error = xfs_btree_get_buf_block(cur, &lptr, 0, &new, &nbp); 2747 if (error) 2748 goto error0; 2749 2750 /* Set the root in the holding structure increasing the level by 1. */ 2751 cur->bc_ops->set_root(cur, &lptr, 1); 2752 2753 /* 2754 * At the previous root level there are now two blocks: the old root, 2755 * and the new block generated when it was split. 
We don't know which 2756 * one the cursor is pointing at, so we set up variables "left" and 2757 * "right" for each case. 2758 */ 2759 block = xfs_btree_get_block(cur, cur->bc_nlevels - 1, &bp); 2760 2761 #ifdef DEBUG 2762 error = xfs_btree_check_block(cur, block, cur->bc_nlevels - 1, bp); 2763 if (error) 2764 goto error0; 2765 #endif 2766 2767 xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB); 2768 if (!xfs_btree_ptr_is_null(cur, &rptr)) { 2769 /* Our block is left, pick up the right block. */ 2770 lbp = bp; 2771 xfs_btree_buf_to_ptr(cur, lbp, &lptr); 2772 left = block; 2773 error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp); 2774 if (error) 2775 goto error0; 2776 bp = rbp; 2777 nptr = 1; 2778 } else { 2779 /* Our block is right, pick up the left block. */ 2780 rbp = bp; 2781 xfs_btree_buf_to_ptr(cur, rbp, &rptr); 2782 right = block; 2783 xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB); 2784 error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp); 2785 if (error) 2786 goto error0; 2787 bp = lbp; 2788 nptr = 2; 2789 } 2790 /* Fill in the new block's btree header and log it. */ 2791 xfs_btree_init_block_cur(cur, nbp, cur->bc_nlevels, 2); 2792 xfs_btree_log_block(cur, nbp, XFS_BB_ALL_BITS); 2793 ASSERT(!xfs_btree_ptr_is_null(cur, &lptr) && 2794 !xfs_btree_ptr_is_null(cur, &rptr)); 2795 2796 /* Fill in the key data in the new root. */ 2797 if (xfs_btree_get_level(left) > 0) { 2798 xfs_btree_copy_keys(cur, 2799 xfs_btree_key_addr(cur, 1, new), 2800 xfs_btree_key_addr(cur, 1, left), 1); 2801 xfs_btree_copy_keys(cur, 2802 xfs_btree_key_addr(cur, 2, new), 2803 xfs_btree_key_addr(cur, 1, right), 1); 2804 } else { 2805 cur->bc_ops->init_key_from_rec( 2806 xfs_btree_key_addr(cur, 1, new), 2807 xfs_btree_rec_addr(cur, 1, left)); 2808 cur->bc_ops->init_key_from_rec( 2809 xfs_btree_key_addr(cur, 2, new), 2810 xfs_btree_rec_addr(cur, 1, right)); 2811 } 2812 xfs_btree_log_keys(cur, nbp, 1, 2); 2813 2814 /* Fill in the pointer data in the new root. */ 2815 xfs_btree_copy_ptrs(cur, 2816 xfs_btree_ptr_addr(cur, 1, new), &lptr, 1); 2817 xfs_btree_copy_ptrs(cur, 2818 xfs_btree_ptr_addr(cur, 2, new), &rptr, 1); 2819 xfs_btree_log_ptrs(cur, nbp, 1, 2); 2820 2821 /* Fix up the cursor. */ 2822 xfs_btree_setbuf(cur, cur->bc_nlevels, nbp); 2823 cur->bc_ptrs[cur->bc_nlevels] = nptr; 2824 cur->bc_nlevels++; 2825 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 2826 *stat = 1; 2827 return 0; 2828 error0: 2829 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); 2830 return error; 2831 out0: 2832 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 2833 *stat = 0; 2834 return 0; 2835 } 2836 2837 STATIC int 2838 xfs_btree_make_block_unfull( 2839 struct xfs_btree_cur *cur, /* btree cursor */ 2840 int level, /* btree level */ 2841 int numrecs,/* # of recs in block */ 2842 int *oindex,/* old tree index */ 2843 int *index, /* new tree index */ 2844 union xfs_btree_ptr *nptr, /* new btree ptr */ 2845 struct xfs_btree_cur **ncur, /* new btree cursor */ 2846 union xfs_btree_rec *nrec, /* new record */ 2847 int *stat) 2848 { 2849 union xfs_btree_key key; /* new btree key value */ 2850 int error = 0; 2851 2852 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && 2853 level == cur->bc_nlevels - 1) { 2854 struct xfs_inode *ip = cur->bc_private.b.ip; 2855 2856 if (numrecs < cur->bc_ops->get_dmaxrecs(cur, level)) { 2857 /* A root block that can be made bigger. 
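Just expand the incore root by one record's worth of space.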
*/ 2858 xfs_iroot_realloc(ip, 1, cur->bc_private.b.whichfork); 2859 } else { 2860 /* A root block that needs replacing */ 2861 int logflags = 0; 2862 2863 error = xfs_btree_new_iroot(cur, &logflags, stat); 2864 if (error || *stat == 0) 2865 return error; 2866 2867 xfs_trans_log_inode(cur->bc_tp, ip, logflags); 2868 } 2869 2870 return 0; 2871 } 2872 2873 /* First, try shifting an entry to the right neighbor. */ 2874 error = xfs_btree_rshift(cur, level, stat); 2875 if (error || *stat) 2876 return error; 2877 2878 /* Next, try shifting an entry to the left neighbor. */ 2879 error = xfs_btree_lshift(cur, level, stat); 2880 if (error) 2881 return error; 2882 2883 if (*stat) { 2884 *oindex = *index = cur->bc_ptrs[level]; 2885 return 0; 2886 } 2887 2888 /* 2889 * Next, try splitting the current block in half. 2890 * 2891 * If this works we have to re-set our variables because we 2892 * could be in a different block now. 2893 */ 2894 error = xfs_btree_split(cur, level, nptr, &key, ncur, stat); 2895 if (error || *stat == 0) 2896 return error; 2897 2898 2899 *index = cur->bc_ptrs[level]; 2900 cur->bc_ops->init_rec_from_key(&key, nrec); 2901 return 0; 2902 } 2903 2904 /* 2905 * Insert one record/level. Return information to the caller 2906 * allowing the next level up to proceed if necessary. 2907 */ 2908 STATIC int 2909 xfs_btree_insrec( 2910 struct xfs_btree_cur *cur, /* btree cursor */ 2911 int level, /* level to insert record at */ 2912 union xfs_btree_ptr *ptrp, /* i/o: block number inserted */ 2913 union xfs_btree_rec *recp, /* i/o: record data inserted */ 2914 struct xfs_btree_cur **curp, /* output: new cursor replacing cur */ 2915 int *stat) /* success/failure */ 2916 { 2917 struct xfs_btree_block *block; /* btree block */ 2918 struct xfs_buf *bp; /* buffer for block */ 2919 union xfs_btree_key key; /* btree key */ 2920 union xfs_btree_ptr nptr; /* new block ptr */ 2921 struct xfs_btree_cur *ncur; /* new btree cursor */ 2922 union xfs_btree_rec nrec; /* new record count */ 2923 int optr; /* old key/record index */ 2924 int ptr; /* key/record index */ 2925 int numrecs;/* number of records */ 2926 int error; /* error return value */ 2927 #ifdef DEBUG 2928 int i; 2929 #endif 2930 2931 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); 2932 XFS_BTREE_TRACE_ARGIPR(cur, level, *ptrp, recp); 2933 2934 ncur = NULL; 2935 2936 /* 2937 * If we have an external root pointer, and we've made it to the 2938 * root level, allocate a new root block and we're done. 2939 */ 2940 if (!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && 2941 (level >= cur->bc_nlevels)) { 2942 error = xfs_btree_new_root(cur, stat); 2943 xfs_btree_set_ptr_null(cur, ptrp); 2944 2945 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 2946 return error; 2947 } 2948 2949 /* If we're off the left edge, return failure. */ 2950 ptr = cur->bc_ptrs[level]; 2951 if (ptr == 0) { 2952 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 2953 *stat = 0; 2954 return 0; 2955 } 2956 2957 /* Make a key out of the record data to be inserted, and save it. */ 2958 cur->bc_ops->init_key_from_rec(&key, recp); 2959 2960 optr = ptr; 2961 2962 XFS_BTREE_STATS_INC(cur, insrec); 2963 2964 /* Get pointers to the btree buffer and block. */ 2965 block = xfs_btree_get_block(cur, level, &bp); 2966 numrecs = xfs_btree_get_numrecs(block); 2967 2968 #ifdef DEBUG 2969 error = xfs_btree_check_block(cur, block, level, bp); 2970 if (error) 2971 goto error0; 2972 2973 /* Check that the new entry is being inserted in the right place. 
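The existing entry at the insertion index must sort after the one being inserted.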
*/ 2974 if (ptr <= numrecs) { 2975 if (level == 0) { 2976 ASSERT(cur->bc_ops->recs_inorder(cur, recp, 2977 xfs_btree_rec_addr(cur, ptr, block))); 2978 } else { 2979 ASSERT(cur->bc_ops->keys_inorder(cur, &key, 2980 xfs_btree_key_addr(cur, ptr, block))); 2981 } 2982 } 2983 #endif 2984 2985 /* 2986 * If the block is full, we can't insert the new entry until we 2987 * make the block un-full. 2988 */ 2989 xfs_btree_set_ptr_null(cur, &nptr); 2990 if (numrecs == cur->bc_ops->get_maxrecs(cur, level)) { 2991 error = xfs_btree_make_block_unfull(cur, level, numrecs, 2992 &optr, &ptr, &nptr, &ncur, &nrec, stat); 2993 if (error || *stat == 0) 2994 goto error0; 2995 } 2996 2997 /* 2998 * The current block may have changed if the block was 2999 * previously full and we have just made space in it. 3000 */ 3001 block = xfs_btree_get_block(cur, level, &bp); 3002 numrecs = xfs_btree_get_numrecs(block); 3003 3004 #ifdef DEBUG 3005 error = xfs_btree_check_block(cur, block, level, bp); 3006 if (error) 3007 return error; 3008 #endif 3009 3010 /* 3011 * At this point we know there's room for our new entry in the block 3012 * we're pointing at. 3013 */ 3014 XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr + 1); 3015 3016 if (level > 0) { 3017 /* It's a nonleaf. make a hole in the keys and ptrs */ 3018 union xfs_btree_key *kp; 3019 union xfs_btree_ptr *pp; 3020 3021 kp = xfs_btree_key_addr(cur, ptr, block); 3022 pp = xfs_btree_ptr_addr(cur, ptr, block); 3023 3024 #ifdef DEBUG 3025 for (i = numrecs - ptr; i >= 0; i--) { 3026 error = xfs_btree_check_ptr(cur, pp, i, level); 3027 if (error) 3028 return error; 3029 } 3030 #endif 3031 3032 xfs_btree_shift_keys(cur, kp, 1, numrecs - ptr + 1); 3033 xfs_btree_shift_ptrs(cur, pp, 1, numrecs - ptr + 1); 3034 3035 #ifdef DEBUG 3036 error = xfs_btree_check_ptr(cur, ptrp, 0, level); 3037 if (error) 3038 goto error0; 3039 #endif 3040 3041 /* Now put the new data in, bump numrecs and log it. */ 3042 xfs_btree_copy_keys(cur, kp, &key, 1); 3043 xfs_btree_copy_ptrs(cur, pp, ptrp, 1); 3044 numrecs++; 3045 xfs_btree_set_numrecs(block, numrecs); 3046 xfs_btree_log_ptrs(cur, bp, ptr, numrecs); 3047 xfs_btree_log_keys(cur, bp, ptr, numrecs); 3048 #ifdef DEBUG 3049 if (ptr < numrecs) { 3050 ASSERT(cur->bc_ops->keys_inorder(cur, kp, 3051 xfs_btree_key_addr(cur, ptr + 1, block))); 3052 } 3053 #endif 3054 } else { 3055 /* It's a leaf. make a hole in the records */ 3056 union xfs_btree_rec *rp; 3057 3058 rp = xfs_btree_rec_addr(cur, ptr, block); 3059 3060 xfs_btree_shift_recs(cur, rp, 1, numrecs - ptr + 1); 3061 3062 /* Now put the new data in, bump numrecs and log it. */ 3063 xfs_btree_copy_recs(cur, rp, recp, 1); 3064 xfs_btree_set_numrecs(block, ++numrecs); 3065 xfs_btree_log_recs(cur, bp, ptr, numrecs); 3066 #ifdef DEBUG 3067 if (ptr < numrecs) { 3068 ASSERT(cur->bc_ops->recs_inorder(cur, rp, 3069 xfs_btree_rec_addr(cur, ptr + 1, block))); 3070 } 3071 #endif 3072 } 3073 3074 /* Log the new number of records in the btree header. */ 3075 xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS); 3076 3077 /* If we inserted at the start of a block, update the parents' keys. */ 3078 if (optr == 1) { 3079 error = xfs_btree_updkey(cur, &key, level + 1); 3080 if (error) 3081 goto error0; 3082 } 3083 3084 /* 3085 * If we are tracking the last record in the tree and 3086 * we are at the far right edge of the tree, update it. 
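* (For example, the by-count free space btree uses this hook to keep the AGF's longest-free-extent count current.)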
3087 */ 3088 if (xfs_btree_is_lastrec(cur, block, level)) { 3089 cur->bc_ops->update_lastrec(cur, block, recp, 3090 ptr, LASTREC_INSREC); 3091 } 3092 3093 /* 3094 * Return the new block number, if any. 3095 * If there is one, give back a record value and a cursor too. 3096 */ 3097 *ptrp = nptr; 3098 if (!xfs_btree_ptr_is_null(cur, &nptr)) { 3099 *recp = nrec; 3100 *curp = ncur; 3101 } 3102 3103 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 3104 *stat = 1; 3105 return 0; 3106 3107 error0: 3108 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); 3109 return error; 3110 } 3111 3112 /* 3113 * Insert the record at the point referenced by cur. 3114 * 3115 * A multi-level split of the tree on insert will invalidate the original 3116 * cursor. All callers of this function should assume that the cursor is 3117 * no longer valid and revalidate it. 3118 */ 3119 int 3120 xfs_btree_insert( 3121 struct xfs_btree_cur *cur, 3122 int *stat) 3123 { 3124 int error; /* error return value */ 3125 int i; /* result value, 0 for failure */ 3126 int level; /* current level number in btree */ 3127 union xfs_btree_ptr nptr; /* new block number (split result) */ 3128 struct xfs_btree_cur *ncur; /* new cursor (split result) */ 3129 struct xfs_btree_cur *pcur; /* previous level's cursor */ 3130 union xfs_btree_rec rec; /* record to insert */ 3131 3132 level = 0; 3133 ncur = NULL; 3134 pcur = cur; 3135 3136 xfs_btree_set_ptr_null(cur, &nptr); 3137 cur->bc_ops->init_rec_from_cur(cur, &rec); 3138 3139 /* 3140 * Loop going up the tree, starting at the leaf level. 3141 * Stop when we don't get a split block, which must mean that 3142 * the insert is finished with this level. 3143 */ 3144 do { 3145 /* 3146 * Insert nrec/nptr into this level of the tree. 3147 * Note if we fail, nptr will be null. 3148 */ 3149 error = xfs_btree_insrec(pcur, level, &nptr, &rec, &ncur, &i); 3150 if (error) { 3151 if (pcur != cur) 3152 xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR); 3153 goto error0; 3154 } 3155 3156 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); 3157 level++; 3158 3159 /* 3160 * See if the cursor we just used is trash. 3161 * Can't trash the caller's cursor, but otherwise we should 3162 * if ncur is a new cursor or we're about to be done. 3163 */ 3164 if (pcur != cur && 3165 (ncur || xfs_btree_ptr_is_null(cur, &nptr))) { 3166 /* Save the state from the cursor before we trash it */ 3167 if (cur->bc_ops->update_cursor) 3168 cur->bc_ops->update_cursor(pcur, cur); 3169 cur->bc_nlevels = pcur->bc_nlevels; 3170 xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR); 3171 } 3172 /* If we got a new cursor, switch to it. */ 3173 if (ncur) { 3174 pcur = ncur; 3175 ncur = NULL; 3176 } 3177 } while (!xfs_btree_ptr_is_null(cur, &nptr)); 3178 3179 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 3180 *stat = i; 3181 return 0; 3182 error0: 3183 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); 3184 return error; 3185 } 3186 3187 /* 3188 * Try to merge a non-leaf block back into the inode root. 3189 * 3190 * Note: the killroot name comes from the fact that we're effectively 3191 * killing the old root block. But because we can't just delete the 3192 * inode we have to copy the single block it was pointing to into the 3193 * inode.
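* The tree loses one level and the former child block is freed.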
3194 */ 3195 STATIC int 3196 xfs_btree_kill_iroot( 3197 struct xfs_btree_cur *cur) 3198 { 3199 int whichfork = cur->bc_private.b.whichfork; 3200 struct xfs_inode *ip = cur->bc_private.b.ip; 3201 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); 3202 struct xfs_btree_block *block; 3203 struct xfs_btree_block *cblock; 3204 union xfs_btree_key *kp; 3205 union xfs_btree_key *ckp; 3206 union xfs_btree_ptr *pp; 3207 union xfs_btree_ptr *cpp; 3208 struct xfs_buf *cbp; 3209 int level; 3210 int index; 3211 int numrecs; 3212 #ifdef DEBUG 3213 union xfs_btree_ptr ptr; 3214 int i; 3215 #endif 3216 3217 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); 3218 3219 ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE); 3220 ASSERT(cur->bc_nlevels > 1); 3221 3222 /* 3223 * Don't deal with the case where the root block needs to be a leaf. 3224 * We're just going to turn the thing back into extents anyway. 3225 */ 3226 level = cur->bc_nlevels - 1; 3227 if (level == 1) 3228 goto out0; 3229 3230 /* 3231 * Give up if the root has multiple children. 3232 */ 3233 block = xfs_btree_get_iroot(cur); 3234 if (xfs_btree_get_numrecs(block) != 1) 3235 goto out0; 3236 3237 cblock = xfs_btree_get_block(cur, level - 1, &cbp); 3238 numrecs = xfs_btree_get_numrecs(cblock); 3239 3240 /* 3241 * Only do this if the next level will fit. 3242 * Then the data must be copied up to the inode; instead of freeing 3243 * the root, you free the next level. 3244 */ 3245 if (numrecs > cur->bc_ops->get_dmaxrecs(cur, level)) 3246 goto out0; 3247 3248 XFS_BTREE_STATS_INC(cur, killroot); 3249 3250 #ifdef DEBUG 3251 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB); 3252 ASSERT(xfs_btree_ptr_is_null(cur, &ptr)); 3253 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB); 3254 ASSERT(xfs_btree_ptr_is_null(cur, &ptr)); 3255 #endif 3256 3257 index = numrecs - cur->bc_ops->get_maxrecs(cur, level); 3258 if (index) { 3259 xfs_iroot_realloc(cur->bc_private.b.ip, index, 3260 cur->bc_private.b.whichfork); 3261 block = ifp->if_broot; 3262 } 3263 3264 be16_add_cpu(&block->bb_numrecs, index); 3265 ASSERT(block->bb_numrecs == cblock->bb_numrecs); 3266 3267 kp = xfs_btree_key_addr(cur, 1, block); 3268 ckp = xfs_btree_key_addr(cur, 1, cblock); 3269 xfs_btree_copy_keys(cur, kp, ckp, numrecs); 3270 3271 pp = xfs_btree_ptr_addr(cur, 1, block); 3272 cpp = xfs_btree_ptr_addr(cur, 1, cblock); 3273 #ifdef DEBUG 3274 for (i = 0; i < numrecs; i++) { 3275 int error; 3276 3277 error = xfs_btree_check_ptr(cur, cpp, i, level - 1); 3278 if (error) { 3279 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); 3280 return error; 3281 } 3282 } 3283 #endif 3284 xfs_btree_copy_ptrs(cur, pp, cpp, numrecs); 3285 3286 cur->bc_ops->free_block(cur, cbp); 3287 XFS_BTREE_STATS_INC(cur, free); 3288 3289 cur->bc_bufs[level - 1] = NULL; 3290 be16_add_cpu(&block->bb_level, -1); 3291 xfs_trans_log_inode(cur->bc_tp, ip, 3292 XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_private.b.whichfork)); 3293 cur->bc_nlevels--; 3294 out0: 3295 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 3296 return 0; 3297 } 3298 3299 /* 3300 * Kill the current root node, and replace it with its only child node. 3301 */ 3302 STATIC int 3303 xfs_btree_kill_root( 3304 struct xfs_btree_cur *cur, 3305 struct xfs_buf *bp, 3306 int level, 3307 union xfs_btree_ptr *newroot) 3308 { 3309 int error; 3310 3311 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); 3312 XFS_BTREE_STATS_INC(cur, killroot); 3313 3314 /* 3315 * Update the root pointer, decreasing the level by 1 and then 3316 * free the old root.
3317 */ 3318 cur->bc_ops->set_root(cur, newroot, -1); 3319 3320 error = cur->bc_ops->free_block(cur, bp); 3321 if (error) { 3322 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); 3323 return error; 3324 } 3325 3326 XFS_BTREE_STATS_INC(cur, free); 3327 3328 cur->bc_bufs[level] = NULL; 3329 cur->bc_ra[level] = 0; 3330 cur->bc_nlevels--; 3331 3332 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 3333 return 0; 3334 } 3335 3336 STATIC int 3337 xfs_btree_dec_cursor( 3338 struct xfs_btree_cur *cur, 3339 int level, 3340 int *stat) 3341 { 3342 int error; 3343 int i; 3344 3345 if (level > 0) { 3346 error = xfs_btree_decrement(cur, level, &i); 3347 if (error) 3348 return error; 3349 } 3350 3351 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 3352 *stat = 1; 3353 return 0; 3354 } 3355 3356 /* 3357 * Single level of the btree record deletion routine. 3358 * Delete record pointed to by cur/level. 3359 * Remove the record from its block then rebalance the tree. 3360 * Return 0 for error, 1 for done, 2 to go on to the next level. 3361 */ 3362 STATIC int /* error */ 3363 xfs_btree_delrec( 3364 struct xfs_btree_cur *cur, /* btree cursor */ 3365 int level, /* level removing record from */ 3366 int *stat) /* fail/done/go-on */ 3367 { 3368 struct xfs_btree_block *block; /* btree block */ 3369 union xfs_btree_ptr cptr; /* current block ptr */ 3370 struct xfs_buf *bp; /* buffer for block */ 3371 int error; /* error return value */ 3372 int i; /* loop counter */ 3373 union xfs_btree_key key; /* storage for keyp */ 3374 union xfs_btree_key *keyp = &key; /* passed to the next level */ 3375 union xfs_btree_ptr lptr; /* left sibling block ptr */ 3376 struct xfs_buf *lbp; /* left buffer pointer */ 3377 struct xfs_btree_block *left; /* left btree block */ 3378 int lrecs = 0; /* left record count */ 3379 int ptr; /* key/record index */ 3380 union xfs_btree_ptr rptr; /* right sibling block ptr */ 3381 struct xfs_buf *rbp; /* right buffer pointer */ 3382 struct xfs_btree_block *right; /* right btree block */ 3383 struct xfs_btree_block *rrblock; /* right-right btree block */ 3384 struct xfs_buf *rrbp; /* right-right buffer pointer */ 3385 int rrecs = 0; /* right record count */ 3386 struct xfs_btree_cur *tcur; /* temporary btree cursor */ 3387 int numrecs; /* temporary numrec count */ 3388 3389 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); 3390 XFS_BTREE_TRACE_ARGI(cur, level); 3391 3392 tcur = NULL; 3393 3394 /* Get the index of the entry being deleted, check for nothing there. */ 3395 ptr = cur->bc_ptrs[level]; 3396 if (ptr == 0) { 3397 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 3398 *stat = 0; 3399 return 0; 3400 } 3401 3402 /* Get the buffer & block containing the record or key/ptr. */ 3403 block = xfs_btree_get_block(cur, level, &bp); 3404 numrecs = xfs_btree_get_numrecs(block); 3405 3406 #ifdef DEBUG 3407 error = xfs_btree_check_block(cur, block, level, bp); 3408 if (error) 3409 goto error0; 3410 #endif 3411 3412 /* Fail if we're off the end of the block. */ 3413 if (ptr > numrecs) { 3414 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 3415 *stat = 0; 3416 return 0; 3417 } 3418 3419 XFS_BTREE_STATS_INC(cur, delrec); 3420 XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr); 3421 3422 /* Excise the entries being deleted. */ 3423 if (level > 0) { 3424 /* It's a nonleaf. 
operate on keys and ptrs */ 3425 union xfs_btree_key *lkp; 3426 union xfs_btree_ptr *lpp; 3427 3428 lkp = xfs_btree_key_addr(cur, ptr + 1, block); 3429 lpp = xfs_btree_ptr_addr(cur, ptr + 1, block); 3430 3431 #ifdef DEBUG 3432 for (i = 0; i < numrecs - ptr; i++) { 3433 error = xfs_btree_check_ptr(cur, lpp, i, level); 3434 if (error) 3435 goto error0; 3436 } 3437 #endif 3438 3439 if (ptr < numrecs) { 3440 xfs_btree_shift_keys(cur, lkp, -1, numrecs - ptr); 3441 xfs_btree_shift_ptrs(cur, lpp, -1, numrecs - ptr); 3442 xfs_btree_log_keys(cur, bp, ptr, numrecs - 1); 3443 xfs_btree_log_ptrs(cur, bp, ptr, numrecs - 1); 3444 } 3445 3446 /* 3447 * If it's the first record in the block, we'll need to pass a 3448 * key up to the next level (updkey). 3449 */ 3450 if (ptr == 1) 3451 keyp = xfs_btree_key_addr(cur, 1, block); 3452 } else { 3453 /* It's a leaf. operate on records */ 3454 if (ptr < numrecs) { 3455 xfs_btree_shift_recs(cur, 3456 xfs_btree_rec_addr(cur, ptr + 1, block), 3457 -1, numrecs - ptr); 3458 xfs_btree_log_recs(cur, bp, ptr, numrecs - 1); 3459 } 3460 3461 /* 3462 * If it's the first record in the block, we'll need a key 3463 * structure to pass up to the next level (updkey). 3464 */ 3465 if (ptr == 1) { 3466 cur->bc_ops->init_key_from_rec(&key, 3467 xfs_btree_rec_addr(cur, 1, block)); 3468 keyp = &key; 3469 } 3470 } 3471 3472 /* 3473 * Decrement and log the number of entries in the block. 3474 */ 3475 xfs_btree_set_numrecs(block, --numrecs); 3476 xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS); 3477 3478 /* 3479 * If we are tracking the last record in the tree and 3480 * we are at the far right edge of the tree, update it. 3481 */ 3482 if (xfs_btree_is_lastrec(cur, block, level)) { 3483 cur->bc_ops->update_lastrec(cur, block, NULL, 3484 ptr, LASTREC_DELREC); 3485 } 3486 3487 /* 3488 * We're at the root level. First, shrink the root block in-memory. 3489 * Try to get rid of the next level down. If we can't then there's 3490 * nothing left to do. 3491 */ 3492 if (level == cur->bc_nlevels - 1) { 3493 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) { 3494 xfs_iroot_realloc(cur->bc_private.b.ip, -1, 3495 cur->bc_private.b.whichfork); 3496 3497 error = xfs_btree_kill_iroot(cur); 3498 if (error) 3499 goto error0; 3500 3501 error = xfs_btree_dec_cursor(cur, level, stat); 3502 if (error) 3503 goto error0; 3504 *stat = 1; 3505 return 0; 3506 } 3507 3508 /* 3509 * If this is the root level, and there's only one entry left, 3510 * and it's NOT the leaf level, then we can get rid of this 3511 * level. 3512 */ 3513 if (numrecs == 1 && level > 0) { 3514 union xfs_btree_ptr *pp; 3515 /* 3516 * pp is still set to the first pointer in the block. 3517 * Make it the new root of the btree. 3518 */ 3519 pp = xfs_btree_ptr_addr(cur, 1, block); 3520 error = xfs_btree_kill_root(cur, bp, level, pp); 3521 if (error) 3522 goto error0; 3523 } else if (level > 0) { 3524 error = xfs_btree_dec_cursor(cur, level, stat); 3525 if (error) 3526 goto error0; 3527 } 3528 *stat = 1; 3529 return 0; 3530 } 3531 3532 /* 3533 * If we deleted the leftmost entry in the block, update the 3534 * key values above us in the tree. 3535 */ 3536 if (ptr == 1) { 3537 error = xfs_btree_updkey(cur, keyp, level + 1); 3538 if (error) 3539 goto error0; 3540 } 3541 3542 /* 3543 * If the number of records remaining in the block is at least 3544 * the minimum, we're done. 
3545 */ 3546 if (numrecs >= cur->bc_ops->get_minrecs(cur, level)) { 3547 error = xfs_btree_dec_cursor(cur, level, stat); 3548 if (error) 3549 goto error0; 3550 return 0; 3551 } 3552 3553 /* 3554 * Otherwise, we have to move some records around to keep the 3555 * tree balanced. Look at the left and right sibling blocks to 3556 * see if we can re-balance by moving only one record. 3557 */ 3558 xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB); 3559 xfs_btree_get_sibling(cur, block, &lptr, XFS_BB_LEFTSIB); 3560 3561 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) { 3562 /* 3563 * One child of root, need to get a chance to copy its contents 3564 * into the root and delete it. Can't go up to next level, 3565 * there's nothing to delete there. 3566 */ 3567 if (xfs_btree_ptr_is_null(cur, &rptr) && 3568 xfs_btree_ptr_is_null(cur, &lptr) && 3569 level == cur->bc_nlevels - 2) { 3570 error = xfs_btree_kill_iroot(cur); 3571 if (!error) 3572 error = xfs_btree_dec_cursor(cur, level, stat); 3573 if (error) 3574 goto error0; 3575 return 0; 3576 } 3577 } 3578 3579 ASSERT(!xfs_btree_ptr_is_null(cur, &rptr) || 3580 !xfs_btree_ptr_is_null(cur, &lptr)); 3581 3582 /* 3583 * Duplicate the cursor so our btree manipulations here won't 3584 * disrupt the next level up. 3585 */ 3586 error = xfs_btree_dup_cursor(cur, &tcur); 3587 if (error) 3588 goto error0; 3589 3590 /* 3591 * If there's a right sibling, see if it's ok to shift an entry 3592 * out of it. 3593 */ 3594 if (!xfs_btree_ptr_is_null(cur, &rptr)) { 3595 /* 3596 * Move the temp cursor to the last entry in the next block. 3597 * Actually any entry but the first would suffice. 3598 */ 3599 i = xfs_btree_lastrec(tcur, level); 3600 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); 3601 3602 error = xfs_btree_increment(tcur, level, &i); 3603 if (error) 3604 goto error0; 3605 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); 3606 3607 i = xfs_btree_lastrec(tcur, level); 3608 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); 3609 3610 /* Grab a pointer to the block. */ 3611 right = xfs_btree_get_block(tcur, level, &rbp); 3612 #ifdef DEBUG 3613 error = xfs_btree_check_block(tcur, right, level, rbp); 3614 if (error) 3615 goto error0; 3616 #endif 3617 /* Grab the current block number, for future use. */ 3618 xfs_btree_get_sibling(tcur, right, &cptr, XFS_BB_LEFTSIB); 3619 3620 /* 3621 * If right block is full enough so that removing one entry 3622 * won't make it too empty, and left-shifting an entry out 3623 * of right to us works, we're done. 3624 */ 3625 if (xfs_btree_get_numrecs(right) - 1 >= 3626 cur->bc_ops->get_minrecs(tcur, level)) { 3627 error = xfs_btree_lshift(tcur, level, &i); 3628 if (error) 3629 goto error0; 3630 if (i) { 3631 ASSERT(xfs_btree_get_numrecs(block) >= 3632 cur->bc_ops->get_minrecs(tcur, level)); 3633 3634 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); 3635 tcur = NULL; 3636 3637 error = xfs_btree_dec_cursor(cur, level, stat); 3638 if (error) 3639 goto error0; 3640 return 0; 3641 } 3642 } 3643 3644 /* 3645 * Otherwise, grab the number of records in right for 3646 * future reference, and fix up the temp cursor to point 3647 * to our block again (last record). 
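* The right sibling's size is remembered for the join decision further down.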
*/ 3649 rrecs = xfs_btree_get_numrecs(right); 3650 if (!xfs_btree_ptr_is_null(cur, &lptr)) { 3651 i = xfs_btree_firstrec(tcur, level); 3652 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); 3653 3654 error = xfs_btree_decrement(tcur, level, &i); 3655 if (error) 3656 goto error0; 3657 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); 3658 } 3659 } 3660 3661 /* 3662 * If there's a left sibling, see if it's ok to shift an entry 3663 * out of it. 3664 */ 3665 if (!xfs_btree_ptr_is_null(cur, &lptr)) { 3666 /* 3667 * Move the temp cursor to the first entry in the 3668 * previous block. 3669 */ 3670 i = xfs_btree_firstrec(tcur, level); 3671 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); 3672 3673 error = xfs_btree_decrement(tcur, level, &i); 3674 if (error) 3675 goto error0; 3676 i = xfs_btree_firstrec(tcur, level); 3677 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); 3678 3679 /* Grab a pointer to the block. */ 3680 left = xfs_btree_get_block(tcur, level, &lbp); 3681 #ifdef DEBUG 3682 error = xfs_btree_check_block(cur, left, level, lbp); 3683 if (error) 3684 goto error0; 3685 #endif 3686 /* Grab the current block number, for future use. */ 3687 xfs_btree_get_sibling(tcur, left, &cptr, XFS_BB_RIGHTSIB); 3688 3689 /* 3690 * If left block is full enough so that removing one entry 3691 * won't make it too empty, and right-shifting an entry out 3692 * of left to us works, we're done. 3693 */ 3694 if (xfs_btree_get_numrecs(left) - 1 >= 3695 cur->bc_ops->get_minrecs(tcur, level)) { 3696 error = xfs_btree_rshift(tcur, level, &i); 3697 if (error) 3698 goto error0; 3699 if (i) { 3700 ASSERT(xfs_btree_get_numrecs(block) >= 3701 cur->bc_ops->get_minrecs(tcur, level)); 3702 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); 3703 tcur = NULL; 3704 if (level == 0) 3705 cur->bc_ptrs[0]++; 3706 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 3707 *stat = 1; 3708 return 0; 3709 } 3710 } 3711 3712 /* 3713 * Otherwise, grab the number of records in left for 3714 * future reference. 3715 */ 3716 lrecs = xfs_btree_get_numrecs(left); 3717 } 3718 3719 /* Delete the temp cursor, we're done with it. */ 3720 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); 3721 tcur = NULL; 3722 3723 /* If here, we need to do a join to keep the tree balanced. */ 3724 ASSERT(!xfs_btree_ptr_is_null(cur, &cptr)); 3725 3726 if (!xfs_btree_ptr_is_null(cur, &lptr) && 3727 lrecs + xfs_btree_get_numrecs(block) <= 3728 cur->bc_ops->get_maxrecs(cur, level)) { 3729 /* 3730 * Set "right" to be the starting block, 3731 * "left" to be the left neighbor. 3732 */ 3733 rptr = cptr; 3734 right = block; 3735 rbp = bp; 3736 error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp); 3737 if (error) 3738 goto error0; 3739 3740 /* 3741 * If that won't work, see if we can join with the right neighbor block. 3742 */ 3743 } else if (!xfs_btree_ptr_is_null(cur, &rptr) && 3744 rrecs + xfs_btree_get_numrecs(block) <= 3745 cur->bc_ops->get_maxrecs(cur, level)) { 3746 /* 3747 * Set "left" to be the starting block, 3748 * "right" to be the right neighbor. 3749 */ 3750 lptr = cptr; 3751 left = block; 3752 lbp = bp; 3753 error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp); 3754 if (error) 3755 goto error0; 3756 3757 /* 3758 * Otherwise, we can't fix the imbalance. 3759 * Just return. This is probably a logic error, but it's not fatal.
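* Neither sibling has room to absorb this block's remaining records, so the block is left under-filled.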
3760 */ 3761 } else { 3762 error = xfs_btree_dec_cursor(cur, level, stat); 3763 if (error) 3764 goto error0; 3765 return 0; 3766 } 3767 3768 rrecs = xfs_btree_get_numrecs(right); 3769 lrecs = xfs_btree_get_numrecs(left); 3770 3771 /* 3772 * We're now going to join "left" and "right" by moving all the stuff 3773 * in "right" to "left" and deleting "right". 3774 */ 3775 XFS_BTREE_STATS_ADD(cur, moves, rrecs); 3776 if (level > 0) { 3777 /* It's a non-leaf. Move keys and pointers. */ 3778 union xfs_btree_key *lkp; /* left btree key */ 3779 union xfs_btree_ptr *lpp; /* left address pointer */ 3780 union xfs_btree_key *rkp; /* right btree key */ 3781 union xfs_btree_ptr *rpp; /* right address pointer */ 3782 3783 lkp = xfs_btree_key_addr(cur, lrecs + 1, left); 3784 lpp = xfs_btree_ptr_addr(cur, lrecs + 1, left); 3785 rkp = xfs_btree_key_addr(cur, 1, right); 3786 rpp = xfs_btree_ptr_addr(cur, 1, right); 3787 #ifdef DEBUG 3788 for (i = 1; i < rrecs; i++) { 3789 error = xfs_btree_check_ptr(cur, rpp, i, level); 3790 if (error) 3791 goto error0; 3792 } 3793 #endif 3794 xfs_btree_copy_keys(cur, lkp, rkp, rrecs); 3795 xfs_btree_copy_ptrs(cur, lpp, rpp, rrecs); 3796 3797 xfs_btree_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs); 3798 xfs_btree_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs); 3799 } else { 3800 /* It's a leaf. Move records. */ 3801 union xfs_btree_rec *lrp; /* left record pointer */ 3802 union xfs_btree_rec *rrp; /* right record pointer */ 3803 3804 lrp = xfs_btree_rec_addr(cur, lrecs + 1, left); 3805 rrp = xfs_btree_rec_addr(cur, 1, right); 3806 3807 xfs_btree_copy_recs(cur, lrp, rrp, rrecs); 3808 xfs_btree_log_recs(cur, lbp, lrecs + 1, lrecs + rrecs); 3809 } 3810 3811 XFS_BTREE_STATS_INC(cur, join); 3812 3813 /* 3814 * Fix up the number of records and right block pointer in the 3815 * surviving block, and log it. 3816 */ 3817 xfs_btree_set_numrecs(left, lrecs + rrecs); 3818 xfs_btree_get_sibling(cur, right, &cptr, XFS_BB_RIGHTSIB), 3819 xfs_btree_set_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB); 3820 xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB); 3821 3822 /* If there is a right sibling, point it to the remaining block. */ 3823 xfs_btree_get_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB); 3824 if (!xfs_btree_ptr_is_null(cur, &cptr)) { 3825 error = xfs_btree_read_buf_block(cur, &cptr, 0, &rrblock, &rrbp); 3826 if (error) 3827 goto error0; 3828 xfs_btree_set_sibling(cur, rrblock, &lptr, XFS_BB_LEFTSIB); 3829 xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB); 3830 } 3831 3832 /* Free the deleted block. */ 3833 error = cur->bc_ops->free_block(cur, rbp); 3834 if (error) 3835 goto error0; 3836 XFS_BTREE_STATS_INC(cur, free); 3837 3838 /* 3839 * If we joined with the left neighbor, set the buffer in the 3840 * cursor to the left block, and fix up the index. 3841 */ 3842 if (bp != lbp) { 3843 cur->bc_bufs[level] = lbp; 3844 cur->bc_ptrs[level] += lrecs; 3845 cur->bc_ra[level] = 0; 3846 } 3847 /* 3848 * If we joined with the right neighbor and there's a level above 3849 * us, increment the cursor at that level. 3850 */ 3851 else if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) || 3852 (level + 1 < cur->bc_nlevels)) { 3853 error = xfs_btree_increment(cur, level + 1, &i); 3854 if (error) 3855 goto error0; 3856 } 3857 3858 /* 3859 * Readjust the ptr at this level if it's not a leaf, since it's 3860 * still pointing at the deletion point, which makes the cursor 3861 * inconsistent. If this makes the ptr 0, the caller fixes it up. 
3862 * We can't use decrement because it would change the next level up. 3863 */ 3864 if (level > 0) 3865 cur->bc_ptrs[level]--; 3866 3867 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 3868 /* Return value means the next level up has something to do. */ 3869 *stat = 2; 3870 return 0; 3871 3872 error0: 3873 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); 3874 if (tcur) 3875 xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); 3876 return error; 3877 } 3878 3879 /* 3880 * Delete the record pointed to by cur. 3881 * The cursor refers to the place where the record was (could be inserted) 3882 * when the operation returns. 3883 */ 3884 int /* error */ 3885 xfs_btree_delete( 3886 struct xfs_btree_cur *cur, 3887 int *stat) /* success/failure */ 3888 { 3889 int error; /* error return value */ 3890 int level; 3891 int i; 3892 3893 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); 3894 3895 /* 3896 * Go up the tree, starting at leaf level. 3897 * 3898 * If 2 is returned then a join was done; go to the next level. 3899 * Otherwise we are done. 3900 */ 3901 for (level = 0, i = 2; i == 2; level++) { 3902 error = xfs_btree_delrec(cur, level, &i); 3903 if (error) 3904 goto error0; 3905 } 3906 3907 if (i == 0) { 3908 for (level = 1; level < cur->bc_nlevels; level++) { 3909 if (cur->bc_ptrs[level] == 0) { 3910 error = xfs_btree_decrement(cur, level, &i); 3911 if (error) 3912 goto error0; 3913 break; 3914 } 3915 } 3916 } 3917 3918 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 3919 *stat = i; 3920 return 0; 3921 error0: 3922 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); 3923 return error; 3924 } 3925 3926 /* 3927 * Get the data from the pointed-to record. 3928 */ 3929 int /* error */ 3930 xfs_btree_get_rec( 3931 struct xfs_btree_cur *cur, /* btree cursor */ 3932 union xfs_btree_rec **recp, /* output: btree record */ 3933 int *stat) /* output: success/failure */ 3934 { 3935 struct xfs_btree_block *block; /* btree block */ 3936 struct xfs_buf *bp; /* buffer pointer */ 3937 int ptr; /* record number */ 3938 #ifdef DEBUG 3939 int error; /* error return value */ 3940 #endif 3941 3942 ptr = cur->bc_ptrs[0]; 3943 block = xfs_btree_get_block(cur, 0, &bp); 3944 3945 #ifdef DEBUG 3946 error = xfs_btree_check_block(cur, block, 0, bp); 3947 if (error) 3948 return error; 3949 #endif 3950 3951 /* 3952 * Off the right end or left end, return failure. 3953 */ 3954 if (ptr > xfs_btree_get_numrecs(block) || ptr <= 0) { 3955 *stat = 0; 3956 return 0; 3957 } 3958 3959 /* 3960 * Point to the record and extract its data. 3961 */ 3962 *recp = xfs_btree_rec_addr(cur, ptr, block); 3963 *stat = 1; 3964 return 0; 3965 } 3966 3967 /* 3968 * Change the owner of a btree. 3969 * 3970 * The mechanism we use here is ordered buffer logging. Because we don't know 3971 * how many buffers we are going to need to modify, we don't really want to 3972 * have to make transaction reservations for the worst case of every buffer in a 3973 * full size btree as that may be more space than we can fit in the log. 3974 * 3975 * We do the btree walk in the most efficient manner possible - we have sibling 3976 * pointers so we can just walk all the blocks on each level from left to right 3977 * in a single pass, and then move to the next level and do the same. We can 3978 * also do readahead on the sibling pointers to get IO moving more quickly, 3979 * though for slow disks this is unlikely to make much difference to performance 3980 * as the amount of CPU work we have to do before moving to the next block is 3981 * relatively small.
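* The walk therefore touches every block in the tree exactly once, one level at a time.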

/*
 * Change the owner of a btree.
 *
 * The mechanism we use here is ordered buffer logging. Because we don't know
 * how many buffers we are going to need to modify, we don't really want to
 * have to make transaction reservations for the worst case of every buffer in
 * a full size btree as that may be more space than we can fit in the log....
 *
 * We do the btree walk in the most optimal manner possible - we have sibling
 * pointers so we can just walk all the blocks on each level from left to right
 * in a single pass, and then move to the next level and do the same. We can
 * also do readahead on the sibling pointers to get IO moving more quickly,
 * though for slow disks this is unlikely to make much difference to
 * performance as the amount of CPU work we have to do before moving to the
 * next block is relatively small.
 *
 * For each btree block that we load, modify the owner appropriately, set the
 * buffer as an ordered buffer and log it appropriately. We need to ensure that
 * we mark the region we change dirty so that if the buffer is relogged in
 * a subsequent transaction the changes we make here as an ordered buffer are
 * correctly relogged in that transaction. If we are in recovery context, then
 * just queue the modified buffer as a delayed write buffer so the transaction
 * recovery completion writes the changes to disk.
 */
static int
xfs_btree_block_change_owner(
	struct xfs_btree_cur	*cur,
	int			level,
	__uint64_t		new_owner,
	struct list_head	*buffer_list)
{
	struct xfs_btree_block	*block;
	struct xfs_buf		*bp;
	union xfs_btree_ptr	rptr;

	/* do right sibling readahead */
	xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);

	/* modify the owner */
	block = xfs_btree_get_block(cur, level, &bp);
	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
		block->bb_u.l.bb_owner = cpu_to_be64(new_owner);
	else
		block->bb_u.s.bb_owner = cpu_to_be32(new_owner);

	/*
	 * If the block is a root block hosted in an inode, we might not have a
	 * buffer pointer here and we shouldn't attempt to log the change as
	 * the information is already held in the inode and discarded when the
	 * root block is formatted into the on-disk inode fork. We still change
	 * it, though, so everything is consistent in memory.
	 */
	if (bp) {
		if (cur->bc_tp) {
			xfs_trans_ordered_buf(cur->bc_tp, bp);
			xfs_btree_log_block(cur, bp, XFS_BB_OWNER);
		} else {
			xfs_buf_delwri_queue(bp, buffer_list);
		}
	} else {
		ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
		ASSERT(level == cur->bc_nlevels - 1);
	}

	/* now read rh sibling block for next iteration */
	xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
	if (xfs_btree_ptr_is_null(cur, &rptr))
		return -ENOENT;

	return xfs_btree_lookup_get_block(cur, level, &rptr, &block);
}

int
xfs_btree_change_owner(
	struct xfs_btree_cur	*cur,
	__uint64_t		new_owner,
	struct list_head	*buffer_list)
{
	union xfs_btree_ptr	lptr;
	int			level;
	struct xfs_btree_block	*block = NULL;
	int			error = 0;

	cur->bc_ops->init_ptr_from_cur(cur, &lptr);

	/* for each level */
	for (level = cur->bc_nlevels - 1; level >= 0; level--) {
		/* grab the left hand block */
		error = xfs_btree_lookup_get_block(cur, level, &lptr, &block);
		if (error)
			return error;

		/* readahead the left most block for the next level down */
		if (level > 0) {
			union xfs_btree_ptr	*ptr;

			ptr = xfs_btree_ptr_addr(cur, 1, block);
			xfs_btree_readahead_ptr(cur, ptr, 1);

			/* save for the next iteration of the loop */
			lptr = *ptr;
		}

		/* for each buffer in the level */
		do {
			error = xfs_btree_block_change_owner(cur, level,
							     new_owner,
							     buffer_list);
		} while (!error);

		if (error != -ENOENT)
			return error;
	}

	return 0;
}
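
/*
 * Illustrative sketch only, not part of the original source: a
 * recovery-context caller has no transaction (cur->bc_tp == NULL), so
 * xfs_btree_block_change_owner() queues each modified buffer on the supplied
 * delwri list instead of ordered-logging it; the caller then submits the
 * whole list in one batch.  The helper name is hypothetical, and we assume
 * the cursor was built by the specific btree's cursor-init routine.
 */
static int
xfs_btree_example_change_owner_recovery(
	struct xfs_btree_cur	*cur,	/* cursor with cur->bc_tp == NULL */
	__uint64_t		new_owner)
{
	LIST_HEAD(buffer_list);		/* delayed-write buffer list */
	int			error;

	/* rewrite the owner field in every block of the btree */
	error = xfs_btree_change_owner(cur, new_owner, &buffer_list);
	if (error)
		return error;

	/* write all of the re-owned blocks back in one go */
	return xfs_buf_delwri_submit(&buffer_list);
}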

/**
 * xfs_btree_sblock_v5hdr_verify() -- verify the v5 fields of a short-format
 *				      btree block
 *
 * @bp: buffer containing the btree block
 */
bool
xfs_btree_sblock_v5hdr_verify(
	struct xfs_buf		*bp)
{
	struct xfs_mount	*mp = bp->b_target->bt_mount;
	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
	struct xfs_perag	*pag = bp->b_pag;

	if (!xfs_sb_version_hascrc(&mp->m_sb))
		return false;
	if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
		return false;
	if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
		return false;
	if (pag && be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
		return false;
	return true;
}

/**
 * xfs_btree_sblock_verify() -- verify a short-format btree block
 *
 * @bp: buffer containing the btree block
 * @max_recs: maximum records allowed in this btree node
 */
bool
xfs_btree_sblock_verify(
	struct xfs_buf		*bp,
	unsigned int		max_recs)
{
	struct xfs_mount	*mp = bp->b_target->bt_mount;
	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);

	/* numrecs verification */
	if (be16_to_cpu(block->bb_numrecs) > max_recs)
		return false;

	/* sibling pointer verification */
	if (!block->bb_u.s.bb_leftsib ||
	    (be32_to_cpu(block->bb_u.s.bb_leftsib) >= mp->m_sb.sb_agblocks &&
	     block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK)))
		return false;
	if (!block->bb_u.s.bb_rightsib ||
	    (be32_to_cpu(block->bb_u.s.bb_rightsib) >= mp->m_sb.sb_agblocks &&
	     block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK)))
		return false;

	return true;
}
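
/*
 * Illustrative sketch only, not part of the original source: how a per-btree
 * read verifier might layer the two generic helpers above.  The shape loosely
 * follows what verifiers such as the allocation btree's do (v5 header checks
 * first, then the generic numrecs/sibling checks against a per-level record
 * limit), but the function name is made up and the use of m_alloc_mxr[] here
 * is just an assumed example of a per-level maximum.
 */
static bool
xfs_btree_example_sblock_verify(
	struct xfs_buf		*bp)
{
	struct xfs_mount	*mp = bp->b_target->bt_mount;
	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
	unsigned int		level = be16_to_cpu(block->bb_level);

	/* v5 filesystems carry extra header fields that must also check out */
	if (xfs_sb_version_hascrc(&mp->m_sb) &&
	    !xfs_btree_sblock_v5hdr_verify(bp))
		return false;

	/* generic numrecs and sibling checks against a per-level limit */
	return xfs_btree_sblock_verify(bp, mp->m_alloc_mxr[level != 0]);
}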