/*
 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_inode_item.h"
#include "xfs_buf_item.h"
#include "xfs_btree.h"
#include "xfs_error.h"
#include "xfs_trace.h"
#include "xfs_cksum.h"
#include "xfs_alloc.h"

/*
 * Cursor allocation zone.
 */
kmem_zone_t	*xfs_btree_cur_zone;

/*
 * Btree magic numbers.
 */
static const __uint32_t xfs_magics[2][XFS_BTNUM_MAX] = {
	{ XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC,
	  XFS_FIBT_MAGIC },
	{ XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC,
	  XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC, XFS_FIBT_CRC_MAGIC }
};
#define xfs_btree_magic(cur) \
	xfs_magics[!!((cur)->bc_flags & XFS_BTREE_CRC_BLOCKS)][cur->bc_btnum]


STATIC int				/* error (0 or EFSCORRUPTED) */
xfs_btree_check_lblock(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	struct xfs_btree_block	*block,	/* btree long form block pointer */
	int			level,	/* level of the btree block */
	struct xfs_buf		*bp)	/* buffer for block, if any */
{
	int			lblock_ok = 1; /* block passes checks */
	struct xfs_mount	*mp;	/* file system mount point */

	mp = cur->bc_mp;

	if (xfs_sb_version_hascrc(&mp->m_sb)) {
		lblock_ok = lblock_ok &&
			uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_uuid) &&
			block->bb_u.l.bb_blkno == cpu_to_be64(
				bp ? bp->b_bn : XFS_BUF_DADDR_NULL);
	}

	lblock_ok = lblock_ok &&
		be32_to_cpu(block->bb_magic) == xfs_btree_magic(cur) &&
		be16_to_cpu(block->bb_level) == level &&
		be16_to_cpu(block->bb_numrecs) <=
			cur->bc_ops->get_maxrecs(cur, level) &&
		block->bb_u.l.bb_leftsib &&
		(block->bb_u.l.bb_leftsib == cpu_to_be64(NULLFSBLOCK) ||
		 XFS_FSB_SANITY_CHECK(mp,
			be64_to_cpu(block->bb_u.l.bb_leftsib))) &&
		block->bb_u.l.bb_rightsib &&
		(block->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK) ||
		 XFS_FSB_SANITY_CHECK(mp,
			be64_to_cpu(block->bb_u.l.bb_rightsib)));

	if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp,
			XFS_ERRTAG_BTREE_CHECK_LBLOCK,
			XFS_RANDOM_BTREE_CHECK_LBLOCK))) {
		if (bp)
			trace_xfs_btree_corrupt(bp, _RET_IP_);
		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
		return -EFSCORRUPTED;
	}
	return 0;
}

STATIC int				/* error (0 or EFSCORRUPTED) */
xfs_btree_check_sblock(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	struct xfs_btree_block	*block,	/* btree short form block pointer */
	int			level,	/* level of the btree block */
	struct xfs_buf		*bp)	/* buffer containing block */
{
	struct xfs_mount	*mp;	/* file system mount point */
	struct xfs_buf		*agbp;	/* buffer for ag. freespace struct */
	struct xfs_agf		*agf;	/* ag. freespace structure */
	xfs_agblock_t		agflen;	/* native ag. freespace length */
	int			sblock_ok = 1; /* block passes checks */

	mp = cur->bc_mp;
	agbp = cur->bc_private.a.agbp;
	agf = XFS_BUF_TO_AGF(agbp);
	agflen = be32_to_cpu(agf->agf_length);

	if (xfs_sb_version_hascrc(&mp->m_sb)) {
		sblock_ok = sblock_ok &&
			uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid) &&
			block->bb_u.s.bb_blkno == cpu_to_be64(
				bp ? bp->b_bn : XFS_BUF_DADDR_NULL);
	}

	sblock_ok = sblock_ok &&
		be32_to_cpu(block->bb_magic) == xfs_btree_magic(cur) &&
		be16_to_cpu(block->bb_level) == level &&
		be16_to_cpu(block->bb_numrecs) <=
			cur->bc_ops->get_maxrecs(cur, level) &&
		(block->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK) ||
		 be32_to_cpu(block->bb_u.s.bb_leftsib) < agflen) &&
		block->bb_u.s.bb_leftsib &&
		(block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK) ||
		 be32_to_cpu(block->bb_u.s.bb_rightsib) < agflen) &&
		block->bb_u.s.bb_rightsib;

	if (unlikely(XFS_TEST_ERROR(!sblock_ok, mp,
			XFS_ERRTAG_BTREE_CHECK_SBLOCK,
			XFS_RANDOM_BTREE_CHECK_SBLOCK))) {
		if (bp)
			trace_xfs_btree_corrupt(bp, _RET_IP_);
		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
		return -EFSCORRUPTED;
	}
	return 0;
}

/*
 * Debug routine: check that block header is ok.
 */
int
xfs_btree_check_block(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	struct xfs_btree_block	*block,	/* generic btree block pointer */
	int			level,	/* level of the btree block */
	struct xfs_buf		*bp)	/* buffer containing block, if any */
{
	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
		return xfs_btree_check_lblock(cur, block, level, bp);
	else
		return xfs_btree_check_sblock(cur, block, level, bp);
}

/*
 * Check that (long) pointer is ok.
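 * Returns 0 if the pointer passes the checks and -EFSCORRUPTED otherwise.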
 */
int					/* error (0 or EFSCORRUPTED) */
xfs_btree_check_lptr(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	xfs_fsblock_t		bno,	/* btree block disk address */
	int			level)	/* btree block level */
{
	XFS_WANT_CORRUPTED_RETURN(cur->bc_mp,
		level > 0 &&
		bno != NULLFSBLOCK &&
		XFS_FSB_SANITY_CHECK(cur->bc_mp, bno));
	return 0;
}

#ifdef DEBUG
/*
 * Check that (short) pointer is ok.
 */
STATIC int				/* error (0 or EFSCORRUPTED) */
xfs_btree_check_sptr(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	xfs_agblock_t		bno,	/* btree block disk address */
	int			level)	/* btree block level */
{
	xfs_agblock_t		agblocks = cur->bc_mp->m_sb.sb_agblocks;

	XFS_WANT_CORRUPTED_RETURN(cur->bc_mp,
		level > 0 &&
		bno != NULLAGBLOCK &&
		bno != 0 &&
		bno < agblocks);
	return 0;
}

/*
 * Check that block ptr is ok.
 */
STATIC int				/* error (0 or EFSCORRUPTED) */
xfs_btree_check_ptr(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	union xfs_btree_ptr	*ptr,	/* btree block disk address */
	int			index,	/* offset from ptr to check */
	int			level)	/* btree block level */
{
	if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
		return xfs_btree_check_lptr(cur,
				be64_to_cpu((&ptr->l)[index]), level);
	} else {
		return xfs_btree_check_sptr(cur,
				be32_to_cpu((&ptr->s)[index]), level);
	}
}
#endif

/*
 * Calculate CRC on the whole btree block and stuff it into the
 * long-form btree header.
 *
 * Prior to calculating the CRC, pull the LSN out of the buffer log item and
 * put it into the buffer so recovery knows what the last modification was
 * that made it to disk.
 */
void
xfs_btree_lblock_calc_crc(
	struct xfs_buf		*bp)
{
	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
	struct xfs_buf_log_item	*bip = bp->b_fspriv;

	if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
		return;
	if (bip)
		block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
	xfs_buf_update_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF);
}

bool
xfs_btree_lblock_verify_crc(
	struct xfs_buf		*bp)
{
	if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
		return xfs_buf_verify_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF);

	return true;
}

/*
 * Calculate CRC on the whole btree block and stuff it into the
 * short-form btree header.
 *
 * Prior to calculating the CRC, pull the LSN out of the buffer log item and
 * put it into the buffer so recovery knows what the last modification was
 * that made it to disk.
 */
void
xfs_btree_sblock_calc_crc(
	struct xfs_buf		*bp)
{
	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
	struct xfs_buf_log_item	*bip = bp->b_fspriv;

	if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
		return;
	if (bip)
		block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
	xfs_buf_update_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
}

bool
xfs_btree_sblock_verify_crc(
	struct xfs_buf		*bp)
{
	if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
		return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);

	return true;
}

/*
 * Delete the btree cursor.
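 * Every buffer held in the cursor's path is released and the cursor
 * memory itself is freed.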
 */
void
xfs_btree_del_cursor(
	xfs_btree_cur_t	*cur,		/* btree cursor */
	int		error)		/* del because of error */
{
	int		i;		/* btree level */

	/*
	 * Clear the buffer pointers, and release the buffers.
	 * If we're doing this in the face of an error, we
	 * need to make sure to inspect all of the entries
	 * in the bc_bufs array for buffers to be unlocked.
	 * This is because some of the btree code works from
	 * level n down to 0, and if we get an error along
	 * the way we won't have initialized all the entries
	 * down to 0.
	 */
	for (i = 0; i < cur->bc_nlevels; i++) {
		if (cur->bc_bufs[i])
			xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[i]);
		else if (!error)
			break;
	}
	/*
	 * Can't free a bmap cursor without having dealt with the
	 * allocated indirect blocks' accounting.
	 */
	ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP ||
	       cur->bc_private.b.allocated == 0);
	/*
	 * Free the cursor.
	 */
	kmem_zone_free(xfs_btree_cur_zone, cur);
}

/*
 * Duplicate the btree cursor.
 * Allocate a new one, copy the record, re-get the buffers.
 */
int					/* error */
xfs_btree_dup_cursor(
	xfs_btree_cur_t	*cur,		/* input cursor */
	xfs_btree_cur_t	**ncur)		/* output cursor */
{
	xfs_buf_t	*bp;		/* btree block's buffer pointer */
	int		error;		/* error return value */
	int		i;		/* level number of btree block */
	xfs_mount_t	*mp;		/* mount structure for filesystem */
	xfs_btree_cur_t	*new;		/* new cursor value */
	xfs_trans_t	*tp;		/* transaction pointer, can be NULL */

	tp = cur->bc_tp;
	mp = cur->bc_mp;

	/*
	 * Allocate a new cursor like the old one.
	 */
	new = cur->bc_ops->dup_cursor(cur);

	/*
	 * Copy the record currently in the cursor.
	 */
	new->bc_rec = cur->bc_rec;

	/*
	 * For each level current, re-get the buffer and copy the ptr value.
	 */
	for (i = 0; i < new->bc_nlevels; i++) {
		new->bc_ptrs[i] = cur->bc_ptrs[i];
		new->bc_ra[i] = cur->bc_ra[i];
		bp = cur->bc_bufs[i];
		if (bp) {
			error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
						   XFS_BUF_ADDR(bp), mp->m_bsize,
						   0, &bp,
						   cur->bc_ops->buf_ops);
			if (error) {
				xfs_btree_del_cursor(new, error);
				*ncur = NULL;
				return error;
			}
		}
		new->bc_bufs[i] = bp;
	}
	*ncur = new;
	return 0;
}

/*
 * XFS btree block layout and addressing:
 *
 * There are two types of blocks in the btree: leaf and non-leaf blocks.
 *
 * A leaf block starts with a header, followed by records containing the
 * values.  A non-leaf block also starts with the same header, and then
 * first contains lookup keys followed by an equal number of pointers
 * to the btree blocks at the previous level.
 *
 *		+--------+-------+-------+-------+-------+-------+-------+
 * Leaf:	| header | rec 1 | rec 2 | rec 3 | rec 4 | rec 5 | rec N |
 *		+--------+-------+-------+-------+-------+-------+-------+
 *
 *		+--------+-------+-------+-------+-------+-------+-------+
 * Non-Leaf:	| header | key 1 | key 2 | key N | ptr 1 | ptr 2 | ptr N |
 *		+--------+-------+-------+-------+-------+-------+-------+
 *
 * The header is called struct xfs_btree_block for reasons better left unknown
 * and comes in different versions for short (32bit) and long (64bit) block
 * pointers.  The record and key structures are defined by the btree instances
 * and opaque to the btree core.  The block pointers are simple disk endian
 * integers, available in a short (32bit) and long (64bit) variant.
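 *
 * A quick worked example of the arithmetic done by the offset helpers
 * below (the 8 byte record size is illustrative only; the real size comes
 * from each btree instance via bc_ops): with the 16 byte non-CRC
 * short-form header and 8 byte records, record n starts at byte
 * 16 + (n - 1) * 8, so record 1 immediately follows the header and
 * record 3 starts at byte 32.  In a non-leaf block the pointers only
 * start after room for get_maxrecs() keys has been skipped.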
The block pointers are simple disk endian 395 * integers, available in a short (32bit) and long (64bit) variant. 396 * 397 * The helpers below calculate the offset of a given record, key or pointer 398 * into a btree block (xfs_btree_*_offset) or return a pointer to the given 399 * record, key or pointer (xfs_btree_*_addr). Note that all addressing 400 * inside the btree block is done using indices starting at one, not zero! 401 */ 402 403 /* 404 * Return size of the btree block header for this btree instance. 405 */ 406 static inline size_t xfs_btree_block_len(struct xfs_btree_cur *cur) 407 { 408 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { 409 if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) 410 return XFS_BTREE_LBLOCK_CRC_LEN; 411 return XFS_BTREE_LBLOCK_LEN; 412 } 413 if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) 414 return XFS_BTREE_SBLOCK_CRC_LEN; 415 return XFS_BTREE_SBLOCK_LEN; 416 } 417 418 /* 419 * Return size of btree block pointers for this btree instance. 420 */ 421 static inline size_t xfs_btree_ptr_len(struct xfs_btree_cur *cur) 422 { 423 return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ? 424 sizeof(__be64) : sizeof(__be32); 425 } 426 427 /* 428 * Calculate offset of the n-th record in a btree block. 429 */ 430 STATIC size_t 431 xfs_btree_rec_offset( 432 struct xfs_btree_cur *cur, 433 int n) 434 { 435 return xfs_btree_block_len(cur) + 436 (n - 1) * cur->bc_ops->rec_len; 437 } 438 439 /* 440 * Calculate offset of the n-th key in a btree block. 441 */ 442 STATIC size_t 443 xfs_btree_key_offset( 444 struct xfs_btree_cur *cur, 445 int n) 446 { 447 return xfs_btree_block_len(cur) + 448 (n - 1) * cur->bc_ops->key_len; 449 } 450 451 /* 452 * Calculate offset of the n-th block pointer in a btree block. 453 */ 454 STATIC size_t 455 xfs_btree_ptr_offset( 456 struct xfs_btree_cur *cur, 457 int n, 458 int level) 459 { 460 return xfs_btree_block_len(cur) + 461 cur->bc_ops->get_maxrecs(cur, level) * cur->bc_ops->key_len + 462 (n - 1) * xfs_btree_ptr_len(cur); 463 } 464 465 /* 466 * Return a pointer to the n-th record in the btree block. 467 */ 468 STATIC union xfs_btree_rec * 469 xfs_btree_rec_addr( 470 struct xfs_btree_cur *cur, 471 int n, 472 struct xfs_btree_block *block) 473 { 474 return (union xfs_btree_rec *) 475 ((char *)block + xfs_btree_rec_offset(cur, n)); 476 } 477 478 /* 479 * Return a pointer to the n-th key in the btree block. 480 */ 481 STATIC union xfs_btree_key * 482 xfs_btree_key_addr( 483 struct xfs_btree_cur *cur, 484 int n, 485 struct xfs_btree_block *block) 486 { 487 return (union xfs_btree_key *) 488 ((char *)block + xfs_btree_key_offset(cur, n)); 489 } 490 491 /* 492 * Return a pointer to the n-th block pointer in the btree block. 493 */ 494 STATIC union xfs_btree_ptr * 495 xfs_btree_ptr_addr( 496 struct xfs_btree_cur *cur, 497 int n, 498 struct xfs_btree_block *block) 499 { 500 int level = xfs_btree_get_level(block); 501 502 ASSERT(block->bb_level != 0); 503 504 return (union xfs_btree_ptr *) 505 ((char *)block + xfs_btree_ptr_offset(cur, n, level)); 506 } 507 508 /* 509 * Get the root block which is stored in the inode. 510 * 511 * For now this btree implementation assumes the btree root is always 512 * stored in the if_broot field of an inode fork. 
 */
STATIC struct xfs_btree_block *
xfs_btree_get_iroot(
	struct xfs_btree_cur	*cur)
{
	struct xfs_ifork	*ifp;

	ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, cur->bc_private.b.whichfork);
	return (struct xfs_btree_block *)ifp->if_broot;
}

/*
 * Retrieve the block pointer from the cursor at the given level.
 * This may be an inode btree root or from a buffer.
 */
STATIC struct xfs_btree_block *		/* generic btree block pointer */
xfs_btree_get_block(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	int			level,	/* level in btree */
	struct xfs_buf		**bpp)	/* buffer containing the block */
{
	if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
	    (level == cur->bc_nlevels - 1)) {
		*bpp = NULL;
		return xfs_btree_get_iroot(cur);
	}

	*bpp = cur->bc_bufs[level];
	return XFS_BUF_TO_BLOCK(*bpp);
}

/*
 * Get a buffer for the block, return it with no data read.
 * Long-form addressing.
 */
xfs_buf_t *				/* buffer for fsbno */
xfs_btree_get_bufl(
	xfs_mount_t	*mp,		/* file system mount point */
	xfs_trans_t	*tp,		/* transaction pointer */
	xfs_fsblock_t	fsbno,		/* file system block number */
	uint		lock)		/* lock flags for get_buf */
{
	xfs_daddr_t	d;		/* real disk block address */

	ASSERT(fsbno != NULLFSBLOCK);
	d = XFS_FSB_TO_DADDR(mp, fsbno);
	return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
}

/*
 * Get a buffer for the block, return it with no data read.
 * Short-form addressing.
 */
xfs_buf_t *				/* buffer for agno/agbno */
xfs_btree_get_bufs(
	xfs_mount_t	*mp,		/* file system mount point */
	xfs_trans_t	*tp,		/* transaction pointer */
	xfs_agnumber_t	agno,		/* allocation group number */
	xfs_agblock_t	agbno,		/* allocation group block number */
	uint		lock)		/* lock flags for get_buf */
{
	xfs_daddr_t	d;		/* real disk block address */

	ASSERT(agno != NULLAGNUMBER);
	ASSERT(agbno != NULLAGBLOCK);
	d = XFS_AGB_TO_DADDR(mp, agno, agbno);
	return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
}

/*
 * Check for the cursor referring to the last block at the given level.
 */
int					/* 1=is last block, 0=not last block */
xfs_btree_islastblock(
	xfs_btree_cur_t	*cur,		/* btree cursor */
	int		level)		/* level to check */
{
	struct xfs_btree_block	*block;	/* generic btree block pointer */
	xfs_buf_t		*bp;	/* buffer containing block */

	block = xfs_btree_get_block(cur, level, &bp);
	xfs_btree_check_block(cur, block, level, bp);
	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
		return block->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK);
	else
		return block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK);
}

/*
 * Change the cursor to point to the first record at the given level.
 * Other levels are unaffected.
 */
STATIC int				/* success=1, failure=0 */
xfs_btree_firstrec(
	xfs_btree_cur_t	*cur,		/* btree cursor */
	int		level)		/* level to change */
{
	struct xfs_btree_block	*block;	/* generic btree block pointer */
	xfs_buf_t		*bp;	/* buffer containing block */

	/*
	 * Get the block pointer for this level.
	 */
	block = xfs_btree_get_block(cur, level, &bp);
	xfs_btree_check_block(cur, block, level, bp);
	/*
	 * It's empty, there is no such record.
	 */
	if (!block->bb_numrecs)
		return 0;
	/*
	 * Set the ptr value to 1, that's the first record/key.
	 */
	cur->bc_ptrs[level] = 1;
	return 1;
}

/*
 * Change the cursor to point to the last record in the current block
 * at the given level.  Other levels are unaffected.
 */
STATIC int				/* success=1, failure=0 */
xfs_btree_lastrec(
	xfs_btree_cur_t	*cur,		/* btree cursor */
	int		level)		/* level to change */
{
	struct xfs_btree_block	*block;	/* generic btree block pointer */
	xfs_buf_t		*bp;	/* buffer containing block */

	/*
	 * Get the block pointer for this level.
	 */
	block = xfs_btree_get_block(cur, level, &bp);
	xfs_btree_check_block(cur, block, level, bp);
	/*
	 * It's empty, there is no such record.
	 */
	if (!block->bb_numrecs)
		return 0;
	/*
	 * Set the ptr value to numrecs, that's the last record/key.
	 */
	cur->bc_ptrs[level] = be16_to_cpu(block->bb_numrecs);
	return 1;
}

/*
 * Compute first and last byte offsets for the fields given.
 * Interprets the offsets table, which contains struct field offsets.
 */
void
xfs_btree_offsets(
	__int64_t	fields,		/* bitmask of fields */
	const short	*offsets,	/* table of field offsets */
	int		nbits,		/* number of bits to inspect */
	int		*first,		/* output: first byte offset */
	int		*last)		/* output: last byte offset */
{
	int		i;		/* current bit number */
	__int64_t	imask;		/* mask for current bit number */

	ASSERT(fields != 0);
	/*
	 * Find the lowest bit, so the first byte offset.
	 */
	for (i = 0, imask = 1LL; ; i++, imask <<= 1) {
		if (imask & fields) {
			*first = offsets[i];
			break;
		}
	}
	/*
	 * Find the highest bit, so the last byte offset.
	 */
	for (i = nbits - 1, imask = 1LL << i; ; i--, imask >>= 1) {
		if (imask & fields) {
			*last = offsets[i + 1] - 1;
			break;
		}
	}
}

/*
 * Get a buffer for the block, return it read in.
 * Long-form addressing.
 */
int
xfs_btree_read_bufl(
	struct xfs_mount	*mp,		/* file system mount point */
	struct xfs_trans	*tp,		/* transaction pointer */
	xfs_fsblock_t		fsbno,		/* file system block number */
	uint			lock,		/* lock flags for read_buf */
	struct xfs_buf		**bpp,		/* buffer for fsbno */
	int			refval,		/* ref count value for buffer */
	const struct xfs_buf_ops *ops)
{
	struct xfs_buf		*bp;		/* return value */
	xfs_daddr_t		d;		/* real disk block address */
	int			error;

	ASSERT(fsbno != NULLFSBLOCK);
	d = XFS_FSB_TO_DADDR(mp, fsbno);
	error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
				   mp->m_bsize, lock, &bp, ops);
	if (error)
		return error;
	if (bp)
		xfs_buf_set_ref(bp, refval);
	*bpp = bp;
	return 0;
}

/*
 * Read-ahead the block, don't wait for it, don't return a buffer.
 * Long-form addressing.
 */
/* ARGSUSED */
void
xfs_btree_reada_bufl(
	struct xfs_mount	*mp,		/* file system mount point */
	xfs_fsblock_t		fsbno,		/* file system block number */
	xfs_extlen_t		count,		/* count of filesystem blocks */
	const struct xfs_buf_ops *ops)
{
	xfs_daddr_t		d;

	ASSERT(fsbno != NULLFSBLOCK);
	d = XFS_FSB_TO_DADDR(mp, fsbno);
	xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops);
}

/*
 * Read-ahead the block, don't wait for it, don't return a buffer.
 * Short-form addressing.
 */
/* ARGSUSED */
void
xfs_btree_reada_bufs(
	struct xfs_mount	*mp,		/* file system mount point */
	xfs_agnumber_t		agno,		/* allocation group number */
	xfs_agblock_t		agbno,		/* allocation group block number */
	xfs_extlen_t		count,		/* count of filesystem blocks */
	const struct xfs_buf_ops *ops)
{
	xfs_daddr_t		d;

	ASSERT(agno != NULLAGNUMBER);
	ASSERT(agbno != NULLAGBLOCK);
	d = XFS_AGB_TO_DADDR(mp, agno, agbno);
	xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops);
}

STATIC int
xfs_btree_readahead_lblock(
	struct xfs_btree_cur	*cur,
	int			lr,
	struct xfs_btree_block	*block)
{
	int			rval = 0;
	xfs_fsblock_t		left = be64_to_cpu(block->bb_u.l.bb_leftsib);
	xfs_fsblock_t		right = be64_to_cpu(block->bb_u.l.bb_rightsib);

	if ((lr & XFS_BTCUR_LEFTRA) && left != NULLFSBLOCK) {
		xfs_btree_reada_bufl(cur->bc_mp, left, 1,
				     cur->bc_ops->buf_ops);
		rval++;
	}

	if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLFSBLOCK) {
		xfs_btree_reada_bufl(cur->bc_mp, right, 1,
				     cur->bc_ops->buf_ops);
		rval++;
	}

	return rval;
}

STATIC int
xfs_btree_readahead_sblock(
	struct xfs_btree_cur	*cur,
	int			lr,
	struct xfs_btree_block	*block)
{
	int			rval = 0;
	xfs_agblock_t		left = be32_to_cpu(block->bb_u.s.bb_leftsib);
	xfs_agblock_t		right = be32_to_cpu(block->bb_u.s.bb_rightsib);


	if ((lr & XFS_BTCUR_LEFTRA) && left != NULLAGBLOCK) {
		xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
				     left, 1, cur->bc_ops->buf_ops);
		rval++;
	}

	if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLAGBLOCK) {
		xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
				     right, 1, cur->bc_ops->buf_ops);
		rval++;
	}

	return rval;
}

/*
 * Read-ahead btree blocks, at the given level.
 * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA.
 */
STATIC int
xfs_btree_readahead(
	struct xfs_btree_cur	*cur,		/* btree cursor */
	int			lev,		/* level in btree */
	int			lr)		/* left/right bits */
{
	struct xfs_btree_block	*block;

	/*
	 * No readahead needed if we are at the root level and the
	 * btree root is stored in the inode.
	 */
	if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
	    (lev == cur->bc_nlevels - 1))
		return 0;

	if ((cur->bc_ra[lev] | lr) == cur->bc_ra[lev])
		return 0;

	cur->bc_ra[lev] |= lr;
	block = XFS_BUF_TO_BLOCK(cur->bc_bufs[lev]);

	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
		return xfs_btree_readahead_lblock(cur, lr, block);
	return xfs_btree_readahead_sblock(cur, lr, block);
}

STATIC xfs_daddr_t
xfs_btree_ptr_to_daddr(
	struct xfs_btree_cur	*cur,
	union xfs_btree_ptr	*ptr)
{
	if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
		ASSERT(ptr->l != cpu_to_be64(NULLFSBLOCK));

		return XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l));
	} else {
		ASSERT(cur->bc_private.a.agno != NULLAGNUMBER);
		ASSERT(ptr->s != cpu_to_be32(NULLAGBLOCK));

		return XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno,
					be32_to_cpu(ptr->s));
	}
}

/*
 * Readahead @count btree blocks at the given @ptr location.
 *
 * We don't need to care about long or short form btrees here as we have a
 * method of converting the ptr directly to a daddr available to us.
 */
STATIC void
xfs_btree_readahead_ptr(
	struct xfs_btree_cur	*cur,
	union xfs_btree_ptr	*ptr,
	xfs_extlen_t		count)
{
	xfs_buf_readahead(cur->bc_mp->m_ddev_targp,
			  xfs_btree_ptr_to_daddr(cur, ptr),
			  cur->bc_mp->m_bsize * count, cur->bc_ops->buf_ops);
}

/*
 * Set the buffer for level "lev" in the cursor to bp, releasing
 * any previous buffer.
 */
STATIC void
xfs_btree_setbuf(
	xfs_btree_cur_t		*cur,	/* btree cursor */
	int			lev,	/* level in btree */
	xfs_buf_t		*bp)	/* new buffer to set */
{
	struct xfs_btree_block	*b;	/* btree block */

	if (cur->bc_bufs[lev])
		xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[lev]);
	cur->bc_bufs[lev] = bp;
	cur->bc_ra[lev] = 0;

	b = XFS_BUF_TO_BLOCK(bp);
	if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
		if (b->bb_u.l.bb_leftsib == cpu_to_be64(NULLFSBLOCK))
			cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
		if (b->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK))
			cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
	} else {
		if (b->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK))
			cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
		if (b->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK))
			cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
	}
}

STATIC int
xfs_btree_ptr_is_null(
	struct xfs_btree_cur	*cur,
	union xfs_btree_ptr	*ptr)
{
	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
		return ptr->l == cpu_to_be64(NULLFSBLOCK);
	else
		return ptr->s == cpu_to_be32(NULLAGBLOCK);
}

STATIC void
xfs_btree_set_ptr_null(
	struct xfs_btree_cur	*cur,
	union xfs_btree_ptr	*ptr)
{
	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
		ptr->l = cpu_to_be64(NULLFSBLOCK);
	else
		ptr->s = cpu_to_be32(NULLAGBLOCK);
}

/*
 * Get/set/init sibling pointers
 */
STATIC void
xfs_btree_get_sibling(
	struct xfs_btree_cur	*cur,
	struct xfs_btree_block	*block,
	union xfs_btree_ptr	*ptr,
	int			lr)
{
	ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);

	if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
		if (lr == XFS_BB_RIGHTSIB)
			ptr->l = block->bb_u.l.bb_rightsib;
		else
			ptr->l = block->bb_u.l.bb_leftsib;
	} else {
		if (lr == XFS_BB_RIGHTSIB)
			ptr->s = block->bb_u.s.bb_rightsib;
		else
			ptr->s = block->bb_u.s.bb_leftsib;
	}
}

STATIC void
xfs_btree_set_sibling(
	struct xfs_btree_cur	*cur,
	struct xfs_btree_block	*block,
	union xfs_btree_ptr	*ptr,
	int			lr)
{
	ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);

	if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
		if (lr == XFS_BB_RIGHTSIB)
			block->bb_u.l.bb_rightsib = ptr->l;
		else
			block->bb_u.l.bb_leftsib = ptr->l;
	} else {
		if (lr == XFS_BB_RIGHTSIB)
			block->bb_u.s.bb_rightsib = ptr->s;
		else
			block->bb_u.s.bb_leftsib = ptr->s;
	}
}

void
xfs_btree_init_block_int(
	struct xfs_mount	*mp,
	struct xfs_btree_block	*buf,
	xfs_daddr_t		blkno,
	__u32			magic,
	__u16			level,
	__u16			numrecs,
	__u64			owner,
	unsigned int		flags)
{
	buf->bb_magic = cpu_to_be32(magic);
	buf->bb_level = cpu_to_be16(level);
	buf->bb_numrecs = cpu_to_be16(numrecs);

	if (flags & XFS_BTREE_LONG_PTRS) {
		buf->bb_u.l.bb_leftsib = cpu_to_be64(NULLFSBLOCK);
		buf->bb_u.l.bb_rightsib = cpu_to_be64(NULLFSBLOCK);
		if (flags & XFS_BTREE_CRC_BLOCKS) {
			buf->bb_u.l.bb_blkno = cpu_to_be64(blkno);
			buf->bb_u.l.bb_owner = cpu_to_be64(owner);
			uuid_copy(&buf->bb_u.l.bb_uuid, &mp->m_sb.sb_uuid);
			buf->bb_u.l.bb_pad = 0;
			buf->bb_u.l.bb_lsn = 0;
		}
	} else {
		/* owner is a 32 bit value on short blocks */
		__u32 __owner = (__u32)owner;

		buf->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK);
		buf->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK);
		if (flags & XFS_BTREE_CRC_BLOCKS) {
			buf->bb_u.s.bb_blkno = cpu_to_be64(blkno);
			buf->bb_u.s.bb_owner = cpu_to_be32(__owner);
			uuid_copy(&buf->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid);
			buf->bb_u.s.bb_lsn = 0;
		}
	}
}

void
xfs_btree_init_block(
	struct xfs_mount *mp,
	struct xfs_buf	*bp,
	__u32		magic,
	__u16		level,
	__u16		numrecs,
	__u64		owner,
	unsigned int	flags)
{
	xfs_btree_init_block_int(mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn,
				 magic, level, numrecs, owner, flags);
}

STATIC void
xfs_btree_init_block_cur(
	struct xfs_btree_cur	*cur,
	struct xfs_buf		*bp,
	int			level,
	int			numrecs)
{
	__u64		owner;

	/*
	 * we can pull the owner from the cursor right now as the different
	 * owners align directly with the pointer size of the btree. This may
	 * change in future, but is safe for current users of the generic btree
	 * code.
	 */
	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
		owner = cur->bc_private.b.ip->i_ino;
	else
		owner = cur->bc_private.a.agno;

	xfs_btree_init_block_int(cur->bc_mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn,
				 xfs_btree_magic(cur), level, numrecs,
				 owner, cur->bc_flags);
}

/*
 * Return true if ptr is the last record in the btree and
 * we need to track updates to this record.  The decision
 * will be further refined in the update_lastrec method.
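 * Only btrees that set XFS_BTREE_LASTREC_UPDATE ever track this; for
 * everything else we return false straight away.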
 */
STATIC int
xfs_btree_is_lastrec(
	struct xfs_btree_cur	*cur,
	struct xfs_btree_block	*block,
	int			level)
{
	union xfs_btree_ptr	ptr;

	if (level > 0)
		return 0;
	if (!(cur->bc_flags & XFS_BTREE_LASTREC_UPDATE))
		return 0;

	xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
	if (!xfs_btree_ptr_is_null(cur, &ptr))
		return 0;
	return 1;
}

STATIC void
xfs_btree_buf_to_ptr(
	struct xfs_btree_cur	*cur,
	struct xfs_buf		*bp,
	union xfs_btree_ptr	*ptr)
{
	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
		ptr->l = cpu_to_be64(XFS_DADDR_TO_FSB(cur->bc_mp,
					XFS_BUF_ADDR(bp)));
	else {
		ptr->s = cpu_to_be32(xfs_daddr_to_agbno(cur->bc_mp,
					XFS_BUF_ADDR(bp)));
	}
}

STATIC void
xfs_btree_set_refs(
	struct xfs_btree_cur	*cur,
	struct xfs_buf		*bp)
{
	switch (cur->bc_btnum) {
	case XFS_BTNUM_BNO:
	case XFS_BTNUM_CNT:
		xfs_buf_set_ref(bp, XFS_ALLOC_BTREE_REF);
		break;
	case XFS_BTNUM_INO:
	case XFS_BTNUM_FINO:
		xfs_buf_set_ref(bp, XFS_INO_BTREE_REF);
		break;
	case XFS_BTNUM_BMAP:
		xfs_buf_set_ref(bp, XFS_BMAP_BTREE_REF);
		break;
	default:
		ASSERT(0);
	}
}

STATIC int
xfs_btree_get_buf_block(
	struct xfs_btree_cur	*cur,
	union xfs_btree_ptr	*ptr,
	int			flags,
	struct xfs_btree_block	**block,
	struct xfs_buf		**bpp)
{
	struct xfs_mount	*mp = cur->bc_mp;
	xfs_daddr_t		d;

	/* need to sort out how callers deal with failures first */
	ASSERT(!(flags & XBF_TRYLOCK));

	d = xfs_btree_ptr_to_daddr(cur, ptr);
	*bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d,
				 mp->m_bsize, flags);

	if (!*bpp)
		return -ENOMEM;

	(*bpp)->b_ops = cur->bc_ops->buf_ops;
	*block = XFS_BUF_TO_BLOCK(*bpp);
	return 0;
}

/*
 * Read in the buffer at the given ptr and return the buffer and
 * the block pointer within the buffer.
 */
STATIC int
xfs_btree_read_buf_block(
	struct xfs_btree_cur	*cur,
	union xfs_btree_ptr	*ptr,
	int			flags,
	struct xfs_btree_block	**block,
	struct xfs_buf		**bpp)
{
	struct xfs_mount	*mp = cur->bc_mp;
	xfs_daddr_t		d;
	int			error;

	/* need to sort out how callers deal with failures first */
	ASSERT(!(flags & XBF_TRYLOCK));

	d = xfs_btree_ptr_to_daddr(cur, ptr);
	error = xfs_trans_read_buf(mp, cur->bc_tp, mp->m_ddev_targp, d,
				   mp->m_bsize, flags, bpp,
				   cur->bc_ops->buf_ops);
	if (error)
		return error;

	xfs_btree_set_refs(cur, *bpp);
	*block = XFS_BUF_TO_BLOCK(*bpp);
	return 0;
}

/*
 * Copy keys from one btree block to another.
 */
STATIC void
xfs_btree_copy_keys(
	struct xfs_btree_cur	*cur,
	union xfs_btree_key	*dst_key,
	union xfs_btree_key	*src_key,
	int			numkeys)
{
	ASSERT(numkeys >= 0);
	memcpy(dst_key, src_key, numkeys * cur->bc_ops->key_len);
}

/*
 * Copy records from one btree block to another.
 */
STATIC void
xfs_btree_copy_recs(
	struct xfs_btree_cur	*cur,
	union xfs_btree_rec	*dst_rec,
	union xfs_btree_rec	*src_rec,
	int			numrecs)
{
	ASSERT(numrecs >= 0);
	memcpy(dst_rec, src_rec, numrecs * cur->bc_ops->rec_len);
}

/*
 * Copy block pointers from one btree block to another.
 */
STATIC void
xfs_btree_copy_ptrs(
	struct xfs_btree_cur	*cur,
	union xfs_btree_ptr	*dst_ptr,
	union xfs_btree_ptr	*src_ptr,
	int			numptrs)
{
	ASSERT(numptrs >= 0);
	memcpy(dst_ptr, src_ptr, numptrs * xfs_btree_ptr_len(cur));
}

/*
 * Shift keys one index left/right inside a single btree block.
 */
STATIC void
xfs_btree_shift_keys(
	struct xfs_btree_cur	*cur,
	union xfs_btree_key	*key,
	int			dir,
	int			numkeys)
{
	char			*dst_key;

	ASSERT(numkeys >= 0);
	ASSERT(dir == 1 || dir == -1);

	dst_key = (char *)key + (dir * cur->bc_ops->key_len);
	memmove(dst_key, key, numkeys * cur->bc_ops->key_len);
}

/*
 * Shift records one index left/right inside a single btree block.
 */
STATIC void
xfs_btree_shift_recs(
	struct xfs_btree_cur	*cur,
	union xfs_btree_rec	*rec,
	int			dir,
	int			numrecs)
{
	char			*dst_rec;

	ASSERT(numrecs >= 0);
	ASSERT(dir == 1 || dir == -1);

	dst_rec = (char *)rec + (dir * cur->bc_ops->rec_len);
	memmove(dst_rec, rec, numrecs * cur->bc_ops->rec_len);
}

/*
 * Shift block pointers one index left/right inside a single btree block.
 */
STATIC void
xfs_btree_shift_ptrs(
	struct xfs_btree_cur	*cur,
	union xfs_btree_ptr	*ptr,
	int			dir,
	int			numptrs)
{
	char			*dst_ptr;

	ASSERT(numptrs >= 0);
	ASSERT(dir == 1 || dir == -1);

	dst_ptr = (char *)ptr + (dir * xfs_btree_ptr_len(cur));
	memmove(dst_ptr, ptr, numptrs * xfs_btree_ptr_len(cur));
}

/*
 * Log key values from the btree block.
 */
STATIC void
xfs_btree_log_keys(
	struct xfs_btree_cur	*cur,
	struct xfs_buf		*bp,
	int			first,
	int			last)
{
	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
	XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);

	if (bp) {
		xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
		xfs_trans_log_buf(cur->bc_tp, bp,
				  xfs_btree_key_offset(cur, first),
				  xfs_btree_key_offset(cur, last + 1) - 1);
	} else {
		xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
				xfs_ilog_fbroot(cur->bc_private.b.whichfork));
	}

	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
}

/*
 * Log record values from the btree block.
 */
void
xfs_btree_log_recs(
	struct xfs_btree_cur	*cur,
	struct xfs_buf		*bp,
	int			first,
	int			last)
{
	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
	XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);

	xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
	xfs_trans_log_buf(cur->bc_tp, bp,
			  xfs_btree_rec_offset(cur, first),
			  xfs_btree_rec_offset(cur, last + 1) - 1);

	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
}

/*
 * Log block pointer fields from a btree block (nonleaf).
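 * When bp is NULL the pointers live in the inode root, so the whole
 * inode fork root is logged instead of a byte range in a buffer.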
 */
STATIC void
xfs_btree_log_ptrs(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	struct xfs_buf		*bp,	/* buffer containing btree block */
	int			first,	/* index of first pointer to log */
	int			last)	/* index of last pointer to log */
{
	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
	XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);

	if (bp) {
		struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
		int			level = xfs_btree_get_level(block);

		xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
		xfs_trans_log_buf(cur->bc_tp, bp,
				xfs_btree_ptr_offset(cur, first, level),
				xfs_btree_ptr_offset(cur, last + 1, level) - 1);
	} else {
		xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
			xfs_ilog_fbroot(cur->bc_private.b.whichfork));
	}

	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
}

/*
 * Log fields from a btree block header.
 */
void
xfs_btree_log_block(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	struct xfs_buf		*bp,	/* buffer containing btree block */
	int			fields)	/* mask of fields: XFS_BB_... */
{
	int			first;	/* first byte offset logged */
	int			last;	/* last byte offset logged */
	static const short	soffsets[] = {	/* table of offsets (short) */
		offsetof(struct xfs_btree_block, bb_magic),
		offsetof(struct xfs_btree_block, bb_level),
		offsetof(struct xfs_btree_block, bb_numrecs),
		offsetof(struct xfs_btree_block, bb_u.s.bb_leftsib),
		offsetof(struct xfs_btree_block, bb_u.s.bb_rightsib),
		offsetof(struct xfs_btree_block, bb_u.s.bb_blkno),
		offsetof(struct xfs_btree_block, bb_u.s.bb_lsn),
		offsetof(struct xfs_btree_block, bb_u.s.bb_uuid),
		offsetof(struct xfs_btree_block, bb_u.s.bb_owner),
		offsetof(struct xfs_btree_block, bb_u.s.bb_crc),
		XFS_BTREE_SBLOCK_CRC_LEN
	};
	static const short	loffsets[] = {	/* table of offsets (long) */
		offsetof(struct xfs_btree_block, bb_magic),
		offsetof(struct xfs_btree_block, bb_level),
		offsetof(struct xfs_btree_block, bb_numrecs),
		offsetof(struct xfs_btree_block, bb_u.l.bb_leftsib),
		offsetof(struct xfs_btree_block, bb_u.l.bb_rightsib),
		offsetof(struct xfs_btree_block, bb_u.l.bb_blkno),
		offsetof(struct xfs_btree_block, bb_u.l.bb_lsn),
		offsetof(struct xfs_btree_block, bb_u.l.bb_uuid),
		offsetof(struct xfs_btree_block, bb_u.l.bb_owner),
		offsetof(struct xfs_btree_block, bb_u.l.bb_crc),
		offsetof(struct xfs_btree_block, bb_u.l.bb_pad),
		XFS_BTREE_LBLOCK_CRC_LEN
	};

	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
	XFS_BTREE_TRACE_ARGBI(cur, bp, fields);

	if (bp) {
		int nbits;

		if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) {
			/*
			 * We don't log the CRC when updating a btree
			 * block but instead recreate it during log
			 * recovery.  As the log buffers have checksums
			 * of their own this is safe and avoids logging a crc
			 * update in a lot of places.
			 */
			if (fields == XFS_BB_ALL_BITS)
				fields = XFS_BB_ALL_BITS_CRC;
			nbits = XFS_BB_NUM_BITS_CRC;
		} else {
			nbits = XFS_BB_NUM_BITS;
		}
		xfs_btree_offsets(fields,
				  (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
					loffsets : soffsets,
				  nbits, &first, &last);
		xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
		xfs_trans_log_buf(cur->bc_tp, bp, first, last);
	} else {
		xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
			xfs_ilog_fbroot(cur->bc_private.b.whichfork));
	}

	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
}

/*
 * Increment cursor by one record at the level.
 * For nonzero levels the leaf-ward information is untouched.
 */
int						/* error */
xfs_btree_increment(
	struct xfs_btree_cur	*cur,
	int			level,
	int			*stat)		/* success/failure */
{
	struct xfs_btree_block	*block;
	union xfs_btree_ptr	ptr;
	struct xfs_buf		*bp;
	int			error;		/* error return value */
	int			lev;

	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
	XFS_BTREE_TRACE_ARGI(cur, level);

	ASSERT(level < cur->bc_nlevels);

	/* Read-ahead to the right at this level. */
	xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);

	/* Get a pointer to the btree block. */
	block = xfs_btree_get_block(cur, level, &bp);

#ifdef DEBUG
	error = xfs_btree_check_block(cur, block, level, bp);
	if (error)
		goto error0;
#endif

	/* We're done if we remain in the block after the increment. */
	if (++cur->bc_ptrs[level] <= xfs_btree_get_numrecs(block))
		goto out1;

	/* Fail if we just went off the right edge of the tree. */
	xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
	if (xfs_btree_ptr_is_null(cur, &ptr))
		goto out0;

	XFS_BTREE_STATS_INC(cur, increment);

	/*
	 * March up the tree incrementing pointers.
	 * Stop when we don't go off the right edge of a block.
	 */
	for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
		block = xfs_btree_get_block(cur, lev, &bp);

#ifdef DEBUG
		error = xfs_btree_check_block(cur, block, lev, bp);
		if (error)
			goto error0;
#endif

		if (++cur->bc_ptrs[lev] <= xfs_btree_get_numrecs(block))
			break;

		/* Read-ahead the right block for the next loop. */
		xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
	}

	/*
	 * If we went off the root then we are either seriously
	 * confused or have the tree root in an inode.
	 */
	if (lev == cur->bc_nlevels) {
		if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
			goto out0;
		ASSERT(0);
		error = -EFSCORRUPTED;
		goto error0;
	}
	ASSERT(lev < cur->bc_nlevels);

	/*
	 * Now walk back down the tree, fixing up the cursor's buffer
	 * pointers and key numbers.
	 */
	for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
		union xfs_btree_ptr	*ptrp;

		ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
		--lev;
		error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp);
		if (error)
			goto error0;

		xfs_btree_setbuf(cur, lev, bp);
		cur->bc_ptrs[lev] = 1;
	}
out1:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	*stat = 1;
	return 0;

out0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	*stat = 0;
	return 0;

error0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
	return error;
}

/*
 * Decrement cursor by one record at the level.
 * For nonzero levels the leaf-ward information is untouched.
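 * On success *stat is set to 1 if the cursor moved to the previous record
 * and 0 if we were already at the leftmost record in the tree.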
 */
int						/* error */
xfs_btree_decrement(
	struct xfs_btree_cur	*cur,
	int			level,
	int			*stat)		/* success/failure */
{
	struct xfs_btree_block	*block;
	xfs_buf_t		*bp;
	int			error;		/* error return value */
	int			lev;
	union xfs_btree_ptr	ptr;

	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
	XFS_BTREE_TRACE_ARGI(cur, level);

	ASSERT(level < cur->bc_nlevels);

	/* Read-ahead to the left at this level. */
	xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA);

	/* We're done if we remain in the block after the decrement. */
	if (--cur->bc_ptrs[level] > 0)
		goto out1;

	/* Get a pointer to the btree block. */
	block = xfs_btree_get_block(cur, level, &bp);

#ifdef DEBUG
	error = xfs_btree_check_block(cur, block, level, bp);
	if (error)
		goto error0;
#endif

	/* Fail if we just went off the left edge of the tree. */
	xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB);
	if (xfs_btree_ptr_is_null(cur, &ptr))
		goto out0;

	XFS_BTREE_STATS_INC(cur, decrement);

	/*
	 * March up the tree decrementing pointers.
	 * Stop when we don't go off the left edge of a block.
	 */
	for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
		if (--cur->bc_ptrs[lev] > 0)
			break;
		/* Read-ahead the left block for the next loop. */
		xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA);
	}

	/*
	 * If we went off the root then we are either seriously confused
	 * or have the tree root in an inode.
	 */
	if (lev == cur->bc_nlevels) {
		if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
			goto out0;
		ASSERT(0);
		error = -EFSCORRUPTED;
		goto error0;
	}
	ASSERT(lev < cur->bc_nlevels);

	/*
	 * Now walk back down the tree, fixing up the cursor's buffer
	 * pointers and key numbers.
	 */
	for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
		union xfs_btree_ptr	*ptrp;

		ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
		--lev;
		error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp);
		if (error)
			goto error0;
		xfs_btree_setbuf(cur, lev, bp);
		cur->bc_ptrs[lev] = xfs_btree_get_numrecs(block);
	}
out1:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	*stat = 1;
	return 0;

out0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	*stat = 0;
	return 0;

error0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
	return error;
}

STATIC int
xfs_btree_lookup_get_block(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	int			level,	/* level in the btree */
	union xfs_btree_ptr	*pp,	/* ptr to btree block */
	struct xfs_btree_block	**blkp) /* return btree block */
{
	struct xfs_buf		*bp;	/* buffer pointer for btree block */
	int			error = 0;

	/* special case the root block if in an inode */
	if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
	    (level == cur->bc_nlevels - 1)) {
		*blkp = xfs_btree_get_iroot(cur);
		return 0;
	}

	/*
	 * If the old buffer at this level matches the disk address we are
	 * looking for, re-use it.
	 *
	 * Otherwise throw it away and get a new one.
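	 * (xfs_btree_setbuf() below takes care of releasing the old buffer.)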
	 */
	bp = cur->bc_bufs[level];
	if (bp && XFS_BUF_ADDR(bp) == xfs_btree_ptr_to_daddr(cur, pp)) {
		*blkp = XFS_BUF_TO_BLOCK(bp);
		return 0;
	}

	error = xfs_btree_read_buf_block(cur, pp, 0, blkp, &bp);
	if (error)
		return error;

	xfs_btree_setbuf(cur, level, bp);
	return 0;
}

/*
 * Get current search key.  For level 0 we don't actually have a key
 * structure so we make one up from the record.  For all other levels
 * we just return the right key.
 */
STATIC union xfs_btree_key *
xfs_lookup_get_search_key(
	struct xfs_btree_cur	*cur,
	int			level,
	int			keyno,
	struct xfs_btree_block	*block,
	union xfs_btree_key	*kp)
{
	if (level == 0) {
		cur->bc_ops->init_key_from_rec(kp,
				xfs_btree_rec_addr(cur, keyno, block));
		return kp;
	}

	return xfs_btree_key_addr(cur, keyno, block);
}

/*
 * Lookup the record.  The cursor is made to point to it, based on dir.
 * stat is set to 0 if we can't find any such record, 1 for success.
 */
int					/* error */
xfs_btree_lookup(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	xfs_lookup_t		dir,	/* <=, ==, or >= */
	int			*stat)	/* success/failure */
{
	struct xfs_btree_block	*block;	/* current btree block */
	__int64_t		diff;	/* difference for the current key */
	int			error;	/* error return value */
	int			keyno;	/* current key number */
	int			level;	/* level in the btree */
	union xfs_btree_ptr	*pp;	/* ptr to btree block */
	union xfs_btree_ptr	ptr;	/* ptr to btree block */

	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
	XFS_BTREE_TRACE_ARGI(cur, dir);

	XFS_BTREE_STATS_INC(cur, lookup);

	block = NULL;
	keyno = 0;

	/* initialise start pointer from cursor */
	cur->bc_ops->init_ptr_from_cur(cur, &ptr);
	pp = &ptr;

	/*
	 * Iterate over each level in the btree, starting at the root.
	 * For each level above the leaves, find the key we need, based
	 * on the lookup record, then follow the corresponding block
	 * pointer down to the next level.
	 */
	for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {
		/* Get the block we need to do the lookup on. */
		error = xfs_btree_lookup_get_block(cur, level, pp, &block);
		if (error)
			goto error0;

		if (diff == 0) {
			/*
			 * If we already had a key match at a higher level, we
			 * know we need to use the first entry in this block.
			 */
			keyno = 1;
		} else {
			/* Otherwise search this block. Do a binary search. */

			int	high;	/* high entry number */
			int	low;	/* low entry number */

			/* Set low and high entry numbers, 1-based. */
			low = 1;
			high = xfs_btree_get_numrecs(block);
			if (!high) {
				/* Block is empty, must be an empty leaf. */
				ASSERT(level == 0 && cur->bc_nlevels == 1);

				cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE;
				XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
				*stat = 0;
				return 0;
			}

			/* Binary search the block. */
			while (low <= high) {
				union xfs_btree_key	key;
				union xfs_btree_key	*kp;

				XFS_BTREE_STATS_INC(cur, compare);

				/* keyno is average of low and high. */
				keyno = (low + high) >> 1;

				/* Get current search key */
				kp = xfs_lookup_get_search_key(cur, level,
						keyno, block, &key);

				/*
				 * Compute difference to get next direction:
				 *  - less than, move right
				 *  - greater than, move left
				 *  - equal, we're done
				 */
				diff = cur->bc_ops->key_diff(cur, kp);
				if (diff < 0)
					low = keyno + 1;
				else if (diff > 0)
					high = keyno - 1;
				else
					break;
			}
		}

		/*
		 * If there are more levels, set up for the next level
		 * by getting the block number and filling in the cursor.
		 */
		if (level > 0) {
			/*
			 * If we moved left, need the previous key number,
			 * unless there isn't one.
			 */
			if (diff > 0 && --keyno < 1)
				keyno = 1;
			pp = xfs_btree_ptr_addr(cur, keyno, block);

#ifdef DEBUG
			error = xfs_btree_check_ptr(cur, pp, 0, level);
			if (error)
				goto error0;
#endif
			cur->bc_ptrs[level] = keyno;
		}
	}

	/* Done with the search. See if we need to adjust the results. */
	if (dir != XFS_LOOKUP_LE && diff < 0) {
		keyno++;
		/*
		 * If ge search and we went off the end of the block, but it's
		 * not the last block, we're in the wrong block.
		 */
		xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
		if (dir == XFS_LOOKUP_GE &&
		    keyno > xfs_btree_get_numrecs(block) &&
		    !xfs_btree_ptr_is_null(cur, &ptr)) {
			int	i;

			cur->bc_ptrs[0] = keyno;
			error = xfs_btree_increment(cur, 0, &i);
			if (error)
				goto error0;
			XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1);
			XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
			*stat = 1;
			return 0;
		}
	} else if (dir == XFS_LOOKUP_LE && diff > 0)
		keyno--;
	cur->bc_ptrs[0] = keyno;

	/* Return if we succeeded or not. */
	if (keyno == 0 || keyno > xfs_btree_get_numrecs(block))
		*stat = 0;
	else if (dir != XFS_LOOKUP_EQ || diff == 0)
		*stat = 1;
	else
		*stat = 0;
	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	return 0;

error0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
	return error;
}

/*
 * Update keys at all levels from here to the root along the cursor's path.
 */
STATIC int
xfs_btree_updkey(
	struct xfs_btree_cur	*cur,
	union xfs_btree_key	*keyp,
	int			level)
{
	struct xfs_btree_block	*block;
	struct xfs_buf		*bp;
	union xfs_btree_key	*kp;
	int			ptr;

	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
	XFS_BTREE_TRACE_ARGIK(cur, level, keyp);

	ASSERT(!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) || level >= 1);

	/*
	 * Go up the tree from this level toward the root.
	 * At each level, update the key value to the value input.
	 * Stop when we reach a level where the cursor isn't pointing
	 * at the first entry in the block.
	 */
	for (ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {
#ifdef DEBUG
		int		error;
#endif
		block = xfs_btree_get_block(cur, level, &bp);
#ifdef DEBUG
		error = xfs_btree_check_block(cur, block, level, bp);
		if (error) {
			XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
			return error;
		}
#endif
		ptr = cur->bc_ptrs[level];
		kp = xfs_btree_key_addr(cur, ptr, block);
		xfs_btree_copy_keys(cur, kp, keyp, 1);
		xfs_btree_log_keys(cur, bp, ptr, ptr);
	}

	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	return 0;
}

/*
 * Update the record referred to by cur to the value in the
 * given record. This either works (return 0) or gets an
 * EFSCORRUPTED error.
 */
int
xfs_btree_update(
	struct xfs_btree_cur	*cur,
	union xfs_btree_rec	*rec)
{
	struct xfs_btree_block	*block;
	struct xfs_buf		*bp;
	int			error;
	int			ptr;
	union xfs_btree_rec	*rp;

	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
	XFS_BTREE_TRACE_ARGR(cur, rec);

	/* Pick up the current block. */
	block = xfs_btree_get_block(cur, 0, &bp);

#ifdef DEBUG
	error = xfs_btree_check_block(cur, block, 0, bp);
	if (error)
		goto error0;
#endif
	/* Get the address of the rec to be updated. */
	ptr = cur->bc_ptrs[0];
	rp = xfs_btree_rec_addr(cur, ptr, block);

	/* Fill in the new contents and log them. */
	xfs_btree_copy_recs(cur, rp, rec, 1);
	xfs_btree_log_recs(cur, bp, ptr, ptr);

	/*
	 * If we are tracking the last record in the tree and
	 * we are at the far right edge of the tree, update it.
	 */
	if (xfs_btree_is_lastrec(cur, block, 0)) {
		cur->bc_ops->update_lastrec(cur, block, rec,
					    ptr, LASTREC_UPDATE);
	}

	/* Updating first rec in leaf. Pass new key value up to our parent. */
	if (ptr == 1) {
		union xfs_btree_key	key;

		cur->bc_ops->init_key_from_rec(&key, rec);
		error = xfs_btree_updkey(cur, &key, 1);
		if (error)
			goto error0;
	}

	XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
	return 0;

error0:
	XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
	return error;
}

/*
 * Move 1 record left from cur/level if possible.
 * Update cur to reflect the new path.
 */
STATIC int					/* error */
xfs_btree_lshift(
	struct xfs_btree_cur	*cur,
	int			level,
	int			*stat)		/* success/failure */
{
	union xfs_btree_key	key;		/* btree key */
	struct xfs_buf		*lbp;		/* left buffer pointer */
	struct xfs_btree_block	*left;		/* left btree block */
	int			lrecs;		/* left record count */
	struct xfs_buf		*rbp;		/* right buffer pointer */
	struct xfs_btree_block	*right;		/* right btree block */
	int			rrecs;		/* right record count */
	union xfs_btree_ptr	lptr;		/* left btree pointer */
	union xfs_btree_key	*rkp = NULL;	/* right btree key */
	union xfs_btree_ptr	*rpp = NULL;	/* right address pointer */
	union xfs_btree_rec	*rrp = NULL;	/* right record pointer */
	int			error;		/* error return value */

	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
	XFS_BTREE_TRACE_ARGI(cur, level);

	if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
	    level == cur->bc_nlevels - 1)
		goto out0;

	/* Set up variables for this block as "right". */
*/ 1992 right = xfs_btree_get_block(cur, level, &rbp); 1993 1994 #ifdef DEBUG 1995 error = xfs_btree_check_block(cur, right, level, rbp); 1996 if (error) 1997 goto error0; 1998 #endif 1999 2000 /* If we've got no left sibling then we can't shift an entry left. */ 2001 xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB); 2002 if (xfs_btree_ptr_is_null(cur, &lptr)) 2003 goto out0; 2004 2005 /* 2006 * If the cursor entry is the one that would be moved, don't 2007 * do it... it's too complicated. 2008 */ 2009 if (cur->bc_ptrs[level] <= 1) 2010 goto out0; 2011 2012 /* Set up the left neighbor as "left". */ 2013 error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp); 2014 if (error) 2015 goto error0; 2016 2017 /* If it's full, it can't take another entry. */ 2018 lrecs = xfs_btree_get_numrecs(left); 2019 if (lrecs == cur->bc_ops->get_maxrecs(cur, level)) 2020 goto out0; 2021 2022 rrecs = xfs_btree_get_numrecs(right); 2023 2024 /* 2025 * We add one entry to the left side and remove one for the right side. 2026 * Account for it here, the changes will be updated on disk and logged 2027 * later. 2028 */ 2029 lrecs++; 2030 rrecs--; 2031 2032 XFS_BTREE_STATS_INC(cur, lshift); 2033 XFS_BTREE_STATS_ADD(cur, moves, 1); 2034 2035 /* 2036 * If non-leaf, copy a key and a ptr to the left block. 2037 * Log the changes to the left block. 2038 */ 2039 if (level > 0) { 2040 /* It's a non-leaf. Move keys and pointers. */ 2041 union xfs_btree_key *lkp; /* left btree key */ 2042 union xfs_btree_ptr *lpp; /* left address pointer */ 2043 2044 lkp = xfs_btree_key_addr(cur, lrecs, left); 2045 rkp = xfs_btree_key_addr(cur, 1, right); 2046 2047 lpp = xfs_btree_ptr_addr(cur, lrecs, left); 2048 rpp = xfs_btree_ptr_addr(cur, 1, right); 2049 #ifdef DEBUG 2050 error = xfs_btree_check_ptr(cur, rpp, 0, level); 2051 if (error) 2052 goto error0; 2053 #endif 2054 xfs_btree_copy_keys(cur, lkp, rkp, 1); 2055 xfs_btree_copy_ptrs(cur, lpp, rpp, 1); 2056 2057 xfs_btree_log_keys(cur, lbp, lrecs, lrecs); 2058 xfs_btree_log_ptrs(cur, lbp, lrecs, lrecs); 2059 2060 ASSERT(cur->bc_ops->keys_inorder(cur, 2061 xfs_btree_key_addr(cur, lrecs - 1, left), lkp)); 2062 } else { 2063 /* It's a leaf. Move records. */ 2064 union xfs_btree_rec *lrp; /* left record pointer */ 2065 2066 lrp = xfs_btree_rec_addr(cur, lrecs, left); 2067 rrp = xfs_btree_rec_addr(cur, 1, right); 2068 2069 xfs_btree_copy_recs(cur, lrp, rrp, 1); 2070 xfs_btree_log_recs(cur, lbp, lrecs, lrecs); 2071 2072 ASSERT(cur->bc_ops->recs_inorder(cur, 2073 xfs_btree_rec_addr(cur, lrecs - 1, left), lrp)); 2074 } 2075 2076 xfs_btree_set_numrecs(left, lrecs); 2077 xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS); 2078 2079 xfs_btree_set_numrecs(right, rrecs); 2080 xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS); 2081 2082 /* 2083 * Slide the contents of right down one entry. 2084 */ 2085 XFS_BTREE_STATS_ADD(cur, moves, rrecs - 1); 2086 if (level > 0) { 2087 /* It's a nonleaf. operate on keys and ptrs */ 2088 #ifdef DEBUG 2089 int i; /* loop index */ 2090 2091 for (i = 0; i < rrecs; i++) { 2092 error = xfs_btree_check_ptr(cur, rpp, i + 1, level); 2093 if (error) 2094 goto error0; 2095 } 2096 #endif 2097 xfs_btree_shift_keys(cur, 2098 xfs_btree_key_addr(cur, 2, right), 2099 -1, rrecs); 2100 xfs_btree_shift_ptrs(cur, 2101 xfs_btree_ptr_addr(cur, 2, right), 2102 -1, rrecs); 2103 2104 xfs_btree_log_keys(cur, rbp, 1, rrecs); 2105 xfs_btree_log_ptrs(cur, rbp, 1, rrecs); 2106 } else { 2107 /* It's a leaf. 
operate on records */ 2108 xfs_btree_shift_recs(cur, 2109 xfs_btree_rec_addr(cur, 2, right), 2110 -1, rrecs); 2111 xfs_btree_log_recs(cur, rbp, 1, rrecs); 2112 2113 /* 2114 * If it's the first record in the block, we'll need a key 2115 * structure to pass up to the next level (updkey). 2116 */ 2117 cur->bc_ops->init_key_from_rec(&key, 2118 xfs_btree_rec_addr(cur, 1, right)); 2119 rkp = &key; 2120 } 2121 2122 /* Update the parent key values of right. */ 2123 error = xfs_btree_updkey(cur, rkp, level + 1); 2124 if (error) 2125 goto error0; 2126 2127 /* Slide the cursor value left one. */ 2128 cur->bc_ptrs[level]--; 2129 2130 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 2131 *stat = 1; 2132 return 0; 2133 2134 out0: 2135 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 2136 *stat = 0; 2137 return 0; 2138 2139 error0: 2140 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); 2141 return error; 2142 } 2143 2144 /* 2145 * Move 1 record right from cur/level if possible. 2146 * Update cur to reflect the new path. 2147 */ 2148 STATIC int /* error */ 2149 xfs_btree_rshift( 2150 struct xfs_btree_cur *cur, 2151 int level, 2152 int *stat) /* success/failure */ 2153 { 2154 union xfs_btree_key key; /* btree key */ 2155 struct xfs_buf *lbp; /* left buffer pointer */ 2156 struct xfs_btree_block *left; /* left btree block */ 2157 struct xfs_buf *rbp; /* right buffer pointer */ 2158 struct xfs_btree_block *right; /* right btree block */ 2159 struct xfs_btree_cur *tcur; /* temporary btree cursor */ 2160 union xfs_btree_ptr rptr; /* right block pointer */ 2161 union xfs_btree_key *rkp; /* right btree key */ 2162 int rrecs; /* right record count */ 2163 int lrecs; /* left record count */ 2164 int error; /* error return value */ 2165 int i; /* loop counter */ 2166 2167 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); 2168 XFS_BTREE_TRACE_ARGI(cur, level); 2169 2170 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && 2171 (level == cur->bc_nlevels - 1)) 2172 goto out0; 2173 2174 /* Set up variables for this block as "left". */ 2175 left = xfs_btree_get_block(cur, level, &lbp); 2176 2177 #ifdef DEBUG 2178 error = xfs_btree_check_block(cur, left, level, lbp); 2179 if (error) 2180 goto error0; 2181 #endif 2182 2183 /* If we've got no right sibling then we can't shift an entry right. */ 2184 xfs_btree_get_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB); 2185 if (xfs_btree_ptr_is_null(cur, &rptr)) 2186 goto out0; 2187 2188 /* 2189 * If the cursor entry is the one that would be moved, don't 2190 * do it... it's too complicated. 2191 */ 2192 lrecs = xfs_btree_get_numrecs(left); 2193 if (cur->bc_ptrs[level] >= lrecs) 2194 goto out0; 2195 2196 /* Set up the right neighbor as "right". */ 2197 error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp); 2198 if (error) 2199 goto error0; 2200 2201 /* If it's full, it can't take another entry. */ 2202 rrecs = xfs_btree_get_numrecs(right); 2203 if (rrecs == cur->bc_ops->get_maxrecs(cur, level)) 2204 goto out0; 2205 2206 XFS_BTREE_STATS_INC(cur, rshift); 2207 XFS_BTREE_STATS_ADD(cur, moves, rrecs); 2208 2209 /* 2210 * Make a hole at the start of the right neighbor block, then 2211 * copy the last left block entry to the hole. 2212 */ 2213 if (level > 0) { 2214 /* It's a nonleaf. 
make a hole in the keys and ptrs */ 2215 union xfs_btree_key *lkp; 2216 union xfs_btree_ptr *lpp; 2217 union xfs_btree_ptr *rpp; 2218 2219 lkp = xfs_btree_key_addr(cur, lrecs, left); 2220 lpp = xfs_btree_ptr_addr(cur, lrecs, left); 2221 rkp = xfs_btree_key_addr(cur, 1, right); 2222 rpp = xfs_btree_ptr_addr(cur, 1, right); 2223 2224 #ifdef DEBUG 2225 for (i = rrecs - 1; i >= 0; i--) { 2226 error = xfs_btree_check_ptr(cur, rpp, i, level); 2227 if (error) 2228 goto error0; 2229 } 2230 #endif 2231 2232 xfs_btree_shift_keys(cur, rkp, 1, rrecs); 2233 xfs_btree_shift_ptrs(cur, rpp, 1, rrecs); 2234 2235 #ifdef DEBUG 2236 error = xfs_btree_check_ptr(cur, lpp, 0, level); 2237 if (error) 2238 goto error0; 2239 #endif 2240 2241 /* Now put the new data in, and log it. */ 2242 xfs_btree_copy_keys(cur, rkp, lkp, 1); 2243 xfs_btree_copy_ptrs(cur, rpp, lpp, 1); 2244 2245 xfs_btree_log_keys(cur, rbp, 1, rrecs + 1); 2246 xfs_btree_log_ptrs(cur, rbp, 1, rrecs + 1); 2247 2248 ASSERT(cur->bc_ops->keys_inorder(cur, rkp, 2249 xfs_btree_key_addr(cur, 2, right))); 2250 } else { 2251 /* It's a leaf. make a hole in the records */ 2252 union xfs_btree_rec *lrp; 2253 union xfs_btree_rec *rrp; 2254 2255 lrp = xfs_btree_rec_addr(cur, lrecs, left); 2256 rrp = xfs_btree_rec_addr(cur, 1, right); 2257 2258 xfs_btree_shift_recs(cur, rrp, 1, rrecs); 2259 2260 /* Now put the new data in, and log it. */ 2261 xfs_btree_copy_recs(cur, rrp, lrp, 1); 2262 xfs_btree_log_recs(cur, rbp, 1, rrecs + 1); 2263 2264 cur->bc_ops->init_key_from_rec(&key, rrp); 2265 rkp = &key; 2266 2267 ASSERT(cur->bc_ops->recs_inorder(cur, rrp, 2268 xfs_btree_rec_addr(cur, 2, right))); 2269 } 2270 2271 /* 2272 * Decrement and log left's numrecs, bump and log right's numrecs. 2273 */ 2274 xfs_btree_set_numrecs(left, --lrecs); 2275 xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS); 2276 2277 xfs_btree_set_numrecs(right, ++rrecs); 2278 xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS); 2279 2280 /* 2281 * Using a temporary cursor, update the parent key values of the 2282 * block on the right. 2283 */ 2284 error = xfs_btree_dup_cursor(cur, &tcur); 2285 if (error) 2286 goto error0; 2287 i = xfs_btree_lastrec(tcur, level); 2288 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); 2289 2290 error = xfs_btree_increment(tcur, level, &i); 2291 if (error) 2292 goto error1; 2293 2294 error = xfs_btree_updkey(tcur, rkp, level + 1); 2295 if (error) 2296 goto error1; 2297 2298 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); 2299 2300 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 2301 *stat = 1; 2302 return 0; 2303 2304 out0: 2305 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 2306 *stat = 0; 2307 return 0; 2308 2309 error0: 2310 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); 2311 return error; 2312 2313 error1: 2314 XFS_BTREE_TRACE_CURSOR(tcur, XBT_ERROR); 2315 xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); 2316 return error; 2317 } 2318 2319 /* 2320 * Split cur/level block in half. 2321 * Return new block number and the key to its first 2322 * record (to be inserted into parent). 
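 *
 * Worked example (illustrative values only): with lrecs == 7 and the
 * cursor at bc_ptrs[level] == 3, the code below computes
 * rrecs = 7 / 2 = 3, bumps it to 4 because lrecs is odd and the cursor
 * (3) is <= rrecs + 1 (so the pending insert lands in the left block),
 * and sets src_index = 7 - 4 + 1 = 4; entries 4..7 move to the new
 * right block and both blocks hold four entries once the insert
 * completes.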
2323 */ 2324 STATIC int /* error */ 2325 __xfs_btree_split( 2326 struct xfs_btree_cur *cur, 2327 int level, 2328 union xfs_btree_ptr *ptrp, 2329 union xfs_btree_key *key, 2330 struct xfs_btree_cur **curp, 2331 int *stat) /* success/failure */ 2332 { 2333 union xfs_btree_ptr lptr; /* left sibling block ptr */ 2334 struct xfs_buf *lbp; /* left buffer pointer */ 2335 struct xfs_btree_block *left; /* left btree block */ 2336 union xfs_btree_ptr rptr; /* right sibling block ptr */ 2337 struct xfs_buf *rbp; /* right buffer pointer */ 2338 struct xfs_btree_block *right; /* right btree block */ 2339 union xfs_btree_ptr rrptr; /* right-right sibling ptr */ 2340 struct xfs_buf *rrbp; /* right-right buffer pointer */ 2341 struct xfs_btree_block *rrblock; /* right-right btree block */ 2342 int lrecs; 2343 int rrecs; 2344 int src_index; 2345 int error; /* error return value */ 2346 #ifdef DEBUG 2347 int i; 2348 #endif 2349 2350 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); 2351 XFS_BTREE_TRACE_ARGIPK(cur, level, *ptrp, key); 2352 2353 XFS_BTREE_STATS_INC(cur, split); 2354 2355 /* Set up left block (current one). */ 2356 left = xfs_btree_get_block(cur, level, &lbp); 2357 2358 #ifdef DEBUG 2359 error = xfs_btree_check_block(cur, left, level, lbp); 2360 if (error) 2361 goto error0; 2362 #endif 2363 2364 xfs_btree_buf_to_ptr(cur, lbp, &lptr); 2365 2366 /* Allocate the new block. If we can't do it, we're toast. Give up. */ 2367 error = cur->bc_ops->alloc_block(cur, &lptr, &rptr, stat); 2368 if (error) 2369 goto error0; 2370 if (*stat == 0) 2371 goto out0; 2372 XFS_BTREE_STATS_INC(cur, alloc); 2373 2374 /* Set up the new block as "right". */ 2375 error = xfs_btree_get_buf_block(cur, &rptr, 0, &right, &rbp); 2376 if (error) 2377 goto error0; 2378 2379 /* Fill in the btree header for the new right block. */ 2380 xfs_btree_init_block_cur(cur, rbp, xfs_btree_get_level(left), 0); 2381 2382 /* 2383 * Split the entries between the old and the new block evenly. 2384 * Make sure that if there's an odd number of entries now, that 2385 * each new block will have the same number of entries. 2386 */ 2387 lrecs = xfs_btree_get_numrecs(left); 2388 rrecs = lrecs / 2; 2389 if ((lrecs & 1) && cur->bc_ptrs[level] <= rrecs + 1) 2390 rrecs++; 2391 src_index = (lrecs - rrecs + 1); 2392 2393 XFS_BTREE_STATS_ADD(cur, moves, rrecs); 2394 2395 /* 2396 * Copy btree block entries from the left block over to the 2397 * new block, the right. Update the right block and log the 2398 * changes. 2399 */ 2400 if (level > 0) { 2401 /* It's a non-leaf. Move keys and pointers. */ 2402 union xfs_btree_key *lkp; /* left btree key */ 2403 union xfs_btree_ptr *lpp; /* left address pointer */ 2404 union xfs_btree_key *rkp; /* right btree key */ 2405 union xfs_btree_ptr *rpp; /* right address pointer */ 2406 2407 lkp = xfs_btree_key_addr(cur, src_index, left); 2408 lpp = xfs_btree_ptr_addr(cur, src_index, left); 2409 rkp = xfs_btree_key_addr(cur, 1, right); 2410 rpp = xfs_btree_ptr_addr(cur, 1, right); 2411 2412 #ifdef DEBUG 2413 for (i = src_index; i < rrecs; i++) { 2414 error = xfs_btree_check_ptr(cur, lpp, i, level); 2415 if (error) 2416 goto error0; 2417 } 2418 #endif 2419 2420 xfs_btree_copy_keys(cur, rkp, lkp, rrecs); 2421 xfs_btree_copy_ptrs(cur, rpp, lpp, rrecs); 2422 2423 xfs_btree_log_keys(cur, rbp, 1, rrecs); 2424 xfs_btree_log_ptrs(cur, rbp, 1, rrecs); 2425 2426 /* Grab the keys to the entries moved to the right block */ 2427 xfs_btree_copy_keys(cur, key, rkp, 1); 2428 } else { 2429 /* It's a leaf. Move records. 
*/ 2430 union xfs_btree_rec *lrp; /* left record pointer */ 2431 union xfs_btree_rec *rrp; /* right record pointer */ 2432 2433 lrp = xfs_btree_rec_addr(cur, src_index, left); 2434 rrp = xfs_btree_rec_addr(cur, 1, right); 2435 2436 xfs_btree_copy_recs(cur, rrp, lrp, rrecs); 2437 xfs_btree_log_recs(cur, rbp, 1, rrecs); 2438 2439 cur->bc_ops->init_key_from_rec(key, 2440 xfs_btree_rec_addr(cur, 1, right)); 2441 } 2442 2443 2444 /* 2445 * Find the left block number by looking in the buffer. 2446 * Adjust numrecs, sibling pointers. 2447 */ 2448 xfs_btree_get_sibling(cur, left, &rrptr, XFS_BB_RIGHTSIB); 2449 xfs_btree_set_sibling(cur, right, &rrptr, XFS_BB_RIGHTSIB); 2450 xfs_btree_set_sibling(cur, right, &lptr, XFS_BB_LEFTSIB); 2451 xfs_btree_set_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB); 2452 2453 lrecs -= rrecs; 2454 xfs_btree_set_numrecs(left, lrecs); 2455 xfs_btree_set_numrecs(right, xfs_btree_get_numrecs(right) + rrecs); 2456 2457 xfs_btree_log_block(cur, rbp, XFS_BB_ALL_BITS); 2458 xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB); 2459 2460 /* 2461 * If there's a block to the new block's right, make that block 2462 * point back to right instead of to left. 2463 */ 2464 if (!xfs_btree_ptr_is_null(cur, &rrptr)) { 2465 error = xfs_btree_read_buf_block(cur, &rrptr, 2466 0, &rrblock, &rrbp); 2467 if (error) 2468 goto error0; 2469 xfs_btree_set_sibling(cur, rrblock, &rptr, XFS_BB_LEFTSIB); 2470 xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB); 2471 } 2472 /* 2473 * If the cursor is really in the right block, move it there. 2474 * If it's just pointing past the last entry in left, then we'll 2475 * insert there, so don't change anything in that case. 2476 */ 2477 if (cur->bc_ptrs[level] > lrecs + 1) { 2478 xfs_btree_setbuf(cur, level, rbp); 2479 cur->bc_ptrs[level] -= lrecs; 2480 } 2481 /* 2482 * If there are more levels, we'll need another cursor which refers 2483 * the right block, no matter where this cursor was. 2484 */ 2485 if (level + 1 < cur->bc_nlevels) { 2486 error = xfs_btree_dup_cursor(cur, curp); 2487 if (error) 2488 goto error0; 2489 (*curp)->bc_ptrs[level + 1]++; 2490 } 2491 *ptrp = rptr; 2492 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 2493 *stat = 1; 2494 return 0; 2495 out0: 2496 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 2497 *stat = 0; 2498 return 0; 2499 2500 error0: 2501 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); 2502 return error; 2503 } 2504 2505 struct xfs_btree_split_args { 2506 struct xfs_btree_cur *cur; 2507 int level; 2508 union xfs_btree_ptr *ptrp; 2509 union xfs_btree_key *key; 2510 struct xfs_btree_cur **curp; 2511 int *stat; /* success/failure */ 2512 int result; 2513 bool kswapd; /* allocation in kswapd context */ 2514 struct completion *done; 2515 struct work_struct work; 2516 }; 2517 2518 /* 2519 * Stack switching interfaces for allocation 2520 */ 2521 static void 2522 xfs_btree_split_worker( 2523 struct work_struct *work) 2524 { 2525 struct xfs_btree_split_args *args = container_of(work, 2526 struct xfs_btree_split_args, work); 2527 unsigned long pflags; 2528 unsigned long new_pflags = PF_FSTRANS; 2529 2530 /* 2531 * we are in a transaction context here, but may also be doing work 2532 * in kswapd context, and hence we may need to inherit that state 2533 * temporarily to ensure that we don't block waiting for memory reclaim 2534 * in any way. 
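 *
 * Roughly: PF_FSTRANS makes the worker's memory allocations behave as
 * GFP_NOFS so they cannot recurse back into the filesystem, while
 * inheriting PF_MEMALLOC/PF_KSWAPD when the submitter was kswapd lets
 * the worker use the same reserves instead of blocking on reclaim that
 * may itself be waiting on this transaction.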
2535 */ 2536 if (args->kswapd) 2537 new_pflags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD; 2538 2539 current_set_flags_nested(&pflags, new_pflags); 2540 2541 args->result = __xfs_btree_split(args->cur, args->level, args->ptrp, 2542 args->key, args->curp, args->stat); 2543 complete(args->done); 2544 2545 current_restore_flags_nested(&pflags, new_pflags); 2546 } 2547 2548 /* 2549 * BMBT split requests often come in with little stack to work on. Push 2550 * them off to a worker thread so there is lots of stack to use. For the other 2551 * btree types, just call directly to avoid the context switch overhead here. 2552 */ 2553 STATIC int /* error */ 2554 xfs_btree_split( 2555 struct xfs_btree_cur *cur, 2556 int level, 2557 union xfs_btree_ptr *ptrp, 2558 union xfs_btree_key *key, 2559 struct xfs_btree_cur **curp, 2560 int *stat) /* success/failure */ 2561 { 2562 struct xfs_btree_split_args args; 2563 DECLARE_COMPLETION_ONSTACK(done); 2564 2565 if (cur->bc_btnum != XFS_BTNUM_BMAP) 2566 return __xfs_btree_split(cur, level, ptrp, key, curp, stat); 2567 2568 args.cur = cur; 2569 args.level = level; 2570 args.ptrp = ptrp; 2571 args.key = key; 2572 args.curp = curp; 2573 args.stat = stat; 2574 args.done = &done; 2575 args.kswapd = current_is_kswapd(); 2576 INIT_WORK_ONSTACK(&args.work, xfs_btree_split_worker); 2577 queue_work(xfs_alloc_wq, &args.work); 2578 wait_for_completion(&done); 2579 destroy_work_on_stack(&args.work); 2580 return args.result; 2581 } 2582 2583 2584 /* 2585 * Copy the old inode root contents into a real block and make the 2586 * broot point to it. 2587 */ 2588 int /* error */ 2589 xfs_btree_new_iroot( 2590 struct xfs_btree_cur *cur, /* btree cursor */ 2591 int *logflags, /* logging flags for inode */ 2592 int *stat) /* return status - 0 fail */ 2593 { 2594 struct xfs_buf *cbp; /* buffer for cblock */ 2595 struct xfs_btree_block *block; /* btree block */ 2596 struct xfs_btree_block *cblock; /* child btree block */ 2597 union xfs_btree_key *ckp; /* child key pointer */ 2598 union xfs_btree_ptr *cpp; /* child ptr pointer */ 2599 union xfs_btree_key *kp; /* pointer to btree key */ 2600 union xfs_btree_ptr *pp; /* pointer to block addr */ 2601 union xfs_btree_ptr nptr; /* new block addr */ 2602 int level; /* btree level */ 2603 int error; /* error return code */ 2604 #ifdef DEBUG 2605 int i; /* loop counter */ 2606 #endif 2607 2608 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); 2609 XFS_BTREE_STATS_INC(cur, newroot); 2610 2611 ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE); 2612 2613 level = cur->bc_nlevels - 1; 2614 2615 block = xfs_btree_get_iroot(cur); 2616 pp = xfs_btree_ptr_addr(cur, 1, block); 2617 2618 /* Allocate the new block. If we can't do it, we're toast. Give up. */ 2619 error = cur->bc_ops->alloc_block(cur, pp, &nptr, stat); 2620 if (error) 2621 goto error0; 2622 if (*stat == 0) { 2623 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 2624 return 0; 2625 } 2626 XFS_BTREE_STATS_INC(cur, alloc); 2627 2628 /* Copy the root into a real block. */ 2629 error = xfs_btree_get_buf_block(cur, &nptr, 0, &cblock, &cbp); 2630 if (error) 2631 goto error0; 2632 2633 /* 2634 * we can't just memcpy() the root in for CRC enabled btree blocks. 
2635 * In that case have to also ensure the blkno remains correct 2636 */ 2637 memcpy(cblock, block, xfs_btree_block_len(cur)); 2638 if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) { 2639 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) 2640 cblock->bb_u.l.bb_blkno = cpu_to_be64(cbp->b_bn); 2641 else 2642 cblock->bb_u.s.bb_blkno = cpu_to_be64(cbp->b_bn); 2643 } 2644 2645 be16_add_cpu(&block->bb_level, 1); 2646 xfs_btree_set_numrecs(block, 1); 2647 cur->bc_nlevels++; 2648 cur->bc_ptrs[level + 1] = 1; 2649 2650 kp = xfs_btree_key_addr(cur, 1, block); 2651 ckp = xfs_btree_key_addr(cur, 1, cblock); 2652 xfs_btree_copy_keys(cur, ckp, kp, xfs_btree_get_numrecs(cblock)); 2653 2654 cpp = xfs_btree_ptr_addr(cur, 1, cblock); 2655 #ifdef DEBUG 2656 for (i = 0; i < be16_to_cpu(cblock->bb_numrecs); i++) { 2657 error = xfs_btree_check_ptr(cur, pp, i, level); 2658 if (error) 2659 goto error0; 2660 } 2661 #endif 2662 xfs_btree_copy_ptrs(cur, cpp, pp, xfs_btree_get_numrecs(cblock)); 2663 2664 #ifdef DEBUG 2665 error = xfs_btree_check_ptr(cur, &nptr, 0, level); 2666 if (error) 2667 goto error0; 2668 #endif 2669 xfs_btree_copy_ptrs(cur, pp, &nptr, 1); 2670 2671 xfs_iroot_realloc(cur->bc_private.b.ip, 2672 1 - xfs_btree_get_numrecs(cblock), 2673 cur->bc_private.b.whichfork); 2674 2675 xfs_btree_setbuf(cur, level, cbp); 2676 2677 /* 2678 * Do all this logging at the end so that 2679 * the root is at the right level. 2680 */ 2681 xfs_btree_log_block(cur, cbp, XFS_BB_ALL_BITS); 2682 xfs_btree_log_keys(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs)); 2683 xfs_btree_log_ptrs(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs)); 2684 2685 *logflags |= 2686 XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_private.b.whichfork); 2687 *stat = 1; 2688 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 2689 return 0; 2690 error0: 2691 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); 2692 return error; 2693 } 2694 2695 /* 2696 * Allocate a new root block, fill it in. 2697 */ 2698 STATIC int /* error */ 2699 xfs_btree_new_root( 2700 struct xfs_btree_cur *cur, /* btree cursor */ 2701 int *stat) /* success/failure */ 2702 { 2703 struct xfs_btree_block *block; /* one half of the old root block */ 2704 struct xfs_buf *bp; /* buffer containing block */ 2705 int error; /* error return value */ 2706 struct xfs_buf *lbp; /* left buffer pointer */ 2707 struct xfs_btree_block *left; /* left btree block */ 2708 struct xfs_buf *nbp; /* new (root) buffer */ 2709 struct xfs_btree_block *new; /* new (root) btree block */ 2710 int nptr; /* new value for key index, 1 or 2 */ 2711 struct xfs_buf *rbp; /* right buffer pointer */ 2712 struct xfs_btree_block *right; /* right btree block */ 2713 union xfs_btree_ptr rptr; 2714 union xfs_btree_ptr lptr; 2715 2716 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); 2717 XFS_BTREE_STATS_INC(cur, newroot); 2718 2719 /* initialise our start point from the cursor */ 2720 cur->bc_ops->init_ptr_from_cur(cur, &rptr); 2721 2722 /* Allocate the new block. If we can't do it, we're toast. Give up. */ 2723 error = cur->bc_ops->alloc_block(cur, &rptr, &lptr, stat); 2724 if (error) 2725 goto error0; 2726 if (*stat == 0) 2727 goto out0; 2728 XFS_BTREE_STATS_INC(cur, alloc); 2729 2730 /* Set up the new block. */ 2731 error = xfs_btree_get_buf_block(cur, &lptr, 0, &new, &nbp); 2732 if (error) 2733 goto error0; 2734 2735 /* Set the root in the holding structure increasing the level by 1. */ 2736 cur->bc_ops->set_root(cur, &lptr, 1); 2737 2738 /* 2739 * At the previous root level there are now two blocks: the old root, 2740 * and the new block generated when it was split. 
We don't know which 2741 * one the cursor is pointing at, so we set up variables "left" and 2742 * "right" for each case. 2743 */ 2744 block = xfs_btree_get_block(cur, cur->bc_nlevels - 1, &bp); 2745 2746 #ifdef DEBUG 2747 error = xfs_btree_check_block(cur, block, cur->bc_nlevels - 1, bp); 2748 if (error) 2749 goto error0; 2750 #endif 2751 2752 xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB); 2753 if (!xfs_btree_ptr_is_null(cur, &rptr)) { 2754 /* Our block is left, pick up the right block. */ 2755 lbp = bp; 2756 xfs_btree_buf_to_ptr(cur, lbp, &lptr); 2757 left = block; 2758 error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp); 2759 if (error) 2760 goto error0; 2761 bp = rbp; 2762 nptr = 1; 2763 } else { 2764 /* Our block is right, pick up the left block. */ 2765 rbp = bp; 2766 xfs_btree_buf_to_ptr(cur, rbp, &rptr); 2767 right = block; 2768 xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB); 2769 error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp); 2770 if (error) 2771 goto error0; 2772 bp = lbp; 2773 nptr = 2; 2774 } 2775 /* Fill in the new block's btree header and log it. */ 2776 xfs_btree_init_block_cur(cur, nbp, cur->bc_nlevels, 2); 2777 xfs_btree_log_block(cur, nbp, XFS_BB_ALL_BITS); 2778 ASSERT(!xfs_btree_ptr_is_null(cur, &lptr) && 2779 !xfs_btree_ptr_is_null(cur, &rptr)); 2780 2781 /* Fill in the key data in the new root. */ 2782 if (xfs_btree_get_level(left) > 0) { 2783 xfs_btree_copy_keys(cur, 2784 xfs_btree_key_addr(cur, 1, new), 2785 xfs_btree_key_addr(cur, 1, left), 1); 2786 xfs_btree_copy_keys(cur, 2787 xfs_btree_key_addr(cur, 2, new), 2788 xfs_btree_key_addr(cur, 1, right), 1); 2789 } else { 2790 cur->bc_ops->init_key_from_rec( 2791 xfs_btree_key_addr(cur, 1, new), 2792 xfs_btree_rec_addr(cur, 1, left)); 2793 cur->bc_ops->init_key_from_rec( 2794 xfs_btree_key_addr(cur, 2, new), 2795 xfs_btree_rec_addr(cur, 1, right)); 2796 } 2797 xfs_btree_log_keys(cur, nbp, 1, 2); 2798 2799 /* Fill in the pointer data in the new root. */ 2800 xfs_btree_copy_ptrs(cur, 2801 xfs_btree_ptr_addr(cur, 1, new), &lptr, 1); 2802 xfs_btree_copy_ptrs(cur, 2803 xfs_btree_ptr_addr(cur, 2, new), &rptr, 1); 2804 xfs_btree_log_ptrs(cur, nbp, 1, 2); 2805 2806 /* Fix up the cursor. */ 2807 xfs_btree_setbuf(cur, cur->bc_nlevels, nbp); 2808 cur->bc_ptrs[cur->bc_nlevels] = nptr; 2809 cur->bc_nlevels++; 2810 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 2811 *stat = 1; 2812 return 0; 2813 error0: 2814 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); 2815 return error; 2816 out0: 2817 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 2818 *stat = 0; 2819 return 0; 2820 } 2821 2822 STATIC int 2823 xfs_btree_make_block_unfull( 2824 struct xfs_btree_cur *cur, /* btree cursor */ 2825 int level, /* btree level */ 2826 int numrecs,/* # of recs in block */ 2827 int *oindex,/* old tree index */ 2828 int *index, /* new tree index */ 2829 union xfs_btree_ptr *nptr, /* new btree ptr */ 2830 struct xfs_btree_cur **ncur, /* new btree cursor */ 2831 union xfs_btree_rec *nrec, /* new record */ 2832 int *stat) 2833 { 2834 union xfs_btree_key key; /* new btree key value */ 2835 int error = 0; 2836 2837 if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && 2838 level == cur->bc_nlevels - 1) { 2839 struct xfs_inode *ip = cur->bc_private.b.ip; 2840 2841 if (numrecs < cur->bc_ops->get_dmaxrecs(cur, level)) { 2842 /* A root block that can be made bigger. 
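 * The incore root lives in the inode fork, so growing it is just an
 * xfs_iroot_realloc() of the broot buffer by one record below; no new
 * disk block is needed.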
*/ 2843 xfs_iroot_realloc(ip, 1, cur->bc_private.b.whichfork); 2844 } else { 2845 /* A root block that needs replacing */ 2846 int logflags = 0; 2847 2848 error = xfs_btree_new_iroot(cur, &logflags, stat); 2849 if (error || *stat == 0) 2850 return error; 2851 2852 xfs_trans_log_inode(cur->bc_tp, ip, logflags); 2853 } 2854 2855 return 0; 2856 } 2857 2858 /* First, try shifting an entry to the right neighbor. */ 2859 error = xfs_btree_rshift(cur, level, stat); 2860 if (error || *stat) 2861 return error; 2862 2863 /* Next, try shifting an entry to the left neighbor. */ 2864 error = xfs_btree_lshift(cur, level, stat); 2865 if (error) 2866 return error; 2867 2868 if (*stat) { 2869 *oindex = *index = cur->bc_ptrs[level]; 2870 return 0; 2871 } 2872 2873 /* 2874 * Next, try splitting the current block in half. 2875 * 2876 * If this works we have to re-set our variables because we 2877 * could be in a different block now. 2878 */ 2879 error = xfs_btree_split(cur, level, nptr, &key, ncur, stat); 2880 if (error || *stat == 0) 2881 return error; 2882 2883 2884 *index = cur->bc_ptrs[level]; 2885 cur->bc_ops->init_rec_from_key(&key, nrec); 2886 return 0; 2887 } 2888 2889 /* 2890 * Insert one record/level. Return information to the caller 2891 * allowing the next level up to proceed if necessary. 2892 */ 2893 STATIC int 2894 xfs_btree_insrec( 2895 struct xfs_btree_cur *cur, /* btree cursor */ 2896 int level, /* level to insert record at */ 2897 union xfs_btree_ptr *ptrp, /* i/o: block number inserted */ 2898 union xfs_btree_rec *recp, /* i/o: record data inserted */ 2899 struct xfs_btree_cur **curp, /* output: new cursor replacing cur */ 2900 int *stat) /* success/failure */ 2901 { 2902 struct xfs_btree_block *block; /* btree block */ 2903 struct xfs_buf *bp; /* buffer for block */ 2904 union xfs_btree_key key; /* btree key */ 2905 union xfs_btree_ptr nptr; /* new block ptr */ 2906 struct xfs_btree_cur *ncur; /* new btree cursor */ 2907 union xfs_btree_rec nrec; /* new record count */ 2908 int optr; /* old key/record index */ 2909 int ptr; /* key/record index */ 2910 int numrecs;/* number of records */ 2911 int error; /* error return value */ 2912 #ifdef DEBUG 2913 int i; 2914 #endif 2915 2916 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); 2917 XFS_BTREE_TRACE_ARGIPR(cur, level, *ptrp, recp); 2918 2919 ncur = NULL; 2920 2921 /* 2922 * If we have an external root pointer, and we've made it to the 2923 * root level, allocate a new root block and we're done. 2924 */ 2925 if (!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && 2926 (level >= cur->bc_nlevels)) { 2927 error = xfs_btree_new_root(cur, stat); 2928 xfs_btree_set_ptr_null(cur, ptrp); 2929 2930 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 2931 return error; 2932 } 2933 2934 /* If we're off the left edge, return failure. */ 2935 ptr = cur->bc_ptrs[level]; 2936 if (ptr == 0) { 2937 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 2938 *stat = 0; 2939 return 0; 2940 } 2941 2942 /* Make a key out of the record data to be inserted, and save it. */ 2943 cur->bc_ops->init_key_from_rec(&key, recp); 2944 2945 optr = ptr; 2946 2947 XFS_BTREE_STATS_INC(cur, insrec); 2948 2949 /* Get pointers to the btree buffer and block. */ 2950 block = xfs_btree_get_block(cur, level, &bp); 2951 numrecs = xfs_btree_get_numrecs(block); 2952 2953 #ifdef DEBUG 2954 error = xfs_btree_check_block(cur, block, level, bp); 2955 if (error) 2956 goto error0; 2957 2958 /* Check that the new entry is being inserted in the right place. 
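 * That is, the entry currently at the insertion index (if any) must
 * sort after the one being inserted; otherwise the ordering callbacks
 * below trip their ASSERTs.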
*/ 2959 if (ptr <= numrecs) { 2960 if (level == 0) { 2961 ASSERT(cur->bc_ops->recs_inorder(cur, recp, 2962 xfs_btree_rec_addr(cur, ptr, block))); 2963 } else { 2964 ASSERT(cur->bc_ops->keys_inorder(cur, &key, 2965 xfs_btree_key_addr(cur, ptr, block))); 2966 } 2967 } 2968 #endif 2969 2970 /* 2971 * If the block is full, we can't insert the new entry until we 2972 * make the block un-full. 2973 */ 2974 xfs_btree_set_ptr_null(cur, &nptr); 2975 if (numrecs == cur->bc_ops->get_maxrecs(cur, level)) { 2976 error = xfs_btree_make_block_unfull(cur, level, numrecs, 2977 &optr, &ptr, &nptr, &ncur, &nrec, stat); 2978 if (error || *stat == 0) 2979 goto error0; 2980 } 2981 2982 /* 2983 * The current block may have changed if the block was 2984 * previously full and we have just made space in it. 2985 */ 2986 block = xfs_btree_get_block(cur, level, &bp); 2987 numrecs = xfs_btree_get_numrecs(block); 2988 2989 #ifdef DEBUG 2990 error = xfs_btree_check_block(cur, block, level, bp); 2991 if (error) 2992 return error; 2993 #endif 2994 2995 /* 2996 * At this point we know there's room for our new entry in the block 2997 * we're pointing at. 2998 */ 2999 XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr + 1); 3000 3001 if (level > 0) { 3002 /* It's a nonleaf. make a hole in the keys and ptrs */ 3003 union xfs_btree_key *kp; 3004 union xfs_btree_ptr *pp; 3005 3006 kp = xfs_btree_key_addr(cur, ptr, block); 3007 pp = xfs_btree_ptr_addr(cur, ptr, block); 3008 3009 #ifdef DEBUG 3010 for (i = numrecs - ptr; i >= 0; i--) { 3011 error = xfs_btree_check_ptr(cur, pp, i, level); 3012 if (error) 3013 return error; 3014 } 3015 #endif 3016 3017 xfs_btree_shift_keys(cur, kp, 1, numrecs - ptr + 1); 3018 xfs_btree_shift_ptrs(cur, pp, 1, numrecs - ptr + 1); 3019 3020 #ifdef DEBUG 3021 error = xfs_btree_check_ptr(cur, ptrp, 0, level); 3022 if (error) 3023 goto error0; 3024 #endif 3025 3026 /* Now put the new data in, bump numrecs and log it. */ 3027 xfs_btree_copy_keys(cur, kp, &key, 1); 3028 xfs_btree_copy_ptrs(cur, pp, ptrp, 1); 3029 numrecs++; 3030 xfs_btree_set_numrecs(block, numrecs); 3031 xfs_btree_log_ptrs(cur, bp, ptr, numrecs); 3032 xfs_btree_log_keys(cur, bp, ptr, numrecs); 3033 #ifdef DEBUG 3034 if (ptr < numrecs) { 3035 ASSERT(cur->bc_ops->keys_inorder(cur, kp, 3036 xfs_btree_key_addr(cur, ptr + 1, block))); 3037 } 3038 #endif 3039 } else { 3040 /* It's a leaf. make a hole in the records */ 3041 union xfs_btree_rec *rp; 3042 3043 rp = xfs_btree_rec_addr(cur, ptr, block); 3044 3045 xfs_btree_shift_recs(cur, rp, 1, numrecs - ptr + 1); 3046 3047 /* Now put the new data in, bump numrecs and log it. */ 3048 xfs_btree_copy_recs(cur, rp, recp, 1); 3049 xfs_btree_set_numrecs(block, ++numrecs); 3050 xfs_btree_log_recs(cur, bp, ptr, numrecs); 3051 #ifdef DEBUG 3052 if (ptr < numrecs) { 3053 ASSERT(cur->bc_ops->recs_inorder(cur, rp, 3054 xfs_btree_rec_addr(cur, ptr + 1, block))); 3055 } 3056 #endif 3057 } 3058 3059 /* Log the new number of records in the btree header. */ 3060 xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS); 3061 3062 /* If we inserted at the start of a block, update the parents' keys. */ 3063 if (optr == 1) { 3064 error = xfs_btree_updkey(cur, &key, level + 1); 3065 if (error) 3066 goto error0; 3067 } 3068 3069 /* 3070 * If we are tracking the last record in the tree and 3071 * we are at the far right edge of the tree, update it. 
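 * (Only cursors with XFS_BTREE_LASTREC_UPDATE set take this path; the
 * by-size free space btree uses it to keep the longest-free-extent
 * hint in the AGF current.)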
3072 */ 3073 if (xfs_btree_is_lastrec(cur, block, level)) { 3074 cur->bc_ops->update_lastrec(cur, block, recp, 3075 ptr, LASTREC_INSREC); 3076 } 3077 3078 /* 3079 * Return the new block number, if any. 3080 * If there is one, give back a record value and a cursor too. 3081 */ 3082 *ptrp = nptr; 3083 if (!xfs_btree_ptr_is_null(cur, &nptr)) { 3084 *recp = nrec; 3085 *curp = ncur; 3086 } 3087 3088 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 3089 *stat = 1; 3090 return 0; 3091 3092 error0: 3093 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); 3094 return error; 3095 } 3096 3097 /* 3098 * Insert the record at the point referenced by cur. 3099 * 3100 * A multi-level split of the tree on insert will invalidate the original 3101 * cursor. All callers of this function should assume that the cursor is 3102 * no longer valid and revalidate it. 3103 */ 3104 int 3105 xfs_btree_insert( 3106 struct xfs_btree_cur *cur, 3107 int *stat) 3108 { 3109 int error; /* error return value */ 3110 int i; /* result value, 0 for failure */ 3111 int level; /* current level number in btree */ 3112 union xfs_btree_ptr nptr; /* new block number (split result) */ 3113 struct xfs_btree_cur *ncur; /* new cursor (split result) */ 3114 struct xfs_btree_cur *pcur; /* previous level's cursor */ 3115 union xfs_btree_rec rec; /* record to insert */ 3116 3117 level = 0; 3118 ncur = NULL; 3119 pcur = cur; 3120 3121 xfs_btree_set_ptr_null(cur, &nptr); 3122 cur->bc_ops->init_rec_from_cur(cur, &rec); 3123 3124 /* 3125 * Loop going up the tree, starting at the leaf level. 3126 * Stop when we don't get a split block, that must mean that 3127 * the insert is finished with this level. 3128 */ 3129 do { 3130 /* 3131 * Insert nrec/nptr into this level of the tree. 3132 * Note if we fail, nptr will be null. 3133 */ 3134 error = xfs_btree_insrec(pcur, level, &nptr, &rec, &ncur, &i); 3135 if (error) { 3136 if (pcur != cur) 3137 xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR); 3138 goto error0; 3139 } 3140 3141 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); 3142 level++; 3143 3144 /* 3145 * See if the cursor we just used is trash. 3146 * Can't trash the caller's cursor, but otherwise we should 3147 * if ncur is a new cursor or we're about to be done. 3148 */ 3149 if (pcur != cur && 3150 (ncur || xfs_btree_ptr_is_null(cur, &nptr))) { 3151 /* Save the state from the cursor before we trash it */ 3152 if (cur->bc_ops->update_cursor) 3153 cur->bc_ops->update_cursor(pcur, cur); 3154 cur->bc_nlevels = pcur->bc_nlevels; 3155 xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR); 3156 } 3157 /* If we got a new cursor, switch to it. */ 3158 if (ncur) { 3159 pcur = ncur; 3160 ncur = NULL; 3161 } 3162 } while (!xfs_btree_ptr_is_null(cur, &nptr)); 3163 3164 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 3165 *stat = i; 3166 return 0; 3167 error0: 3168 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); 3169 return error; 3170 } 3171 3172 /* 3173 * Try to merge a non-leaf block back into the inode root. 3174 * 3175 * Note: the killroot names comes from the fact that we're effectively 3176 * killing the old root block. But because we can't just delete the 3177 * inode we have to copy the single block it was pointing to into the 3178 * inode. 
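 * In practice that means pulling the keys and pointers of the root's
 * single child up into if_broot, freeing the child's disk block, and
 * dropping bc_nlevels by one.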
3179 */ 3180 STATIC int 3181 xfs_btree_kill_iroot( 3182 struct xfs_btree_cur *cur) 3183 { 3184 int whichfork = cur->bc_private.b.whichfork; 3185 struct xfs_inode *ip = cur->bc_private.b.ip; 3186 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); 3187 struct xfs_btree_block *block; 3188 struct xfs_btree_block *cblock; 3189 union xfs_btree_key *kp; 3190 union xfs_btree_key *ckp; 3191 union xfs_btree_ptr *pp; 3192 union xfs_btree_ptr *cpp; 3193 struct xfs_buf *cbp; 3194 int level; 3195 int index; 3196 int numrecs; 3197 #ifdef DEBUG 3198 union xfs_btree_ptr ptr; 3199 int i; 3200 #endif 3201 3202 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); 3203 3204 ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE); 3205 ASSERT(cur->bc_nlevels > 1); 3206 3207 /* 3208 * Don't deal with the root block needs to be a leaf case. 3209 * We're just going to turn the thing back into extents anyway. 3210 */ 3211 level = cur->bc_nlevels - 1; 3212 if (level == 1) 3213 goto out0; 3214 3215 /* 3216 * Give up if the root has multiple children. 3217 */ 3218 block = xfs_btree_get_iroot(cur); 3219 if (xfs_btree_get_numrecs(block) != 1) 3220 goto out0; 3221 3222 cblock = xfs_btree_get_block(cur, level - 1, &cbp); 3223 numrecs = xfs_btree_get_numrecs(cblock); 3224 3225 /* 3226 * Only do this if the next level will fit. 3227 * Then the data must be copied up to the inode, 3228 * instead of freeing the root you free the next level. 3229 */ 3230 if (numrecs > cur->bc_ops->get_dmaxrecs(cur, level)) 3231 goto out0; 3232 3233 XFS_BTREE_STATS_INC(cur, killroot); 3234 3235 #ifdef DEBUG 3236 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB); 3237 ASSERT(xfs_btree_ptr_is_null(cur, &ptr)); 3238 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB); 3239 ASSERT(xfs_btree_ptr_is_null(cur, &ptr)); 3240 #endif 3241 3242 index = numrecs - cur->bc_ops->get_maxrecs(cur, level); 3243 if (index) { 3244 xfs_iroot_realloc(cur->bc_private.b.ip, index, 3245 cur->bc_private.b.whichfork); 3246 block = ifp->if_broot; 3247 } 3248 3249 be16_add_cpu(&block->bb_numrecs, index); 3250 ASSERT(block->bb_numrecs == cblock->bb_numrecs); 3251 3252 kp = xfs_btree_key_addr(cur, 1, block); 3253 ckp = xfs_btree_key_addr(cur, 1, cblock); 3254 xfs_btree_copy_keys(cur, kp, ckp, numrecs); 3255 3256 pp = xfs_btree_ptr_addr(cur, 1, block); 3257 cpp = xfs_btree_ptr_addr(cur, 1, cblock); 3258 #ifdef DEBUG 3259 for (i = 0; i < numrecs; i++) { 3260 int error; 3261 3262 error = xfs_btree_check_ptr(cur, cpp, i, level - 1); 3263 if (error) { 3264 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); 3265 return error; 3266 } 3267 } 3268 #endif 3269 xfs_btree_copy_ptrs(cur, pp, cpp, numrecs); 3270 3271 cur->bc_ops->free_block(cur, cbp); 3272 XFS_BTREE_STATS_INC(cur, free); 3273 3274 cur->bc_bufs[level - 1] = NULL; 3275 be16_add_cpu(&block->bb_level, -1); 3276 xfs_trans_log_inode(cur->bc_tp, ip, 3277 XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_private.b.whichfork)); 3278 cur->bc_nlevels--; 3279 out0: 3280 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 3281 return 0; 3282 } 3283 3284 /* 3285 * Kill the current root node, and replace it with it's only child node. 3286 */ 3287 STATIC int 3288 xfs_btree_kill_root( 3289 struct xfs_btree_cur *cur, 3290 struct xfs_buf *bp, 3291 int level, 3292 union xfs_btree_ptr *newroot) 3293 { 3294 int error; 3295 3296 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); 3297 XFS_BTREE_STATS_INC(cur, killroot); 3298 3299 /* 3300 * Update the root pointer, decreasing the level by 1 and then 3301 * free the old root. 
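 * For the AG btrees, set_root() below rewrites the root block number
 * and level fields in the AGF/AGI; the old root buffer is then handed
 * to free_block() and the cursor's level stack is trimmed to match.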
3302 */ 3303 cur->bc_ops->set_root(cur, newroot, -1); 3304 3305 error = cur->bc_ops->free_block(cur, bp); 3306 if (error) { 3307 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); 3308 return error; 3309 } 3310 3311 XFS_BTREE_STATS_INC(cur, free); 3312 3313 cur->bc_bufs[level] = NULL; 3314 cur->bc_ra[level] = 0; 3315 cur->bc_nlevels--; 3316 3317 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 3318 return 0; 3319 } 3320 3321 STATIC int 3322 xfs_btree_dec_cursor( 3323 struct xfs_btree_cur *cur, 3324 int level, 3325 int *stat) 3326 { 3327 int error; 3328 int i; 3329 3330 if (level > 0) { 3331 error = xfs_btree_decrement(cur, level, &i); 3332 if (error) 3333 return error; 3334 } 3335 3336 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 3337 *stat = 1; 3338 return 0; 3339 } 3340 3341 /* 3342 * Single level of the btree record deletion routine. 3343 * Delete record pointed to by cur/level. 3344 * Remove the record from its block then rebalance the tree. 3345 * Return 0 for error, 1 for done, 2 to go on to the next level. 3346 */ 3347 STATIC int /* error */ 3348 xfs_btree_delrec( 3349 struct xfs_btree_cur *cur, /* btree cursor */ 3350 int level, /* level removing record from */ 3351 int *stat) /* fail/done/go-on */ 3352 { 3353 struct xfs_btree_block *block; /* btree block */ 3354 union xfs_btree_ptr cptr; /* current block ptr */ 3355 struct xfs_buf *bp; /* buffer for block */ 3356 int error; /* error return value */ 3357 int i; /* loop counter */ 3358 union xfs_btree_key key; /* storage for keyp */ 3359 union xfs_btree_key *keyp = &key; /* passed to the next level */ 3360 union xfs_btree_ptr lptr; /* left sibling block ptr */ 3361 struct xfs_buf *lbp; /* left buffer pointer */ 3362 struct xfs_btree_block *left; /* left btree block */ 3363 int lrecs = 0; /* left record count */ 3364 int ptr; /* key/record index */ 3365 union xfs_btree_ptr rptr; /* right sibling block ptr */ 3366 struct xfs_buf *rbp; /* right buffer pointer */ 3367 struct xfs_btree_block *right; /* right btree block */ 3368 struct xfs_btree_block *rrblock; /* right-right btree block */ 3369 struct xfs_buf *rrbp; /* right-right buffer pointer */ 3370 int rrecs = 0; /* right record count */ 3371 struct xfs_btree_cur *tcur; /* temporary btree cursor */ 3372 int numrecs; /* temporary numrec count */ 3373 3374 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); 3375 XFS_BTREE_TRACE_ARGI(cur, level); 3376 3377 tcur = NULL; 3378 3379 /* Get the index of the entry being deleted, check for nothing there. */ 3380 ptr = cur->bc_ptrs[level]; 3381 if (ptr == 0) { 3382 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 3383 *stat = 0; 3384 return 0; 3385 } 3386 3387 /* Get the buffer & block containing the record or key/ptr. */ 3388 block = xfs_btree_get_block(cur, level, &bp); 3389 numrecs = xfs_btree_get_numrecs(block); 3390 3391 #ifdef DEBUG 3392 error = xfs_btree_check_block(cur, block, level, bp); 3393 if (error) 3394 goto error0; 3395 #endif 3396 3397 /* Fail if we're off the end of the block. */ 3398 if (ptr > numrecs) { 3399 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 3400 *stat = 0; 3401 return 0; 3402 } 3403 3404 XFS_BTREE_STATS_INC(cur, delrec); 3405 XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr); 3406 3407 /* Excise the entries being deleted. */ 3408 if (level > 0) { 3409 /* It's a nonleaf. 
operate on keys and ptrs */ 3410 union xfs_btree_key *lkp; 3411 union xfs_btree_ptr *lpp; 3412 3413 lkp = xfs_btree_key_addr(cur, ptr + 1, block); 3414 lpp = xfs_btree_ptr_addr(cur, ptr + 1, block); 3415 3416 #ifdef DEBUG 3417 for (i = 0; i < numrecs - ptr; i++) { 3418 error = xfs_btree_check_ptr(cur, lpp, i, level); 3419 if (error) 3420 goto error0; 3421 } 3422 #endif 3423 3424 if (ptr < numrecs) { 3425 xfs_btree_shift_keys(cur, lkp, -1, numrecs - ptr); 3426 xfs_btree_shift_ptrs(cur, lpp, -1, numrecs - ptr); 3427 xfs_btree_log_keys(cur, bp, ptr, numrecs - 1); 3428 xfs_btree_log_ptrs(cur, bp, ptr, numrecs - 1); 3429 } 3430 3431 /* 3432 * If it's the first record in the block, we'll need to pass a 3433 * key up to the next level (updkey). 3434 */ 3435 if (ptr == 1) 3436 keyp = xfs_btree_key_addr(cur, 1, block); 3437 } else { 3438 /* It's a leaf. operate on records */ 3439 if (ptr < numrecs) { 3440 xfs_btree_shift_recs(cur, 3441 xfs_btree_rec_addr(cur, ptr + 1, block), 3442 -1, numrecs - ptr); 3443 xfs_btree_log_recs(cur, bp, ptr, numrecs - 1); 3444 } 3445 3446 /* 3447 * If it's the first record in the block, we'll need a key 3448 * structure to pass up to the next level (updkey). 3449 */ 3450 if (ptr == 1) { 3451 cur->bc_ops->init_key_from_rec(&key, 3452 xfs_btree_rec_addr(cur, 1, block)); 3453 keyp = &key; 3454 } 3455 } 3456 3457 /* 3458 * Decrement and log the number of entries in the block. 3459 */ 3460 xfs_btree_set_numrecs(block, --numrecs); 3461 xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS); 3462 3463 /* 3464 * If we are tracking the last record in the tree and 3465 * we are at the far right edge of the tree, update it. 3466 */ 3467 if (xfs_btree_is_lastrec(cur, block, level)) { 3468 cur->bc_ops->update_lastrec(cur, block, NULL, 3469 ptr, LASTREC_DELREC); 3470 } 3471 3472 /* 3473 * We're at the root level. First, shrink the root block in-memory. 3474 * Try to get rid of the next level down. If we can't then there's 3475 * nothing left to do. 3476 */ 3477 if (level == cur->bc_nlevels - 1) { 3478 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) { 3479 xfs_iroot_realloc(cur->bc_private.b.ip, -1, 3480 cur->bc_private.b.whichfork); 3481 3482 error = xfs_btree_kill_iroot(cur); 3483 if (error) 3484 goto error0; 3485 3486 error = xfs_btree_dec_cursor(cur, level, stat); 3487 if (error) 3488 goto error0; 3489 *stat = 1; 3490 return 0; 3491 } 3492 3493 /* 3494 * If this is the root level, and there's only one entry left, 3495 * and it's NOT the leaf level, then we can get rid of this 3496 * level. 3497 */ 3498 if (numrecs == 1 && level > 0) { 3499 union xfs_btree_ptr *pp; 3500 /* 3501 * pp is still set to the first pointer in the block. 3502 * Make it the new root of the btree. 3503 */ 3504 pp = xfs_btree_ptr_addr(cur, 1, block); 3505 error = xfs_btree_kill_root(cur, bp, level, pp); 3506 if (error) 3507 goto error0; 3508 } else if (level > 0) { 3509 error = xfs_btree_dec_cursor(cur, level, stat); 3510 if (error) 3511 goto error0; 3512 } 3513 *stat = 1; 3514 return 0; 3515 } 3516 3517 /* 3518 * If we deleted the leftmost entry in the block, update the 3519 * key values above us in the tree. 3520 */ 3521 if (ptr == 1) { 3522 error = xfs_btree_updkey(cur, keyp, level + 1); 3523 if (error) 3524 goto error0; 3525 } 3526 3527 /* 3528 * If the number of records remaining in the block is at least 3529 * the minimum, we're done. 
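 * (get_minrecs() is typically half of get_maxrecs(), so this keeps
 * non-root blocks at least roughly half full.)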
3530 */ 3531 if (numrecs >= cur->bc_ops->get_minrecs(cur, level)) { 3532 error = xfs_btree_dec_cursor(cur, level, stat); 3533 if (error) 3534 goto error0; 3535 return 0; 3536 } 3537 3538 /* 3539 * Otherwise, we have to move some records around to keep the 3540 * tree balanced. Look at the left and right sibling blocks to 3541 * see if we can re-balance by moving only one record. 3542 */ 3543 xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB); 3544 xfs_btree_get_sibling(cur, block, &lptr, XFS_BB_LEFTSIB); 3545 3546 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) { 3547 /* 3548 * One child of root, need to get a chance to copy its contents 3549 * into the root and delete it. Can't go up to next level, 3550 * there's nothing to delete there. 3551 */ 3552 if (xfs_btree_ptr_is_null(cur, &rptr) && 3553 xfs_btree_ptr_is_null(cur, &lptr) && 3554 level == cur->bc_nlevels - 2) { 3555 error = xfs_btree_kill_iroot(cur); 3556 if (!error) 3557 error = xfs_btree_dec_cursor(cur, level, stat); 3558 if (error) 3559 goto error0; 3560 return 0; 3561 } 3562 } 3563 3564 ASSERT(!xfs_btree_ptr_is_null(cur, &rptr) || 3565 !xfs_btree_ptr_is_null(cur, &lptr)); 3566 3567 /* 3568 * Duplicate the cursor so our btree manipulations here won't 3569 * disrupt the next level up. 3570 */ 3571 error = xfs_btree_dup_cursor(cur, &tcur); 3572 if (error) 3573 goto error0; 3574 3575 /* 3576 * If there's a right sibling, see if it's ok to shift an entry 3577 * out of it. 3578 */ 3579 if (!xfs_btree_ptr_is_null(cur, &rptr)) { 3580 /* 3581 * Move the temp cursor to the last entry in the next block. 3582 * Actually any entry but the first would suffice. 3583 */ 3584 i = xfs_btree_lastrec(tcur, level); 3585 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); 3586 3587 error = xfs_btree_increment(tcur, level, &i); 3588 if (error) 3589 goto error0; 3590 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); 3591 3592 i = xfs_btree_lastrec(tcur, level); 3593 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); 3594 3595 /* Grab a pointer to the block. */ 3596 right = xfs_btree_get_block(tcur, level, &rbp); 3597 #ifdef DEBUG 3598 error = xfs_btree_check_block(tcur, right, level, rbp); 3599 if (error) 3600 goto error0; 3601 #endif 3602 /* Grab the current block number, for future use. */ 3603 xfs_btree_get_sibling(tcur, right, &cptr, XFS_BB_LEFTSIB); 3604 3605 /* 3606 * If right block is full enough so that removing one entry 3607 * won't make it too empty, and left-shifting an entry out 3608 * of right to us works, we're done. 3609 */ 3610 if (xfs_btree_get_numrecs(right) - 1 >= 3611 cur->bc_ops->get_minrecs(tcur, level)) { 3612 error = xfs_btree_lshift(tcur, level, &i); 3613 if (error) 3614 goto error0; 3615 if (i) { 3616 ASSERT(xfs_btree_get_numrecs(block) >= 3617 cur->bc_ops->get_minrecs(tcur, level)); 3618 3619 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); 3620 tcur = NULL; 3621 3622 error = xfs_btree_dec_cursor(cur, level, stat); 3623 if (error) 3624 goto error0; 3625 return 0; 3626 } 3627 } 3628 3629 /* 3630 * Otherwise, grab the number of records in right for 3631 * future reference, and fix up the temp cursor to point 3632 * to our block again (last record). 
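 * The firstrec/decrement pair below walks tcur from the right sibling
 * back onto this block, so the left-sibling checks that follow start
 * from the correct position.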
3633 */ 3634 rrecs = xfs_btree_get_numrecs(right); 3635 if (!xfs_btree_ptr_is_null(cur, &lptr)) { 3636 i = xfs_btree_firstrec(tcur, level); 3637 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); 3638 3639 error = xfs_btree_decrement(tcur, level, &i); 3640 if (error) 3641 goto error0; 3642 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); 3643 } 3644 } 3645 3646 /* 3647 * If there's a left sibling, see if it's ok to shift an entry 3648 * out of it. 3649 */ 3650 if (!xfs_btree_ptr_is_null(cur, &lptr)) { 3651 /* 3652 * Move the temp cursor to the first entry in the 3653 * previous block. 3654 */ 3655 i = xfs_btree_firstrec(tcur, level); 3656 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); 3657 3658 error = xfs_btree_decrement(tcur, level, &i); 3659 if (error) 3660 goto error0; 3661 i = xfs_btree_firstrec(tcur, level); 3662 XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0); 3663 3664 /* Grab a pointer to the block. */ 3665 left = xfs_btree_get_block(tcur, level, &lbp); 3666 #ifdef DEBUG 3667 error = xfs_btree_check_block(cur, left, level, lbp); 3668 if (error) 3669 goto error0; 3670 #endif 3671 /* Grab the current block number, for future use. */ 3672 xfs_btree_get_sibling(tcur, left, &cptr, XFS_BB_RIGHTSIB); 3673 3674 /* 3675 * If left block is full enough so that removing one entry 3676 * won't make it too empty, and right-shifting an entry out 3677 * of left to us works, we're done. 3678 */ 3679 if (xfs_btree_get_numrecs(left) - 1 >= 3680 cur->bc_ops->get_minrecs(tcur, level)) { 3681 error = xfs_btree_rshift(tcur, level, &i); 3682 if (error) 3683 goto error0; 3684 if (i) { 3685 ASSERT(xfs_btree_get_numrecs(block) >= 3686 cur->bc_ops->get_minrecs(tcur, level)); 3687 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); 3688 tcur = NULL; 3689 if (level == 0) 3690 cur->bc_ptrs[0]++; 3691 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 3692 *stat = 1; 3693 return 0; 3694 } 3695 } 3696 3697 /* 3698 * Otherwise, grab the number of records in right for 3699 * future reference. 3700 */ 3701 lrecs = xfs_btree_get_numrecs(left); 3702 } 3703 3704 /* Delete the temp cursor, we're done with it. */ 3705 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR); 3706 tcur = NULL; 3707 3708 /* If here, we need to do a join to keep the tree balanced. */ 3709 ASSERT(!xfs_btree_ptr_is_null(cur, &cptr)); 3710 3711 if (!xfs_btree_ptr_is_null(cur, &lptr) && 3712 lrecs + xfs_btree_get_numrecs(block) <= 3713 cur->bc_ops->get_maxrecs(cur, level)) { 3714 /* 3715 * Set "right" to be the starting block, 3716 * "left" to be the left neighbor. 3717 */ 3718 rptr = cptr; 3719 right = block; 3720 rbp = bp; 3721 error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp); 3722 if (error) 3723 goto error0; 3724 3725 /* 3726 * If that won't work, see if we can join with the right neighbor block. 3727 */ 3728 } else if (!xfs_btree_ptr_is_null(cur, &rptr) && 3729 rrecs + xfs_btree_get_numrecs(block) <= 3730 cur->bc_ops->get_maxrecs(cur, level)) { 3731 /* 3732 * Set "left" to be the starting block, 3733 * "right" to be the right neighbor. 3734 */ 3735 lptr = cptr; 3736 left = block; 3737 lbp = bp; 3738 error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp); 3739 if (error) 3740 goto error0; 3741 3742 /* 3743 * Otherwise, we can't fix the imbalance. 3744 * Just return. This is probably a logic error, but it's not fatal. 
3745 */ 3746 } else { 3747 error = xfs_btree_dec_cursor(cur, level, stat); 3748 if (error) 3749 goto error0; 3750 return 0; 3751 } 3752 3753 rrecs = xfs_btree_get_numrecs(right); 3754 lrecs = xfs_btree_get_numrecs(left); 3755 3756 /* 3757 * We're now going to join "left" and "right" by moving all the stuff 3758 * in "right" to "left" and deleting "right". 3759 */ 3760 XFS_BTREE_STATS_ADD(cur, moves, rrecs); 3761 if (level > 0) { 3762 /* It's a non-leaf. Move keys and pointers. */ 3763 union xfs_btree_key *lkp; /* left btree key */ 3764 union xfs_btree_ptr *lpp; /* left address pointer */ 3765 union xfs_btree_key *rkp; /* right btree key */ 3766 union xfs_btree_ptr *rpp; /* right address pointer */ 3767 3768 lkp = xfs_btree_key_addr(cur, lrecs + 1, left); 3769 lpp = xfs_btree_ptr_addr(cur, lrecs + 1, left); 3770 rkp = xfs_btree_key_addr(cur, 1, right); 3771 rpp = xfs_btree_ptr_addr(cur, 1, right); 3772 #ifdef DEBUG 3773 for (i = 1; i < rrecs; i++) { 3774 error = xfs_btree_check_ptr(cur, rpp, i, level); 3775 if (error) 3776 goto error0; 3777 } 3778 #endif 3779 xfs_btree_copy_keys(cur, lkp, rkp, rrecs); 3780 xfs_btree_copy_ptrs(cur, lpp, rpp, rrecs); 3781 3782 xfs_btree_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs); 3783 xfs_btree_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs); 3784 } else { 3785 /* It's a leaf. Move records. */ 3786 union xfs_btree_rec *lrp; /* left record pointer */ 3787 union xfs_btree_rec *rrp; /* right record pointer */ 3788 3789 lrp = xfs_btree_rec_addr(cur, lrecs + 1, left); 3790 rrp = xfs_btree_rec_addr(cur, 1, right); 3791 3792 xfs_btree_copy_recs(cur, lrp, rrp, rrecs); 3793 xfs_btree_log_recs(cur, lbp, lrecs + 1, lrecs + rrecs); 3794 } 3795 3796 XFS_BTREE_STATS_INC(cur, join); 3797 3798 /* 3799 * Fix up the number of records and right block pointer in the 3800 * surviving block, and log it. 3801 */ 3802 xfs_btree_set_numrecs(left, lrecs + rrecs); 3803 xfs_btree_get_sibling(cur, right, &cptr, XFS_BB_RIGHTSIB), 3804 xfs_btree_set_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB); 3805 xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB); 3806 3807 /* If there is a right sibling, point it to the remaining block. */ 3808 xfs_btree_get_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB); 3809 if (!xfs_btree_ptr_is_null(cur, &cptr)) { 3810 error = xfs_btree_read_buf_block(cur, &cptr, 0, &rrblock, &rrbp); 3811 if (error) 3812 goto error0; 3813 xfs_btree_set_sibling(cur, rrblock, &lptr, XFS_BB_LEFTSIB); 3814 xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB); 3815 } 3816 3817 /* Free the deleted block. */ 3818 error = cur->bc_ops->free_block(cur, rbp); 3819 if (error) 3820 goto error0; 3821 XFS_BTREE_STATS_INC(cur, free); 3822 3823 /* 3824 * If we joined with the left neighbor, set the buffer in the 3825 * cursor to the left block, and fix up the index. 3826 */ 3827 if (bp != lbp) { 3828 cur->bc_bufs[level] = lbp; 3829 cur->bc_ptrs[level] += lrecs; 3830 cur->bc_ra[level] = 0; 3831 } 3832 /* 3833 * If we joined with the right neighbor and there's a level above 3834 * us, increment the cursor at that level. 3835 */ 3836 else if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) || 3837 (level + 1 < cur->bc_nlevels)) { 3838 error = xfs_btree_increment(cur, level + 1, &i); 3839 if (error) 3840 goto error0; 3841 } 3842 3843 /* 3844 * Readjust the ptr at this level if it's not a leaf, since it's 3845 * still pointing at the deletion point, which makes the cursor 3846 * inconsistent. If this makes the ptr 0, the caller fixes it up. 
3847 * We can't use decrement because it would change the next level up. 3848 */ 3849 if (level > 0) 3850 cur->bc_ptrs[level]--; 3851 3852 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 3853 /* Return value means the next level up has something to do. */ 3854 *stat = 2; 3855 return 0; 3856 3857 error0: 3858 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); 3859 if (tcur) 3860 xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR); 3861 return error; 3862 } 3863 3864 /* 3865 * Delete the record pointed to by cur. 3866 * The cursor refers to the place where the record was (could be inserted) 3867 * when the operation returns. 3868 */ 3869 int /* error */ 3870 xfs_btree_delete( 3871 struct xfs_btree_cur *cur, 3872 int *stat) /* success/failure */ 3873 { 3874 int error; /* error return value */ 3875 int level; 3876 int i; 3877 3878 XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); 3879 3880 /* 3881 * Go up the tree, starting at leaf level. 3882 * 3883 * If 2 is returned then a join was done; go to the next level. 3884 * Otherwise we are done. 3885 */ 3886 for (level = 0, i = 2; i == 2; level++) { 3887 error = xfs_btree_delrec(cur, level, &i); 3888 if (error) 3889 goto error0; 3890 } 3891 3892 if (i == 0) { 3893 for (level = 1; level < cur->bc_nlevels; level++) { 3894 if (cur->bc_ptrs[level] == 0) { 3895 error = xfs_btree_decrement(cur, level, &i); 3896 if (error) 3897 goto error0; 3898 break; 3899 } 3900 } 3901 } 3902 3903 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); 3904 *stat = i; 3905 return 0; 3906 error0: 3907 XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); 3908 return error; 3909 } 3910 3911 /* 3912 * Get the data from the pointed-to record. 3913 */ 3914 int /* error */ 3915 xfs_btree_get_rec( 3916 struct xfs_btree_cur *cur, /* btree cursor */ 3917 union xfs_btree_rec **recp, /* output: btree record */ 3918 int *stat) /* output: success/failure */ 3919 { 3920 struct xfs_btree_block *block; /* btree block */ 3921 struct xfs_buf *bp; /* buffer pointer */ 3922 int ptr; /* record number */ 3923 #ifdef DEBUG 3924 int error; /* error return value */ 3925 #endif 3926 3927 ptr = cur->bc_ptrs[0]; 3928 block = xfs_btree_get_block(cur, 0, &bp); 3929 3930 #ifdef DEBUG 3931 error = xfs_btree_check_block(cur, block, 0, bp); 3932 if (error) 3933 return error; 3934 #endif 3935 3936 /* 3937 * Off the right end or left end, return failure. 3938 */ 3939 if (ptr > xfs_btree_get_numrecs(block) || ptr <= 0) { 3940 *stat = 0; 3941 return 0; 3942 } 3943 3944 /* 3945 * Point to the record and extract its data. 3946 */ 3947 *recp = xfs_btree_rec_addr(cur, ptr, block); 3948 *stat = 1; 3949 return 0; 3950 } 3951 3952 /* 3953 * Change the owner of a btree. 3954 * 3955 * The mechanism we use here is ordered buffer logging. Because we don't know 3956 * how many buffers were are going to need to modify, we don't really want to 3957 * have to make transaction reservations for the worst case of every buffer in a 3958 * full size btree as that may be more space that we can fit in the log.... 3959 * 3960 * We do the btree walk in the most optimal manner possible - we have sibling 3961 * pointers so we can just walk all the blocks on each level from left to right 3962 * in a single pass, and then move to the next level and do the same. We can 3963 * also do readahead on the sibling pointers to get IO moving more quickly, 3964 * though for slow disks this is unlikely to make much difference to performance 3965 * as the amount of CPU work we have to do before moving to the next block is 3966 * relatively small. 
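 *
 * The reason ordered buffers keep the reservation small is that the
 * buffer is attached to the transaction without its data being copied
 * into the log, so the log space consumed does not grow with the
 * number of blocks the walk modifies.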
3967 * 3968 * For each btree block that we load, modify the owner appropriately, set the 3969 * buffer as an ordered buffer and log it appropriately. We need to ensure that 3970 * we mark the region we change dirty so that if the buffer is relogged in 3971 * a subsequent transaction the changes we make here as an ordered buffer are 3972 * correctly relogged in that transaction. If we are in recovery context, then 3973 * just queue the modified buffer as delayed write buffer so the transaction 3974 * recovery completion writes the changes to disk. 3975 */ 3976 static int 3977 xfs_btree_block_change_owner( 3978 struct xfs_btree_cur *cur, 3979 int level, 3980 __uint64_t new_owner, 3981 struct list_head *buffer_list) 3982 { 3983 struct xfs_btree_block *block; 3984 struct xfs_buf *bp; 3985 union xfs_btree_ptr rptr; 3986 3987 /* do right sibling readahead */ 3988 xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA); 3989 3990 /* modify the owner */ 3991 block = xfs_btree_get_block(cur, level, &bp); 3992 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) 3993 block->bb_u.l.bb_owner = cpu_to_be64(new_owner); 3994 else 3995 block->bb_u.s.bb_owner = cpu_to_be32(new_owner); 3996 3997 /* 3998 * If the block is a root block hosted in an inode, we might not have a 3999 * buffer pointer here and we shouldn't attempt to log the change as the 4000 * information is already held in the inode and discarded when the root 4001 * block is formatted into the on-disk inode fork. We still change it, 4002 * though, so everything is consistent in memory. 4003 */ 4004 if (bp) { 4005 if (cur->bc_tp) { 4006 xfs_trans_ordered_buf(cur->bc_tp, bp); 4007 xfs_btree_log_block(cur, bp, XFS_BB_OWNER); 4008 } else { 4009 xfs_buf_delwri_queue(bp, buffer_list); 4010 } 4011 } else { 4012 ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE); 4013 ASSERT(level == cur->bc_nlevels - 1); 4014 } 4015 4016 /* now read rh sibling block for next iteration */ 4017 xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB); 4018 if (xfs_btree_ptr_is_null(cur, &rptr)) 4019 return -ENOENT; 4020 4021 return xfs_btree_lookup_get_block(cur, level, &rptr, &block); 4022 } 4023 4024 int 4025 xfs_btree_change_owner( 4026 struct xfs_btree_cur *cur, 4027 __uint64_t new_owner, 4028 struct list_head *buffer_list) 4029 { 4030 union xfs_btree_ptr lptr; 4031 int level; 4032 struct xfs_btree_block *block = NULL; 4033 int error = 0; 4034 4035 cur->bc_ops->init_ptr_from_cur(cur, &lptr); 4036 4037 /* for each level */ 4038 for (level = cur->bc_nlevels - 1; level >= 0; level--) { 4039 /* grab the left hand block */ 4040 error = xfs_btree_lookup_get_block(cur, level, &lptr, &block); 4041 if (error) 4042 return error; 4043 4044 /* readahead the left most block for the next level down */ 4045 if (level > 0) { 4046 union xfs_btree_ptr *ptr; 4047 4048 ptr = xfs_btree_ptr_addr(cur, 1, block); 4049 xfs_btree_readahead_ptr(cur, ptr, 1); 4050 4051 /* save for the next iteration of the loop */ 4052 lptr = *ptr; 4053 } 4054 4055 /* for each buffer in the level */ 4056 do { 4057 error = xfs_btree_block_change_owner(cur, level, 4058 new_owner, 4059 buffer_list); 4060 } while (!error); 4061 4062 if (error != -ENOENT) 4063 return error; 4064 } 4065 4066 return 0; 4067 } 4068
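/*
 * Illustrative usage sketch (assumptions: a cursor obtained from one of
 * the per-type *_init_cursor() helpers, and the record to insert already
 * loaded into cur->bc_rec by the caller).  A typical insert through this
 * interface looks roughly like:
 *
 *	error = xfs_btree_lookup(cur, XFS_LOOKUP_EQ, &stat);
 *	if (error)
 *		goto out;
 *	ASSERT(stat == 0);
 *	error = xfs_btree_insert(cur, &stat);
 *	if (error)
 *		goto out;
 *	XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, stat == 1, out);
 *
 * The first lookup positions the cursor (stat == 0 means the key does
 * not already exist); the per-btree ops such as init_rec_from_cur and
 * key_diff supply the type-specific record and comparison details.
 */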