// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
 * Copyright (c) 2013 Red Hat, Inc.
 * All Rights Reserved.
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_mount.h"
#include "xfs_da_format.h"
#include "xfs_da_btree.h"
#include "xfs_dir2.h"
#include "xfs_dir2_priv.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_inode_item.h"
#include "xfs_alloc.h"
#include "xfs_bmap.h"
#include "xfs_attr.h"
#include "xfs_attr_leaf.h"
#include "xfs_error.h"
#include "xfs_trace.h"
#include "xfs_cksum.h"
#include "xfs_buf_item.h"
#include "xfs_log.h"

/*
 * xfs_da_btree.c
 *
 * Routines to implement directories as Btrees of hashed names.
 */

/*========================================================================
 * Function prototypes for the kernel.
 *========================================================================*/

/*
 * Routines used for growing the Btree.
 */
STATIC int	xfs_da3_root_split(xfs_da_state_t *state,
				   xfs_da_state_blk_t *existing_root,
				   xfs_da_state_blk_t *new_child);
STATIC int	xfs_da3_node_split(xfs_da_state_t *state,
				   xfs_da_state_blk_t *existing_blk,
				   xfs_da_state_blk_t *split_blk,
				   xfs_da_state_blk_t *blk_to_add,
				   int treelevel,
				   int *result);
STATIC void	xfs_da3_node_rebalance(xfs_da_state_t *state,
				       xfs_da_state_blk_t *node_blk_1,
				       xfs_da_state_blk_t *node_blk_2);
STATIC void	xfs_da3_node_add(xfs_da_state_t *state,
				 xfs_da_state_blk_t *old_node_blk,
				 xfs_da_state_blk_t *new_node_blk);

/*
 * Routines used for shrinking the Btree.
 */
STATIC int	xfs_da3_root_join(xfs_da_state_t *state,
				  xfs_da_state_blk_t *root_blk);
STATIC int	xfs_da3_node_toosmall(xfs_da_state_t *state, int *retval);
STATIC void	xfs_da3_node_remove(xfs_da_state_t *state,
				    xfs_da_state_blk_t *drop_blk);
STATIC void	xfs_da3_node_unbalance(xfs_da_state_t *state,
				       xfs_da_state_blk_t *src_node_blk,
				       xfs_da_state_blk_t *dst_node_blk);

/*
 * Utility routines.
 */
STATIC int	xfs_da3_blk_unlink(xfs_da_state_t *state,
				   xfs_da_state_blk_t *drop_blk,
				   xfs_da_state_blk_t *save_blk);


kmem_zone_t *xfs_da_state_zone;	/* anchor for state struct zone */

/*
 * Allocate a dir-state structure.
 * We don't put them on the stack since they're large.
 */
xfs_da_state_t *
xfs_da_state_alloc(void)
{
	return kmem_zone_zalloc(xfs_da_state_zone, KM_NOFS);
}

/*
 * Kill the altpath contents of a da-state structure.
 */
STATIC void
xfs_da_state_kill_altpath(xfs_da_state_t *state)
{
	int	i;

	for (i = 0; i < state->altpath.active; i++)
		state->altpath.blk[i].bp = NULL;
	state->altpath.active = 0;
}

/*
 * Free a da-state structure.
 */
void
xfs_da_state_free(xfs_da_state_t *state)
{
	xfs_da_state_kill_altpath(state);
#ifdef DEBUG
	memset((char *)state, 0, sizeof(*state));
#endif /* DEBUG */
	kmem_zone_free(xfs_da_state_zone, state);
}
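/*
 * Illustrative usage sketch (not part of the original source): callers
 * allocate a state, point it at their args, and free it when the
 * operation completes.  Assuming a filled-out xfs_da_args "args":
 *
 *	state = xfs_da_state_alloc();
 *	state->args = args;
 *	state->mp = args->dp->i_mount;
 *	...lookup/add/remove against state->path...
 *	xfs_da_state_free(state);
 */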
/*
 * Verify an xfs_da3_blkinfo structure. Note that the da3 fields are only
 * accessible on v5 filesystems. This header format is common across da
 * node, attr leaf and dir leaf blocks.
 */
xfs_failaddr_t
xfs_da3_blkinfo_verify(
	struct xfs_buf		*bp,
	struct xfs_da3_blkinfo	*hdr3)
{
	struct xfs_mount	*mp = bp->b_target->bt_mount;
	struct xfs_da_blkinfo	*hdr = &hdr3->hdr;

	if (!xfs_verify_magic16(bp, hdr->magic))
		return __this_address;

	if (xfs_sb_version_hascrc(&mp->m_sb)) {
		if (!uuid_equal(&hdr3->uuid, &mp->m_sb.sb_meta_uuid))
			return __this_address;
		if (be64_to_cpu(hdr3->blkno) != bp->b_bn)
			return __this_address;
		if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn)))
			return __this_address;
	}

	return NULL;
}

static xfs_failaddr_t
xfs_da3_node_verify(
	struct xfs_buf		*bp)
{
	struct xfs_mount	*mp = bp->b_target->bt_mount;
	struct xfs_da_intnode	*hdr = bp->b_addr;
	struct xfs_da3_icnode_hdr ichdr;
	const struct xfs_dir_ops *ops;
	xfs_failaddr_t		fa;

	ops = xfs_dir_get_ops(mp, NULL);

	ops->node_hdr_from_disk(&ichdr, hdr);

	fa = xfs_da3_blkinfo_verify(bp, bp->b_addr);
	if (fa)
		return fa;

	if (ichdr.level == 0)
		return __this_address;
	if (ichdr.level > XFS_DA_NODE_MAXDEPTH)
		return __this_address;
	if (ichdr.count == 0)
		return __this_address;

	/*
	 * we don't know if the node is for an attribute or directory tree,
	 * so only fail if the count is outside both bounds
	 */
	if (ichdr.count > mp->m_dir_geo->node_ents &&
	    ichdr.count > mp->m_attr_geo->node_ents)
		return __this_address;

	/* XXX: hash order check? */

	return NULL;
}

static void
xfs_da3_node_write_verify(
	struct xfs_buf	*bp)
{
	struct xfs_mount	*mp = bp->b_target->bt_mount;
	struct xfs_buf_log_item	*bip = bp->b_log_item;
	struct xfs_da3_node_hdr *hdr3 = bp->b_addr;
	xfs_failaddr_t		fa;

	fa = xfs_da3_node_verify(bp);
	if (fa) {
		xfs_verifier_error(bp, -EFSCORRUPTED, fa);
		return;
	}

	if (!xfs_sb_version_hascrc(&mp->m_sb))
		return;

	if (bip)
		hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn);

	xfs_buf_update_cksum(bp, XFS_DA3_NODE_CRC_OFF);
}

/*
 * leaf/node format detection on trees is sketchy, so a node read can be done
 * on leaf level blocks when detection identifies the tree as a node format
 * tree incorrectly. In this case, we need to swap the verifier to match the
 * correct format of the block being read.
 */
static void
xfs_da3_node_read_verify(
	struct xfs_buf		*bp)
{
	struct xfs_da_blkinfo	*info = bp->b_addr;
	xfs_failaddr_t		fa;

	switch (be16_to_cpu(info->magic)) {
	case XFS_DA3_NODE_MAGIC:
		if (!xfs_buf_verify_cksum(bp, XFS_DA3_NODE_CRC_OFF)) {
			xfs_verifier_error(bp, -EFSBADCRC,
					__this_address);
			break;
		}
		/* fall through */
	case XFS_DA_NODE_MAGIC:
		fa = xfs_da3_node_verify(bp);
		if (fa)
			xfs_verifier_error(bp, -EFSCORRUPTED, fa);
		return;
	case XFS_ATTR_LEAF_MAGIC:
	case XFS_ATTR3_LEAF_MAGIC:
		bp->b_ops = &xfs_attr3_leaf_buf_ops;
		bp->b_ops->verify_read(bp);
		return;
	case XFS_DIR2_LEAFN_MAGIC:
	case XFS_DIR3_LEAFN_MAGIC:
		bp->b_ops = &xfs_dir3_leafn_buf_ops;
		bp->b_ops->verify_read(bp);
		return;
	default:
		xfs_verifier_error(bp, -EFSCORRUPTED, __this_address);
		break;
	}
}
/* Verify the structure of a da3 block. */
static xfs_failaddr_t
xfs_da3_node_verify_struct(
	struct xfs_buf		*bp)
{
	struct xfs_da_blkinfo	*info = bp->b_addr;

	switch (be16_to_cpu(info->magic)) {
	case XFS_DA3_NODE_MAGIC:
	case XFS_DA_NODE_MAGIC:
		return xfs_da3_node_verify(bp);
	case XFS_ATTR_LEAF_MAGIC:
	case XFS_ATTR3_LEAF_MAGIC:
		bp->b_ops = &xfs_attr3_leaf_buf_ops;
		return bp->b_ops->verify_struct(bp);
	case XFS_DIR2_LEAFN_MAGIC:
	case XFS_DIR3_LEAFN_MAGIC:
		bp->b_ops = &xfs_dir3_leafn_buf_ops;
		return bp->b_ops->verify_struct(bp);
	default:
		return __this_address;
	}
}

const struct xfs_buf_ops xfs_da3_node_buf_ops = {
	.name = "xfs_da3_node",
	.magic16 = { cpu_to_be16(XFS_DA_NODE_MAGIC),
		     cpu_to_be16(XFS_DA3_NODE_MAGIC) },
	.verify_read = xfs_da3_node_read_verify,
	.verify_write = xfs_da3_node_write_verify,
	.verify_struct = xfs_da3_node_verify_struct,
};

int
xfs_da3_node_read(
	struct xfs_trans	*tp,
	struct xfs_inode	*dp,
	xfs_dablk_t		bno,
	xfs_daddr_t		mappedbno,
	struct xfs_buf		**bpp,
	int			which_fork)
{
	int			err;

	err = xfs_da_read_buf(tp, dp, bno, mappedbno, bpp,
				which_fork, &xfs_da3_node_buf_ops);
	if (!err && tp && *bpp) {
		struct xfs_da_blkinfo	*info = (*bpp)->b_addr;
		int			type;

		switch (be16_to_cpu(info->magic)) {
		case XFS_DA_NODE_MAGIC:
		case XFS_DA3_NODE_MAGIC:
			type = XFS_BLFT_DA_NODE_BUF;
			break;
		case XFS_ATTR_LEAF_MAGIC:
		case XFS_ATTR3_LEAF_MAGIC:
			type = XFS_BLFT_ATTR_LEAF_BUF;
			break;
		case XFS_DIR2_LEAFN_MAGIC:
		case XFS_DIR3_LEAFN_MAGIC:
			type = XFS_BLFT_DIR_LEAFN_BUF;
			break;
		default:
			XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
					tp->t_mountp, info, sizeof(*info));
			xfs_trans_brelse(tp, *bpp);
			*bpp = NULL;
			return -EFSCORRUPTED;
		}
		xfs_trans_buf_set_type(tp, *bpp, type);
	}
	return err;
}

/*========================================================================
 * Routines used for growing the Btree.
 *========================================================================*/

/*
 * Create the initial contents of an intermediate node.
 */
int
xfs_da3_node_create(
	struct xfs_da_args	*args,
	xfs_dablk_t		blkno,
	int			level,
	struct xfs_buf		**bpp,
	int			whichfork)
{
	struct xfs_da_intnode	*node;
	struct xfs_trans	*tp = args->trans;
	struct xfs_mount	*mp = tp->t_mountp;
	struct xfs_da3_icnode_hdr ichdr = {0};
	struct xfs_buf		*bp;
	int			error;
	struct xfs_inode	*dp = args->dp;

	trace_xfs_da_node_create(args);
	ASSERT(level <= XFS_DA_NODE_MAXDEPTH);

	error = xfs_da_get_buf(tp, dp, blkno, -1, &bp, whichfork);
	if (error)
		return error;
	bp->b_ops = &xfs_da3_node_buf_ops;
	xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DA_NODE_BUF);
	node = bp->b_addr;

	if (xfs_sb_version_hascrc(&mp->m_sb)) {
		struct xfs_da3_node_hdr *hdr3 = bp->b_addr;

		memset(hdr3, 0, sizeof(struct xfs_da3_node_hdr));
		ichdr.magic = XFS_DA3_NODE_MAGIC;
		hdr3->info.blkno = cpu_to_be64(bp->b_bn);
		hdr3->info.owner = cpu_to_be64(args->dp->i_ino);
		uuid_copy(&hdr3->info.uuid, &mp->m_sb.sb_meta_uuid);
	} else {
		ichdr.magic = XFS_DA_NODE_MAGIC;
	}
	ichdr.level = level;

	dp->d_ops->node_hdr_to_disk(node, &ichdr);
	xfs_trans_log_buf(tp, bp,
		XFS_DA_LOGRANGE(node, &node->hdr, dp->d_ops->node_hdr_size));

	*bpp = bp;
	return 0;
}

/*
 * Split a leaf node, rebalance, then possibly split
 * intermediate nodes, rebalance, etc.
 */
int							/* error */
xfs_da3_split(
	struct xfs_da_state	*state)
{
	struct xfs_da_state_blk	*oldblk;
	struct xfs_da_state_blk	*newblk;
	struct xfs_da_state_blk	*addblk;
	struct xfs_da_intnode	*node;
	int			max;
	int			action = 0;
	int			error;
	int			i;

	trace_xfs_da_split(state->args);

	/*
	 * Walk back up the tree splitting/inserting/adjusting as necessary.
	 * If we need to insert and there isn't room, split the node, then
	 * decide which fragment to insert the new block from below into.
	 * Note that we may split the root this way, but we need more fixup.
	 */
	max = state->path.active - 1;
	ASSERT((max >= 0) && (max < XFS_DA_NODE_MAXDEPTH));
	ASSERT(state->path.blk[max].magic == XFS_ATTR_LEAF_MAGIC ||
	       state->path.blk[max].magic == XFS_DIR2_LEAFN_MAGIC);

	addblk = &state->path.blk[max];		/* initial dummy value */
	for (i = max; (i >= 0) && addblk; state->path.active--, i--) {
		oldblk = &state->path.blk[i];
		newblk = &state->altpath.blk[i];

		/*
		 * If a leaf node then
		 *     Allocate a new leaf node, then rebalance across them.
		 * else if an intermediate node then
		 *     We split on the last layer, must we split the node?
		 */
		switch (oldblk->magic) {
		case XFS_ATTR_LEAF_MAGIC:
			error = xfs_attr3_leaf_split(state, oldblk, newblk);
			if ((error != 0) && (error != -ENOSPC)) {
				return error;	/* GROT: attr is inconsistent */
			}
			if (!error) {
				addblk = newblk;
				break;
			}
			/*
			 * Entry wouldn't fit, split the leaf again. The new
			 * extrablk will be consumed by xfs_da3_node_split if
			 * the node is split.
			 */
			state->extravalid = 1;
			if (state->inleaf) {
				state->extraafter = 0;	/* before newblk */
				trace_xfs_attr_leaf_split_before(state->args);
				error = xfs_attr3_leaf_split(state, oldblk,
							    &state->extrablk);
			} else {
				state->extraafter = 1;	/* after newblk */
				trace_xfs_attr_leaf_split_after(state->args);
				error = xfs_attr3_leaf_split(state, newblk,
							    &state->extrablk);
			}
			if (error)
				return error;	/* GROT: attr inconsistent */
			addblk = newblk;
			break;
		case XFS_DIR2_LEAFN_MAGIC:
			error = xfs_dir2_leafn_split(state, oldblk, newblk);
			if (error)
				return error;
			addblk = newblk;
			break;
		case XFS_DA_NODE_MAGIC:
			error = xfs_da3_node_split(state, oldblk, newblk,
						   addblk, max - i, &action);
			addblk->bp = NULL;
			if (error)
				return error;	/* GROT: dir is inconsistent */
			/*
			 * Record the newly split block for the next time thru?
			 */
			if (action)
				addblk = newblk;
			else
				addblk = NULL;
			break;
		}

		/*
		 * Update the btree to show the new hashval for this child.
		 */
		xfs_da3_fixhashpath(state, &state->path);
	}
	if (!addblk)
		return 0;

	/*
	 * xfs_da3_node_split() should have consumed any extra blocks we added
	 * during a double leaf split in the attr fork. This is guaranteed as
	 * we can't be here if the attr fork only has a single leaf block.
	 */
	ASSERT(state->extravalid == 0 ||
	       state->path.blk[max].magic == XFS_DIR2_LEAFN_MAGIC);

	/*
	 * Split the root node.
	 */
	ASSERT(state->path.active == 0);
	oldblk = &state->path.blk[0];
	error = xfs_da3_root_split(state, oldblk, addblk);
	if (error) {
		addblk->bp = NULL;
		return error;	/* GROT: dir is inconsistent */
	}

	/*
	 * Update pointers to the node which used to be block 0 and just got
	 * bumped because of the addition of a new root node. Note that the
	 * original block 0 could be at any position in the list of blocks in
	 * the tree.
	 *
	 * Note: the magic numbers and sibling pointers are in the same
	 * physical place for both v2 and v3 headers (by design). Hence it
	 * doesn't matter which version of the xfs_da_intnode structure we
	 * use here as the result will be the same using either structure.
	 */
	node = oldblk->bp->b_addr;
	if (node->hdr.info.forw) {
		ASSERT(be32_to_cpu(node->hdr.info.forw) == addblk->blkno);
		node = addblk->bp->b_addr;
		node->hdr.info.back = cpu_to_be32(oldblk->blkno);
		xfs_trans_log_buf(state->args->trans, addblk->bp,
			XFS_DA_LOGRANGE(node, &node->hdr.info,
					sizeof(node->hdr.info)));
	}
	node = oldblk->bp->b_addr;
	if (node->hdr.info.back) {
		ASSERT(be32_to_cpu(node->hdr.info.back) == addblk->blkno);
		node = addblk->bp->b_addr;
		node->hdr.info.forw = cpu_to_be32(oldblk->blkno);
		xfs_trans_log_buf(state->args->trans, addblk->bp,
			XFS_DA_LOGRANGE(node, &node->hdr.info,
					sizeof(node->hdr.info)));
	}
	addblk->bp = NULL;
	return 0;
}
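/*
 * Illustrative sketch (not from the original source): a root split copies
 * the old root's contents from block 0 to a newly allocated block, then
 * reinitialises block 0 as a new root with exactly two entries:
 *
 *	before:			after:
 *	[0: old root]		[0: new root] --> [blkno: old root copy]
 *	                	              \-> [addblk: new sibling]
 *
 * Block 0 must remain the root because callers locate the tree by its
 * fixed starting block (args->geo->leafblk for the data fork).
 */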
/*
 * Split the root.  We have to create a new root and point to the two
 * parts (the split old root) that we just created.  Copy block zero to
 * the EOF, extending the inode in the process.
 */
STATIC int						/* error */
xfs_da3_root_split(
	struct xfs_da_state	*state,
	struct xfs_da_state_blk	*blk1,
	struct xfs_da_state_blk	*blk2)
{
	struct xfs_da_intnode	*node;
	struct xfs_da_intnode	*oldroot;
	struct xfs_da_node_entry *btree;
	struct xfs_da3_icnode_hdr nodehdr;
	struct xfs_da_args	*args;
	struct xfs_buf		*bp;
	struct xfs_inode	*dp;
	struct xfs_trans	*tp;
	struct xfs_dir2_leaf	*leaf;
	xfs_dablk_t		blkno;
	int			level;
	int			error;
	int			size;

	trace_xfs_da_root_split(state->args);

	/*
	 * Copy the existing (incorrect) block from the root node position
	 * to a free space somewhere.
	 */
	args = state->args;
	error = xfs_da_grow_inode(args, &blkno);
	if (error)
		return error;

	dp = args->dp;
	tp = args->trans;
	error = xfs_da_get_buf(tp, dp, blkno, -1, &bp, args->whichfork);
	if (error)
		return error;
	node = bp->b_addr;
	oldroot = blk1->bp->b_addr;
	if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC) ||
	    oldroot->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC)) {
		struct xfs_da3_icnode_hdr icnodehdr;

		dp->d_ops->node_hdr_from_disk(&icnodehdr, oldroot);
		btree = dp->d_ops->node_tree_p(oldroot);
		size = (int)((char *)&btree[icnodehdr.count] - (char *)oldroot);
		level = icnodehdr.level;

		/*
		 * we are about to copy oldroot to bp, so set up the type
		 * of bp while we know exactly what it will be.
		 */
		xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DA_NODE_BUF);
	} else {
		struct xfs_dir3_icleaf_hdr leafhdr;
		struct xfs_dir2_leaf_entry *ents;

		leaf = (xfs_dir2_leaf_t *)oldroot;
		dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
		ents = dp->d_ops->leaf_ents_p(leaf);

		ASSERT(leafhdr.magic == XFS_DIR2_LEAFN_MAGIC ||
		       leafhdr.magic == XFS_DIR3_LEAFN_MAGIC);
		size = (int)((char *)&ents[leafhdr.count] - (char *)leaf);
		level = 0;

		/*
		 * we are about to copy oldroot to bp, so set up the type
		 * of bp while we know exactly what it will be.
		 */
		xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_LEAFN_BUF);
	}

	/*
	 * we can copy most of the information in the node from one block to
	 * another, but for CRC enabled headers we have to make sure that the
	 * block specific identifiers are kept intact. We update the buffer
	 * directly for this.
	 */
	memcpy(node, oldroot, size);
	if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC) ||
	    oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)) {
		struct xfs_da3_intnode *node3 = (struct xfs_da3_intnode *)node;

		node3->hdr.info.blkno = cpu_to_be64(bp->b_bn);
	}
	xfs_trans_log_buf(tp, bp, 0, size - 1);

	bp->b_ops = blk1->bp->b_ops;
	xfs_trans_buf_copy_type(bp, blk1->bp);
	blk1->bp = bp;
	blk1->blkno = blkno;

	/*
	 * Set up the new root node.
	 */
	error = xfs_da3_node_create(args,
		(args->whichfork == XFS_DATA_FORK) ? args->geo->leafblk : 0,
		level + 1, &bp, args->whichfork);
	if (error)
		return error;

	node = bp->b_addr;
	dp->d_ops->node_hdr_from_disk(&nodehdr, node);
	btree = dp->d_ops->node_tree_p(node);
	btree[0].hashval = cpu_to_be32(blk1->hashval);
	btree[0].before = cpu_to_be32(blk1->blkno);
	btree[1].hashval = cpu_to_be32(blk2->hashval);
	btree[1].before = cpu_to_be32(blk2->blkno);
	nodehdr.count = 2;
	dp->d_ops->node_hdr_to_disk(node, &nodehdr);

#ifdef DEBUG
	if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
	    oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)) {
		ASSERT(blk1->blkno >= args->geo->leafblk &&
		       blk1->blkno < args->geo->freeblk);
		ASSERT(blk2->blkno >= args->geo->leafblk &&
		       blk2->blkno < args->geo->freeblk);
	}
#endif

	/* Header is already logged by xfs_da_node_create */
	xfs_trans_log_buf(tp, bp,
		XFS_DA_LOGRANGE(node, btree, sizeof(xfs_da_node_entry_t) * 2));

	return 0;
}

/*
 * Split the node, rebalance, then add the new entry.
 */
STATIC int						/* error */
xfs_da3_node_split(
	struct xfs_da_state	*state,
	struct xfs_da_state_blk	*oldblk,
	struct xfs_da_state_blk	*newblk,
	struct xfs_da_state_blk	*addblk,
	int			treelevel,
	int			*result)
{
	struct xfs_da_intnode	*node;
	struct xfs_da3_icnode_hdr nodehdr;
	xfs_dablk_t		blkno;
	int			newcount;
	int			error;
	int			useextra;
	struct xfs_inode	*dp = state->args->dp;

	trace_xfs_da_node_split(state->args);

	node = oldblk->bp->b_addr;
	dp->d_ops->node_hdr_from_disk(&nodehdr, node);

	/*
	 * With V2 dirs the extra block is data or freespace.
	 */
	useextra = state->extravalid && state->args->whichfork == XFS_ATTR_FORK;
	newcount = 1 + useextra;
	/*
	 * Do we have to split the node?
	 */
	if (nodehdr.count + newcount > state->args->geo->node_ents) {
		/*
		 * Allocate a new node, add to the doubly linked chain of
		 * nodes, then move some of our excess entries into it.
		 */
		error = xfs_da_grow_inode(state->args, &blkno);
		if (error)
			return error;	/* GROT: dir is inconsistent */

		error = xfs_da3_node_create(state->args, blkno, treelevel,
					    &newblk->bp, state->args->whichfork);
		if (error)
			return error;	/* GROT: dir is inconsistent */
		newblk->blkno = blkno;
		newblk->magic = XFS_DA_NODE_MAGIC;
		xfs_da3_node_rebalance(state, oldblk, newblk);
		error = xfs_da3_blk_link(state, oldblk, newblk);
		if (error)
			return error;
		*result = 1;
	} else {
		*result = 0;
	}

	/*
	 * Insert the new entry(s) into the correct block
	 * (updating last hashval in the process).
	 *
	 * xfs_da3_node_add() inserts BEFORE the given index,
	 * and as a result of using node_lookup_int() we always
	 * point to a valid entry (not after one), but a split
	 * operation always results in a new block whose hashvals
	 * FOLLOW the current block.
	 *
	 * If we had a double-split op below us, then add the extra block too.
	 */
	node = oldblk->bp->b_addr;
	dp->d_ops->node_hdr_from_disk(&nodehdr, node);
	if (oldblk->index <= nodehdr.count) {
		oldblk->index++;
		xfs_da3_node_add(state, oldblk, addblk);
		if (useextra) {
			if (state->extraafter)
				oldblk->index++;
			xfs_da3_node_add(state, oldblk, &state->extrablk);
			state->extravalid = 0;
		}
	} else {
		newblk->index++;
		xfs_da3_node_add(state, newblk, addblk);
		if (useextra) {
			if (state->extraafter)
				newblk->index++;
			xfs_da3_node_add(state, newblk, &state->extrablk);
			state->extravalid = 0;
		}
	}

	return 0;
}
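/*
 * Worked example (illustrative, not from the original source): rebalance
 * computes count = (count1 - count2) / 2, so with a full node of, say,
 * 96 entries (count1 = 96) and a freshly created sibling (count2 = 0),
 * 48 entries move to the new node and each block ends up half full.
 * A negative count means node2 is the fuller block and entries flow the
 * other way.
 */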
/*
 * Balance the btree elements between two intermediate nodes,
 * usually one full and one empty.
 *
 * NOTE: if blk2 is empty, then it will get the upper half of blk1.
 */
STATIC void
xfs_da3_node_rebalance(
	struct xfs_da_state	*state,
	struct xfs_da_state_blk	*blk1,
	struct xfs_da_state_blk	*blk2)
{
	struct xfs_da_intnode	*node1;
	struct xfs_da_intnode	*node2;
	struct xfs_da_intnode	*tmpnode;
	struct xfs_da_node_entry *btree1;
	struct xfs_da_node_entry *btree2;
	struct xfs_da_node_entry *btree_s;
	struct xfs_da_node_entry *btree_d;
	struct xfs_da3_icnode_hdr nodehdr1;
	struct xfs_da3_icnode_hdr nodehdr2;
	struct xfs_trans	*tp;
	int			count;
	int			tmp;
	int			swap = 0;
	struct xfs_inode	*dp = state->args->dp;

	trace_xfs_da_node_rebalance(state->args);

	node1 = blk1->bp->b_addr;
	node2 = blk2->bp->b_addr;
	dp->d_ops->node_hdr_from_disk(&nodehdr1, node1);
	dp->d_ops->node_hdr_from_disk(&nodehdr2, node2);
	btree1 = dp->d_ops->node_tree_p(node1);
	btree2 = dp->d_ops->node_tree_p(node2);

	/*
	 * Figure out how many entries need to move, and in which direction.
	 * Swap the nodes around if that makes it simpler.
	 */
	if (nodehdr1.count > 0 && nodehdr2.count > 0 &&
	    ((be32_to_cpu(btree2[0].hashval) < be32_to_cpu(btree1[0].hashval)) ||
	     (be32_to_cpu(btree2[nodehdr2.count - 1].hashval) <
			be32_to_cpu(btree1[nodehdr1.count - 1].hashval)))) {
		tmpnode = node1;
		node1 = node2;
		node2 = tmpnode;
		dp->d_ops->node_hdr_from_disk(&nodehdr1, node1);
		dp->d_ops->node_hdr_from_disk(&nodehdr2, node2);
		btree1 = dp->d_ops->node_tree_p(node1);
		btree2 = dp->d_ops->node_tree_p(node2);
		swap = 1;
	}

	count = (nodehdr1.count - nodehdr2.count) / 2;
	if (count == 0)
		return;
	tp = state->args->trans;
	/*
	 * Two cases: high-to-low and low-to-high.
	 */
	if (count > 0) {
		/*
		 * Move elements in node2 up to make a hole.
		 */
		tmp = nodehdr2.count;
		if (tmp > 0) {
			tmp *= (uint)sizeof(xfs_da_node_entry_t);
			btree_s = &btree2[0];
			btree_d = &btree2[count];
			memmove(btree_d, btree_s, tmp);
		}

		/*
		 * Move the req'd B-tree elements from high in node1 to
		 * low in node2.
		 */
		nodehdr2.count += count;
		tmp = count * (uint)sizeof(xfs_da_node_entry_t);
		btree_s = &btree1[nodehdr1.count - count];
		btree_d = &btree2[0];
		memcpy(btree_d, btree_s, tmp);
		nodehdr1.count -= count;
	} else {
		/*
		 * Move the req'd B-tree elements from low in node2 to
		 * high in node1.
		 */
		count = -count;
		tmp = count * (uint)sizeof(xfs_da_node_entry_t);
		btree_s = &btree2[0];
		btree_d = &btree1[nodehdr1.count];
		memcpy(btree_d, btree_s, tmp);
		nodehdr1.count += count;

		xfs_trans_log_buf(tp, blk1->bp,
			XFS_DA_LOGRANGE(node1, btree_d, tmp));

		/*
		 * Move elements in node2 down to fill the hole.
		 */
		tmp  = nodehdr2.count - count;
		tmp *= (uint)sizeof(xfs_da_node_entry_t);
		btree_s = &btree2[count];
		btree_d = &btree2[0];
		memmove(btree_d, btree_s, tmp);
		nodehdr2.count -= count;
	}

	/*
	 * Log header of node 1 and all current bits of node 2.
	 */
	dp->d_ops->node_hdr_to_disk(node1, &nodehdr1);
	xfs_trans_log_buf(tp, blk1->bp,
		XFS_DA_LOGRANGE(node1, &node1->hdr, dp->d_ops->node_hdr_size));

	dp->d_ops->node_hdr_to_disk(node2, &nodehdr2);
	xfs_trans_log_buf(tp, blk2->bp,
		XFS_DA_LOGRANGE(node2, &node2->hdr,
				dp->d_ops->node_hdr_size +
				(sizeof(btree2[0]) * nodehdr2.count)));

	/*
	 * Record the last hashval from each block for upward propagation.
	 * (note: don't use the swapped node pointers)
	 */
	if (swap) {
		node1 = blk1->bp->b_addr;
		node2 = blk2->bp->b_addr;
		dp->d_ops->node_hdr_from_disk(&nodehdr1, node1);
		dp->d_ops->node_hdr_from_disk(&nodehdr2, node2);
		btree1 = dp->d_ops->node_tree_p(node1);
		btree2 = dp->d_ops->node_tree_p(node2);
	}
	blk1->hashval = be32_to_cpu(btree1[nodehdr1.count - 1].hashval);
	blk2->hashval = be32_to_cpu(btree2[nodehdr2.count - 1].hashval);

	/*
	 * Adjust the expected index for insertion.
	 */
	if (blk1->index >= nodehdr1.count) {
		blk2->index = blk1->index - nodehdr1.count;
		blk1->index = nodehdr1.count + 1;	/* make it invalid */
	}
}

/*
 * Add a new entry to an intermediate node.
 */
STATIC void
xfs_da3_node_add(
	struct xfs_da_state	*state,
	struct xfs_da_state_blk	*oldblk,
	struct xfs_da_state_blk	*newblk)
{
	struct xfs_da_intnode	*node;
	struct xfs_da3_icnode_hdr nodehdr;
	struct xfs_da_node_entry *btree;
	int			tmp;
	struct xfs_inode	*dp = state->args->dp;

	trace_xfs_da_node_add(state->args);

	node = oldblk->bp->b_addr;
	dp->d_ops->node_hdr_from_disk(&nodehdr, node);
	btree = dp->d_ops->node_tree_p(node);

	ASSERT(oldblk->index >= 0 && oldblk->index <= nodehdr.count);
	ASSERT(newblk->blkno != 0);
	if (state->args->whichfork == XFS_DATA_FORK)
		ASSERT(newblk->blkno >= state->args->geo->leafblk &&
		       newblk->blkno < state->args->geo->freeblk);

	/*
	 * We may need to make some room before we insert the new node.
	 */
	tmp = 0;
	if (oldblk->index < nodehdr.count) {
		tmp = (nodehdr.count - oldblk->index) * (uint)sizeof(*btree);
		memmove(&btree[oldblk->index + 1], &btree[oldblk->index], tmp);
	}
	btree[oldblk->index].hashval = cpu_to_be32(newblk->hashval);
	btree[oldblk->index].before = cpu_to_be32(newblk->blkno);
	xfs_trans_log_buf(state->args->trans, oldblk->bp,
		XFS_DA_LOGRANGE(node, &btree[oldblk->index],
				tmp + sizeof(*btree)));

	nodehdr.count += 1;
	dp->d_ops->node_hdr_to_disk(node, &nodehdr);
	xfs_trans_log_buf(state->args->trans, oldblk->bp,
		XFS_DA_LOGRANGE(node, &node->hdr, dp->d_ops->node_hdr_size));

	/*
	 * Copy the last hash value from the oldblk to propagate upwards.
	 */
	oldblk->hashval = be32_to_cpu(btree[nodehdr.count - 1].hashval);
}

/*========================================================================
 * Routines used for shrinking the Btree.
 *========================================================================*/

/*
 * Deallocate an empty leaf node, remove it from its parent,
 * possibly deallocating that block, etc...
 */
int
xfs_da3_join(
	struct xfs_da_state	*state)
{
	struct xfs_da_state_blk	*drop_blk;
	struct xfs_da_state_blk	*save_blk;
	int			action = 0;
	int			error;

	trace_xfs_da_join(state->args);

	drop_blk = &state->path.blk[ state->path.active-1 ];
	save_blk = &state->altpath.blk[ state->path.active-1 ];
	ASSERT(state->path.blk[0].magic == XFS_DA_NODE_MAGIC);
	ASSERT(drop_blk->magic == XFS_ATTR_LEAF_MAGIC ||
	       drop_blk->magic == XFS_DIR2_LEAFN_MAGIC);

	/*
	 * Walk back up the tree joining/deallocating as necessary.
	 * When we stop dropping blocks, break out.
	 */
	for (  ; state->path.active >= 2; drop_blk--, save_blk--,
		 state->path.active--) {
		/*
		 * See if we can combine the block with a neighbor.
		 *   (action == 0) => no options, just leave
		 *   (action == 1) => coalesce, then unlink
		 *   (action == 2) => block empty, unlink it
		 */
		switch (drop_blk->magic) {
		case XFS_ATTR_LEAF_MAGIC:
			error = xfs_attr3_leaf_toosmall(state, &action);
			if (error)
				return error;
			if (action == 0)
				return 0;
			xfs_attr3_leaf_unbalance(state, drop_blk, save_blk);
			break;
		case XFS_DIR2_LEAFN_MAGIC:
			error = xfs_dir2_leafn_toosmall(state, &action);
			if (error)
				return error;
			if (action == 0)
				return 0;
			xfs_dir2_leafn_unbalance(state, drop_blk, save_blk);
			break;
		case XFS_DA_NODE_MAGIC:
			/*
			 * Remove the offending node, fixup hashvals,
			 * check for a toosmall neighbor.
			 */
			xfs_da3_node_remove(state, drop_blk);
			xfs_da3_fixhashpath(state, &state->path);
			error = xfs_da3_node_toosmall(state, &action);
			if (error)
				return error;
			if (action == 0)
				return 0;
			xfs_da3_node_unbalance(state, drop_blk, save_blk);
			break;
		}
		xfs_da3_fixhashpath(state, &state->altpath);
		error = xfs_da3_blk_unlink(state, drop_blk, save_blk);
		xfs_da_state_kill_altpath(state);
		if (error)
			return error;
		error = xfs_da_shrink_inode(state->args, drop_blk->blkno,
					    drop_blk->bp);
		drop_blk->bp = NULL;
		if (error)
			return error;
	}
	/*
	 * We joined all the way to the top.  If it turns out that
	 * we only have one entry in the root, make the child block
	 * the new root.
	 */
	xfs_da3_node_remove(state, drop_blk);
	xfs_da3_fixhashpath(state, &state->path);
	error = xfs_da3_root_join(state, &state->path.blk[0]);
	return error;
}

#ifdef	DEBUG
static void
xfs_da_blkinfo_onlychild_validate(struct xfs_da_blkinfo *blkinfo, __u16 level)
{
	__be16	magic = blkinfo->magic;

	if (level == 1) {
		ASSERT(magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
		       magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC) ||
		       magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC) ||
		       magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC));
	} else {
		ASSERT(magic == cpu_to_be16(XFS_DA_NODE_MAGIC) ||
		       magic == cpu_to_be16(XFS_DA3_NODE_MAGIC));
	}
	ASSERT(!blkinfo->forw);
	ASSERT(!blkinfo->back);
}
#else	/* !DEBUG */
#define	xfs_da_blkinfo_onlychild_validate(blkinfo, level)
#endif	/* !DEBUG */

/*
 * We have only one entry in the root.  Copy the only remaining child of
 * the old root to block 0 as the new root node.
 */
STATIC int
xfs_da3_root_join(
	struct xfs_da_state	*state,
	struct xfs_da_state_blk	*root_blk)
{
	struct xfs_da_intnode	*oldroot;
	struct xfs_da_args	*args;
	xfs_dablk_t		child;
	struct xfs_buf		*bp;
	struct xfs_da3_icnode_hdr oldroothdr;
	struct xfs_da_node_entry *btree;
	int			error;
	struct xfs_inode	*dp = state->args->dp;

	trace_xfs_da_root_join(state->args);

	ASSERT(root_blk->magic == XFS_DA_NODE_MAGIC);

	args = state->args;
	oldroot = root_blk->bp->b_addr;
	dp->d_ops->node_hdr_from_disk(&oldroothdr, oldroot);
	ASSERT(oldroothdr.forw == 0);
	ASSERT(oldroothdr.back == 0);

	/*
	 * If the root has more than one child, then don't do anything.
	 */
	if (oldroothdr.count > 1)
		return 0;

	/*
	 * Read in the (only) child block, then copy those bytes into
	 * the root block's buffer and free the original child block.
	 */
	btree = dp->d_ops->node_tree_p(oldroot);
	child = be32_to_cpu(btree[0].before);
	ASSERT(child != 0);
	error = xfs_da3_node_read(args->trans, dp, child, -1, &bp,
				  args->whichfork);
	if (error)
		return error;
	xfs_da_blkinfo_onlychild_validate(bp->b_addr, oldroothdr.level);

	/*
	 * This could be copying a leaf back into the root block in the case of
	 * there only being a single leaf block left in the tree. Hence we have
	 * to update the b_ops pointer as well to match the buffer type change
	 * that could occur. For dir3 blocks we also need to update the block
	 * number in the buffer header.
	 */
	memcpy(root_blk->bp->b_addr, bp->b_addr, args->geo->blksize);
	root_blk->bp->b_ops = bp->b_ops;
	xfs_trans_buf_copy_type(root_blk->bp, bp);
	if (oldroothdr.magic == XFS_DA3_NODE_MAGIC) {
		struct xfs_da3_blkinfo *da3 = root_blk->bp->b_addr;
		da3->blkno = cpu_to_be64(root_blk->bp->b_bn);
	}
	xfs_trans_log_buf(args->trans, root_blk->bp, 0,
			  args->geo->blksize - 1);
	error = xfs_da_shrink_inode(args, child, bp);
	return error;
}
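/*
 * Worked example (illustrative, not from the original source): with
 * node_ents = 64, xfs_da3_node_toosmall() below computes
 *
 *	count = 64 - (64 >> 2) - nodehdr.count = 48 - nodehdr.count
 *
 * so a block holding 20 entries may merge with a sibling holding up to
 * 28, leaving the combined block with at least 25% (16 entries) free.
 */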
/*
 * Check a node block and its neighbors to see if the block should be
 * collapsed into one or the other neighbor.  Always keep the block
 * with the smaller block number.
 * If the current block is over 50% full, don't try to join it, return 0.
 * If the block is empty, fill in the state structure and return 2.
 * If it can be collapsed, fill in the state structure and return 1.
 * If nothing can be done, return 0.
 */
STATIC int
xfs_da3_node_toosmall(
	struct xfs_da_state	*state,
	int			*action)
{
	struct xfs_da_intnode	*node;
	struct xfs_da_state_blk	*blk;
	struct xfs_da_blkinfo	*info;
	xfs_dablk_t		blkno;
	struct xfs_buf		*bp;
	struct xfs_da3_icnode_hdr nodehdr;
	int			count;
	int			forward;
	int			error;
	int			retval;
	int			i;
	struct xfs_inode	*dp = state->args->dp;

	trace_xfs_da_node_toosmall(state->args);

	/*
	 * Check for the degenerate case of the block being over 50% full.
	 * If so, it's not worth even looking to see if we might be able
	 * to coalesce with a sibling.
	 */
	blk = &state->path.blk[ state->path.active-1 ];
	info = blk->bp->b_addr;
	node = (xfs_da_intnode_t *)info;
	dp->d_ops->node_hdr_from_disk(&nodehdr, node);
	if (nodehdr.count > (state->args->geo->node_ents >> 1)) {
		*action = 0;	/* blk over 50%, don't try to join */
		return 0;	/* blk over 50%, don't try to join */
	}

	/*
	 * Check for the degenerate case of the block being empty.
	 * If the block is empty, we'll simply delete it, no need to
	 * coalesce it with a sibling block.  We choose (arbitrarily)
	 * to merge with the forward block unless it is NULL.
	 */
	if (nodehdr.count == 0) {
		/*
		 * Make altpath point to the block we want to keep and
		 * path point to the block we want to drop (this one).
		 */
		forward = (info->forw != 0);
		memcpy(&state->altpath, &state->path, sizeof(state->path));
		error = xfs_da3_path_shift(state, &state->altpath, forward,
					   0, &retval);
		if (error)
			return error;
		if (retval) {
			*action = 0;
		} else {
			*action = 2;
		}
		return 0;
	}

	/*
	 * Examine each sibling block to see if we can coalesce with
	 * at least 25% free space to spare.  We need to figure out
	 * whether to merge with the forward or the backward block.
	 * We prefer coalescing with the lower numbered sibling so as
	 * to shrink a directory over time.
	 */
	count  = state->args->geo->node_ents;
	count -= state->args->geo->node_ents >> 2;
	count -= nodehdr.count;

	/* start with smaller blk num */
	forward = nodehdr.forw < nodehdr.back;
	for (i = 0; i < 2; forward = !forward, i++) {
		struct xfs_da3_icnode_hdr thdr;
		if (forward)
			blkno = nodehdr.forw;
		else
			blkno = nodehdr.back;
		if (blkno == 0)
			continue;
		error = xfs_da3_node_read(state->args->trans, dp,
					blkno, -1, &bp, state->args->whichfork);
		if (error)
			return error;

		node = bp->b_addr;
		dp->d_ops->node_hdr_from_disk(&thdr, node);
		xfs_trans_brelse(state->args->trans, bp);

		if (count - thdr.count >= 0)
			break;	/* fits with at least 25% to spare */
	}
	if (i >= 2) {
		*action = 0;
		return 0;
	}

	/*
	 * Make altpath point to the block we want to keep (the lower
	 * numbered block) and path point to the block we want to drop.
	 */
	memcpy(&state->altpath, &state->path, sizeof(state->path));
	if (blkno < blk->blkno) {
		error = xfs_da3_path_shift(state, &state->altpath, forward,
					   0, &retval);
	} else {
		error = xfs_da3_path_shift(state, &state->path, forward,
					   0, &retval);
	}
	if (error)
		return error;
	if (retval) {
		*action = 0;
		return 0;
	}
	*action = 1;
	return 0;
}

/*
 * Pick up the last hash value from an intermediate node.
 */
STATIC uint
xfs_da3_node_lasthash(
	struct xfs_inode	*dp,
	struct xfs_buf		*bp,
	int			*count)
{
	struct xfs_da_intnode	 *node;
	struct xfs_da_node_entry *btree;
	struct xfs_da3_icnode_hdr nodehdr;

	node = bp->b_addr;
	dp->d_ops->node_hdr_from_disk(&nodehdr, node);
	if (count)
		*count = nodehdr.count;
	if (!nodehdr.count)
		return 0;
	btree = dp->d_ops->node_tree_p(node);
	return be32_to_cpu(btree[nodehdr.count - 1].hashval);
}

/*
 * Walk back up the tree adjusting hash values as necessary,
 * when we stop making changes, return.
 */
void
xfs_da3_fixhashpath(
	struct xfs_da_state	*state,
	struct xfs_da_state_path *path)
{
	struct xfs_da_state_blk	*blk;
	struct xfs_da_intnode	*node;
	struct xfs_da_node_entry *btree;
	xfs_dahash_t		lasthash=0;
	int			level;
	int			count;
	struct xfs_inode	*dp = state->args->dp;

	trace_xfs_da_fixhashpath(state->args);

	level = path->active-1;
	blk = &path->blk[ level ];
	switch (blk->magic) {
	case XFS_ATTR_LEAF_MAGIC:
		lasthash = xfs_attr_leaf_lasthash(blk->bp, &count);
		if (count == 0)
			return;
		break;
	case XFS_DIR2_LEAFN_MAGIC:
		lasthash = xfs_dir2_leaf_lasthash(dp, blk->bp, &count);
		if (count == 0)
			return;
		break;
	case XFS_DA_NODE_MAGIC:
		lasthash = xfs_da3_node_lasthash(dp, blk->bp, &count);
		if (count == 0)
			return;
		break;
	}
	for (blk--, level--; level >= 0; blk--, level--) {
		struct xfs_da3_icnode_hdr nodehdr;

		node = blk->bp->b_addr;
		dp->d_ops->node_hdr_from_disk(&nodehdr, node);
		btree = dp->d_ops->node_tree_p(node);
		if (be32_to_cpu(btree[blk->index].hashval) == lasthash)
			break;
		blk->hashval = lasthash;
		btree[blk->index].hashval = cpu_to_be32(lasthash);
		xfs_trans_log_buf(state->args->trans, blk->bp,
				  XFS_DA_LOGRANGE(node, &btree[blk->index],
						  sizeof(*btree)));

		lasthash = be32_to_cpu(btree[nodehdr.count - 1].hashval);
	}
}

/*
 * Remove an entry from an intermediate node.
 */
STATIC void
xfs_da3_node_remove(
	struct xfs_da_state	*state,
	struct xfs_da_state_blk	*drop_blk)
{
	struct xfs_da_intnode	*node;
	struct xfs_da3_icnode_hdr nodehdr;
	struct xfs_da_node_entry *btree;
	int			index;
	int			tmp;
	struct xfs_inode	*dp = state->args->dp;

	trace_xfs_da_node_remove(state->args);

	node = drop_blk->bp->b_addr;
	dp->d_ops->node_hdr_from_disk(&nodehdr, node);
	ASSERT(drop_blk->index < nodehdr.count);
	ASSERT(drop_blk->index >= 0);

	/*
	 * Copy over the offending entry, or just zero it out.
	 */
	index = drop_blk->index;
	btree = dp->d_ops->node_tree_p(node);
	if (index < nodehdr.count - 1) {
		tmp  = nodehdr.count - index - 1;
		tmp *= (uint)sizeof(xfs_da_node_entry_t);
		memmove(&btree[index], &btree[index + 1], tmp);
		xfs_trans_log_buf(state->args->trans, drop_blk->bp,
		    XFS_DA_LOGRANGE(node, &btree[index], tmp));
		index = nodehdr.count - 1;
	}
	memset(&btree[index], 0, sizeof(xfs_da_node_entry_t));
	xfs_trans_log_buf(state->args->trans, drop_blk->bp,
	    XFS_DA_LOGRANGE(node, &btree[index], sizeof(btree[index])));
	nodehdr.count -= 1;
	dp->d_ops->node_hdr_to_disk(node, &nodehdr);
	xfs_trans_log_buf(state->args->trans, drop_blk->bp,
	    XFS_DA_LOGRANGE(node, &node->hdr, dp->d_ops->node_hdr_size));

	/*
	 * Copy the last hash value from the block to propagate upwards.
	 */
	drop_blk->hashval = be32_to_cpu(btree[index - 1].hashval);
}

/*
 * Unbalance the elements between two intermediate nodes,
 * move all Btree elements from one node into another.
 */
STATIC void
xfs_da3_node_unbalance(
	struct xfs_da_state	*state,
	struct xfs_da_state_blk	*drop_blk,
	struct xfs_da_state_blk	*save_blk)
{
	struct xfs_da_intnode	*drop_node;
	struct xfs_da_intnode	*save_node;
	struct xfs_da_node_entry *drop_btree;
	struct xfs_da_node_entry *save_btree;
	struct xfs_da3_icnode_hdr drop_hdr;
	struct xfs_da3_icnode_hdr save_hdr;
	struct xfs_trans	*tp;
	int			sindex;
	int			tmp;
	struct xfs_inode	*dp = state->args->dp;

	trace_xfs_da_node_unbalance(state->args);

	drop_node = drop_blk->bp->b_addr;
	save_node = save_blk->bp->b_addr;
	dp->d_ops->node_hdr_from_disk(&drop_hdr, drop_node);
	dp->d_ops->node_hdr_from_disk(&save_hdr, save_node);
	drop_btree = dp->d_ops->node_tree_p(drop_node);
	save_btree = dp->d_ops->node_tree_p(save_node);
	tp = state->args->trans;

	/*
	 * If the dying block has lower hashvals, then move all the
	 * elements in the remaining block up to make a hole.
	 */
	if ((be32_to_cpu(drop_btree[0].hashval) <
			be32_to_cpu(save_btree[0].hashval)) ||
	    (be32_to_cpu(drop_btree[drop_hdr.count - 1].hashval) <
			be32_to_cpu(save_btree[save_hdr.count - 1].hashval))) {
		/* XXX: check this - is memmove dst correct? */
		tmp = save_hdr.count * sizeof(xfs_da_node_entry_t);
		memmove(&save_btree[drop_hdr.count], &save_btree[0], tmp);

		sindex = 0;
		xfs_trans_log_buf(tp, save_blk->bp,
			XFS_DA_LOGRANGE(save_node, &save_btree[0],
				(save_hdr.count + drop_hdr.count) *
						sizeof(xfs_da_node_entry_t)));
	} else {
		sindex = save_hdr.count;
		xfs_trans_log_buf(tp, save_blk->bp,
			XFS_DA_LOGRANGE(save_node, &save_btree[sindex],
				drop_hdr.count * sizeof(xfs_da_node_entry_t)));
	}

	/*
	 * Move all the B-tree elements from drop_blk to save_blk.
	 */
	tmp = drop_hdr.count * (uint)sizeof(xfs_da_node_entry_t);
	memcpy(&save_btree[sindex], &drop_btree[0], tmp);
	save_hdr.count += drop_hdr.count;

	dp->d_ops->node_hdr_to_disk(save_node, &save_hdr);
	xfs_trans_log_buf(tp, save_blk->bp,
		XFS_DA_LOGRANGE(save_node, &save_node->hdr,
				dp->d_ops->node_hdr_size));

	/*
	 * Save the last hashval in the remaining block for upward propagation.
	 */
	save_blk->hashval = be32_to_cpu(save_btree[save_hdr.count - 1].hashval);
}

/*========================================================================
 * Routines used for finding things in the Btree.
 *========================================================================*/

/*
 * Walk down the Btree looking for a particular filename, filling
 * in the state structure as we go.
 *
 * We will set the state structure to point to each of the elements
 * in each of the nodes where either the hashval is or should be.
 *
 * We support duplicate hashval's so for each entry in the current
 * node that could contain the desired hashval, descend.  This is a
 * pruned depth-first tree search.
 */
int							/* error */
xfs_da3_node_lookup_int(
	struct xfs_da_state	*state,
	int			*result)
{
	struct xfs_da_state_blk	*blk;
	struct xfs_da_blkinfo	*curr;
	struct xfs_da_intnode	*node;
	struct xfs_da_node_entry *btree;
	struct xfs_da3_icnode_hdr nodehdr;
	struct xfs_da_args	*args;
	xfs_dablk_t		blkno;
	xfs_dahash_t		hashval;
	xfs_dahash_t		btreehashval;
	int			probe;
	int			span;
	int			max;
	int			error;
	int			retval;
	unsigned int		expected_level = 0;
	uint16_t		magic;
	struct xfs_inode	*dp = state->args->dp;

	args = state->args;

	/*
	 * Descend thru the B-tree searching each level for the right
	 * node to use, until the right hashval is found.
	 */
	blkno = args->geo->leafblk;
	for (blk = &state->path.blk[0], state->path.active = 1;
			 state->path.active <= XFS_DA_NODE_MAXDEPTH;
			 blk++, state->path.active++) {
		/*
		 * Read the next node down in the tree.
		 */
		blk->blkno = blkno;
		error = xfs_da3_node_read(args->trans, args->dp, blkno,
					-1, &blk->bp, args->whichfork);
		if (error) {
			blk->blkno = 0;
			state->path.active--;
			return error;
		}
		curr = blk->bp->b_addr;
		magic = be16_to_cpu(curr->magic);

		if (magic == XFS_ATTR_LEAF_MAGIC ||
		    magic == XFS_ATTR3_LEAF_MAGIC) {
			blk->magic = XFS_ATTR_LEAF_MAGIC;
			blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL);
			break;
		}

		if (magic == XFS_DIR2_LEAFN_MAGIC ||
		    magic == XFS_DIR3_LEAFN_MAGIC) {
			blk->magic = XFS_DIR2_LEAFN_MAGIC;
			blk->hashval = xfs_dir2_leaf_lasthash(args->dp,
							      blk->bp, NULL);
			break;
		}

		if (magic != XFS_DA_NODE_MAGIC && magic != XFS_DA3_NODE_MAGIC)
			return -EFSCORRUPTED;

		blk->magic = XFS_DA_NODE_MAGIC;

		/*
		 * Search an intermediate node for a match.
		 */
		node = blk->bp->b_addr;
		dp->d_ops->node_hdr_from_disk(&nodehdr, node);
		btree = dp->d_ops->node_tree_p(node);

		/* Tree taller than we can handle; bail out! */
		if (nodehdr.level >= XFS_DA_NODE_MAXDEPTH)
			return -EFSCORRUPTED;

		/* Check the level from the root. */
		if (blkno == args->geo->leafblk)
			expected_level = nodehdr.level - 1;
		else if (expected_level != nodehdr.level)
			return -EFSCORRUPTED;
		else
			expected_level--;

		max = nodehdr.count;
		blk->hashval = be32_to_cpu(btree[max - 1].hashval);
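		/*
		 * Illustrative note (not in the original source): with
		 * max = 64 entries, span halves 32 -> 16 -> 8 -> 4, so the
		 * loop below leaves probe within a few entries of the
		 * target; the linear scans that follow then settle on the
		 * first entry whose hashval is >= the search hashval,
		 * which is what duplicate hashvals require.
		 */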
		/*
		 * Binary search.  (note: small blocks will skip loop)
		 */
		probe = span = max / 2;
		hashval = args->hashval;
		while (span > 4) {
			span /= 2;
			btreehashval = be32_to_cpu(btree[probe].hashval);
			if (btreehashval < hashval)
				probe += span;
			else if (btreehashval > hashval)
				probe -= span;
			else
				break;
		}
		ASSERT((probe >= 0) && (probe < max));
		ASSERT((span <= 4) ||
			(be32_to_cpu(btree[probe].hashval) == hashval));

		/*
		 * Since we may have duplicate hashval's, find the first
		 * matching hashval in the node.
		 */
		while (probe > 0 &&
		       be32_to_cpu(btree[probe].hashval) >= hashval) {
			probe--;
		}
		while (probe < max &&
		       be32_to_cpu(btree[probe].hashval) < hashval) {
			probe++;
		}

		/*
		 * Pick the right block to descend on.
		 */
		if (probe == max) {
			blk->index = max - 1;
			blkno = be32_to_cpu(btree[max - 1].before);
		} else {
			blk->index = probe;
			blkno = be32_to_cpu(btree[probe].before);
		}

		/* We can't point back to the root. */
		if (blkno == args->geo->leafblk)
			return -EFSCORRUPTED;
	}

	if (expected_level != 0)
		return -EFSCORRUPTED;

	/*
	 * A leaf block that ends in the hashval that we are interested in
	 * (final hashval == search hashval) means that the next block may
	 * contain more entries with the same hashval, shift upward to the
	 * next leaf and keep searching.
	 */
	for (;;) {
		if (blk->magic == XFS_DIR2_LEAFN_MAGIC) {
			retval = xfs_dir2_leafn_lookup_int(blk->bp, args,
							&blk->index, state);
		} else if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
			retval = xfs_attr3_leaf_lookup_int(blk->bp, args);
			blk->index = args->index;
			args->blkno = blk->blkno;
		} else {
			ASSERT(0);
			return -EFSCORRUPTED;
		}
		if (((retval == -ENOENT) || (retval == -ENOATTR)) &&
		    (blk->hashval == args->hashval)) {
			error = xfs_da3_path_shift(state, &state->path, 1, 1,
						   &retval);
			if (error)
				return error;
			if (retval == 0) {
				continue;
			} else if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
				/* path_shift() gives ENOENT */
				retval = -ENOATTR;
			}
		}
		break;
	}
	*result = retval;
	return 0;
}

/*========================================================================
 * Utility routines.
 *========================================================================*/

/*
 * Compare two intermediate nodes for "order".
 */
STATIC int
xfs_da3_node_order(
	struct xfs_inode *dp,
	struct xfs_buf	*node1_bp,
	struct xfs_buf	*node2_bp)
{
	struct xfs_da_intnode	*node1;
	struct xfs_da_intnode	*node2;
	struct xfs_da_node_entry *btree1;
	struct xfs_da_node_entry *btree2;
	struct xfs_da3_icnode_hdr node1hdr;
	struct xfs_da3_icnode_hdr node2hdr;

	node1 = node1_bp->b_addr;
	node2 = node2_bp->b_addr;
	dp->d_ops->node_hdr_from_disk(&node1hdr, node1);
	dp->d_ops->node_hdr_from_disk(&node2hdr, node2);
	btree1 = dp->d_ops->node_tree_p(node1);
	btree2 = dp->d_ops->node_tree_p(node2);

	if (node1hdr.count > 0 && node2hdr.count > 0 &&
	    ((be32_to_cpu(btree2[0].hashval) < be32_to_cpu(btree1[0].hashval)) ||
	     (be32_to_cpu(btree2[node2hdr.count - 1].hashval) <
	      be32_to_cpu(btree1[node1hdr.count - 1].hashval)))) {
		return 1;
	}
	return 0;
}
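/*
 * Illustrative sketch (not from the original source) of the sibling list
 * that xfs_da3_blk_link() below maintains.  Linking "new" after "old":
 *
 *	before:  ... <-> [old] <-> [next] <-> ...
 *	after:   ... <-> [old] <-> [new] <-> [next] <-> ...
 *
 * Hash order decides the direction: if the new block's hashvals sort
 * before the old block's, "new" is linked in front of "old" instead.
 */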
/*
 * Link a new block into a doubly linked list of blocks (of whatever type).
 */
int							/* error */
xfs_da3_blk_link(
	struct xfs_da_state	*state,
	struct xfs_da_state_blk	*old_blk,
	struct xfs_da_state_blk	*new_blk)
{
	struct xfs_da_blkinfo	*old_info;
	struct xfs_da_blkinfo	*new_info;
	struct xfs_da_blkinfo	*tmp_info;
	struct xfs_da_args	*args;
	struct xfs_buf		*bp;
	int			before = 0;
	int			error;
	struct xfs_inode	*dp = state->args->dp;

	/*
	 * Set up environment.
	 */
	args = state->args;
	ASSERT(args != NULL);
	old_info = old_blk->bp->b_addr;
	new_info = new_blk->bp->b_addr;
	ASSERT(old_blk->magic == XFS_DA_NODE_MAGIC ||
	       old_blk->magic == XFS_DIR2_LEAFN_MAGIC ||
	       old_blk->magic == XFS_ATTR_LEAF_MAGIC);

	switch (old_blk->magic) {
	case XFS_ATTR_LEAF_MAGIC:
		before = xfs_attr_leaf_order(old_blk->bp, new_blk->bp);
		break;
	case XFS_DIR2_LEAFN_MAGIC:
		before = xfs_dir2_leafn_order(dp, old_blk->bp, new_blk->bp);
		break;
	case XFS_DA_NODE_MAGIC:
		before = xfs_da3_node_order(dp, old_blk->bp, new_blk->bp);
		break;
	}

	/*
	 * Link blocks in appropriate order.
	 */
	if (before) {
		/*
		 * Link new block in before existing block.
		 */
		trace_xfs_da_link_before(args);
		new_info->forw = cpu_to_be32(old_blk->blkno);
		new_info->back = old_info->back;
		if (old_info->back) {
			error = xfs_da3_node_read(args->trans, dp,
						be32_to_cpu(old_info->back),
						-1, &bp, args->whichfork);
			if (error)
				return error;
			ASSERT(bp != NULL);
			tmp_info = bp->b_addr;
			ASSERT(tmp_info->magic == old_info->magic);
			ASSERT(be32_to_cpu(tmp_info->forw) == old_blk->blkno);
			tmp_info->forw = cpu_to_be32(new_blk->blkno);
			xfs_trans_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1);
		}
		old_info->back = cpu_to_be32(new_blk->blkno);
	} else {
		/*
		 * Link new block in after existing block.
		 */
		trace_xfs_da_link_after(args);
		new_info->forw = old_info->forw;
		new_info->back = cpu_to_be32(old_blk->blkno);
		if (old_info->forw) {
			error = xfs_da3_node_read(args->trans, dp,
						be32_to_cpu(old_info->forw),
						-1, &bp, args->whichfork);
			if (error)
				return error;
			ASSERT(bp != NULL);
			tmp_info = bp->b_addr;
			ASSERT(tmp_info->magic == old_info->magic);
			ASSERT(be32_to_cpu(tmp_info->back) == old_blk->blkno);
			tmp_info->back = cpu_to_be32(new_blk->blkno);
			xfs_trans_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1);
		}
		old_info->forw = cpu_to_be32(new_blk->blkno);
	}

	xfs_trans_log_buf(args->trans, old_blk->bp, 0, sizeof(*tmp_info) - 1);
	xfs_trans_log_buf(args->trans, new_blk->bp, 0, sizeof(*tmp_info) - 1);
	return 0;
}

/*
 * Unlink a block from a doubly linked list of blocks.
 */
STATIC int						/* error */
xfs_da3_blk_unlink(
	struct xfs_da_state	*state,
	struct xfs_da_state_blk	*drop_blk,
	struct xfs_da_state_blk	*save_blk)
{
	struct xfs_da_blkinfo	*drop_info;
	struct xfs_da_blkinfo	*save_info;
	struct xfs_da_blkinfo	*tmp_info;
	struct xfs_da_args	*args;
	struct xfs_buf		*bp;
	int			error;

	/*
	 * Set up environment.
	 */
	args = state->args;
	ASSERT(args != NULL);
	save_info = save_blk->bp->b_addr;
	drop_info = drop_blk->bp->b_addr;
	ASSERT(save_blk->magic == XFS_DA_NODE_MAGIC ||
	       save_blk->magic == XFS_DIR2_LEAFN_MAGIC ||
	       save_blk->magic == XFS_ATTR_LEAF_MAGIC);
	ASSERT(save_blk->magic == drop_blk->magic);
	ASSERT((be32_to_cpu(save_info->forw) == drop_blk->blkno) ||
	       (be32_to_cpu(save_info->back) == drop_blk->blkno));
	ASSERT((be32_to_cpu(drop_info->forw) == save_blk->blkno) ||
	       (be32_to_cpu(drop_info->back) == save_blk->blkno));

	/*
	 * Unlink the leaf block from the doubly linked chain of leaves.
	 */
	if (be32_to_cpu(save_info->back) == drop_blk->blkno) {
		trace_xfs_da_unlink_back(args);
		save_info->back = drop_info->back;
		if (drop_info->back) {
			error = xfs_da3_node_read(args->trans, args->dp,
						be32_to_cpu(drop_info->back),
						-1, &bp, args->whichfork);
			if (error)
				return error;
			ASSERT(bp != NULL);
			tmp_info = bp->b_addr;
			ASSERT(tmp_info->magic == save_info->magic);
			ASSERT(be32_to_cpu(tmp_info->forw) == drop_blk->blkno);
			tmp_info->forw = cpu_to_be32(save_blk->blkno);
			xfs_trans_log_buf(args->trans, bp, 0,
						sizeof(*tmp_info) - 1);
		}
	} else {
		trace_xfs_da_unlink_forward(args);
		save_info->forw = drop_info->forw;
		if (drop_info->forw) {
			error = xfs_da3_node_read(args->trans, args->dp,
						be32_to_cpu(drop_info->forw),
						-1, &bp, args->whichfork);
			if (error)
				return error;
			ASSERT(bp != NULL);
			tmp_info = bp->b_addr;
			ASSERT(tmp_info->magic == save_info->magic);
			ASSERT(be32_to_cpu(tmp_info->back) == drop_blk->blkno);
			tmp_info->back = cpu_to_be32(save_blk->blkno);
			xfs_trans_log_buf(args->trans, bp, 0,
						sizeof(*tmp_info) - 1);
		}
	}

	xfs_trans_log_buf(args->trans, save_blk->bp, 0, sizeof(*save_info) - 1);
	return 0;
}
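/*
 * Illustrative sketch (not from the original source) of a "forward" shift
 * by xfs_da3_path_shift() below, moving from leaf L2 to its sibling L3:
 *
 *	        [root]                     [root]
 *	       /      \                   /      \
 *	  [node A]  [node B]   ==>   [node A]  [node B]
 *	   /    \      |              /    \      |
 *	 [L1]  [L2]  [L3]           [L1]  [L2]  [L3]
 *	         ^path                            ^path
 *
 * The walk goes up until an index can move sideways (node A -> node B
 * here), then back down the edge of the new subtree to the same depth.
 */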
/*
 * Move a path "forward" or "!forward" one block at the current level.
 *
 * This routine will adjust a "path" to point to the next block
 * "forward" (higher hashvalues) or "!forward" (lower hashvals) in the
 * Btree, including updating pointers to the intermediate nodes between
 * the new bottom and the root.
 */
int							/* error */
xfs_da3_path_shift(
	struct xfs_da_state	*state,
	struct xfs_da_state_path *path,
	int			forward,
	int			release,
	int			*result)
{
	struct xfs_da_state_blk	*blk;
	struct xfs_da_blkinfo	*info;
	struct xfs_da_intnode	*node;
	struct xfs_da_args	*args;
	struct xfs_da_node_entry *btree;
	struct xfs_da3_icnode_hdr nodehdr;
	struct xfs_buf		*bp;
	xfs_dablk_t		blkno = 0;
	int			level;
	int			error;
	struct xfs_inode	*dp = state->args->dp;

	trace_xfs_da_path_shift(state->args);

	/*
	 * Roll up the Btree looking for the first block where our
	 * current index is not at the edge of the block.  Note that
	 * we skip the bottom layer because we want the sibling block.
	 */
	args = state->args;
	ASSERT(args != NULL);
	ASSERT(path != NULL);
	ASSERT((path->active > 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
	level = (path->active-1) - 1;	/* skip bottom layer in path */
	for (blk = &path->blk[level]; level >= 0; blk--, level--) {
		node = blk->bp->b_addr;
		dp->d_ops->node_hdr_from_disk(&nodehdr, node);
		btree = dp->d_ops->node_tree_p(node);

		if (forward && (blk->index < nodehdr.count - 1)) {
			blk->index++;
			blkno = be32_to_cpu(btree[blk->index].before);
			break;
		} else if (!forward && (blk->index > 0)) {
			blk->index--;
			blkno = be32_to_cpu(btree[blk->index].before);
			break;
		}
	}
	if (level < 0) {
		*result = -ENOENT;	/* we're out of our tree */
		ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
		return 0;
	}

	/*
	 * Roll down the edge of the subtree until we reach the
	 * same depth we were at originally.
	 */
	for (blk++, level++; level < path->active; blk++, level++) {
		/*
		 * Read the next child block into a local buffer.
		 */
		error = xfs_da3_node_read(args->trans, dp, blkno, -1, &bp,
					  args->whichfork);
		if (error)
			return error;

		/*
		 * Release the old block (if it's dirty, the trans doesn't
		 * actually let go) and swap the local buffer into the path
		 * structure. This ensures failure of the above read doesn't
		 * set a NULL buffer in an active slot in the path.
		 */
		if (release)
			xfs_trans_brelse(args->trans, blk->bp);
		blk->blkno = blkno;
		blk->bp = bp;

		info = blk->bp->b_addr;
		ASSERT(info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC) ||
		       info->magic == cpu_to_be16(XFS_DA3_NODE_MAGIC) ||
		       info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
		       info->magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC) ||
		       info->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC) ||
		       info->magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC));


		/*
		 * Note: we flatten the magic number to a single type so we
		 * don't have to compare against crc/non-crc types elsewhere.
		 */
1961 */ 1962 switch (be16_to_cpu(info->magic)) { 1963 case XFS_DA_NODE_MAGIC: 1964 case XFS_DA3_NODE_MAGIC: 1965 blk->magic = XFS_DA_NODE_MAGIC; 1966 node = (xfs_da_intnode_t *)info; 1967 dp->d_ops->node_hdr_from_disk(&nodehdr, node); 1968 btree = dp->d_ops->node_tree_p(node); 1969 blk->hashval = be32_to_cpu(btree[nodehdr.count - 1].hashval); 1970 if (forward) 1971 blk->index = 0; 1972 else 1973 blk->index = nodehdr.count - 1; 1974 blkno = be32_to_cpu(btree[blk->index].before); 1975 break; 1976 case XFS_ATTR_LEAF_MAGIC: 1977 case XFS_ATTR3_LEAF_MAGIC: 1978 blk->magic = XFS_ATTR_LEAF_MAGIC; 1979 ASSERT(level == path->active-1); 1980 blk->index = 0; 1981 blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL); 1982 break; 1983 case XFS_DIR2_LEAFN_MAGIC: 1984 case XFS_DIR3_LEAFN_MAGIC: 1985 blk->magic = XFS_DIR2_LEAFN_MAGIC; 1986 ASSERT(level == path->active-1); 1987 blk->index = 0; 1988 blk->hashval = xfs_dir2_leaf_lasthash(args->dp, 1989 blk->bp, NULL); 1990 break; 1991 default: 1992 ASSERT(0); 1993 break; 1994 } 1995 } 1996 *result = 0; 1997 return 0; 1998 } 1999 2000 2001 /*======================================================================== 2002 * Utility routines. 2003 *========================================================================*/ 2004 2005 /* 2006 * Implement a simple hash on a character string. 2007 * Rotate the hash value by 7 bits, then XOR each character in. 2008 * This is implemented with some source-level loop unrolling. 2009 */ 2010 xfs_dahash_t 2011 xfs_da_hashname(const uint8_t *name, int namelen) 2012 { 2013 xfs_dahash_t hash; 2014 2015 /* 2016 * Do four characters at a time as long as we can. 2017 */ 2018 for (hash = 0; namelen >= 4; namelen -= 4, name += 4) 2019 hash = (name[0] << 21) ^ (name[1] << 14) ^ (name[2] << 7) ^ 2020 (name[3] << 0) ^ rol32(hash, 7 * 4); 2021 2022 /* 2023 * Now do the rest of the characters. 2024 */ 2025 switch (namelen) { 2026 case 3: 2027 return (name[0] << 14) ^ (name[1] << 7) ^ (name[2] << 0) ^ 2028 rol32(hash, 7 * 3); 2029 case 2: 2030 return (name[0] << 7) ^ (name[1] << 0) ^ rol32(hash, 7 * 2); 2031 case 1: 2032 return (name[0] << 0) ^ rol32(hash, 7 * 1); 2033 default: /* case 0: */ 2034 return hash; 2035 } 2036 } 2037 2038 enum xfs_dacmp 2039 xfs_da_compname( 2040 struct xfs_da_args *args, 2041 const unsigned char *name, 2042 int len) 2043 { 2044 return (args->namelen == len && memcmp(args->name, name, len) == 0) ? 2045 XFS_CMP_EXACT : XFS_CMP_DIFFERENT; 2046 } 2047 2048 static xfs_dahash_t 2049 xfs_default_hashname( 2050 struct xfs_name *name) 2051 { 2052 return xfs_da_hashname(name->name, name->len); 2053 } 2054 2055 const struct xfs_nameops xfs_default_nameops = { 2056 .hashname = xfs_default_hashname, 2057 .compname = xfs_da_compname 2058 }; 2059 2060 int 2061 xfs_da_grow_inode_int( 2062 struct xfs_da_args *args, 2063 xfs_fileoff_t *bno, 2064 int count) 2065 { 2066 struct xfs_trans *tp = args->trans; 2067 struct xfs_inode *dp = args->dp; 2068 int w = args->whichfork; 2069 xfs_rfsblock_t nblks = dp->i_d.di_nblocks; 2070 struct xfs_bmbt_irec map, *mapp; 2071 int nmap, error, got, i, mapi; 2072 2073 /* 2074 * Find a spot in the file space to put the new block. 2075 */ 2076 error = xfs_bmap_first_unused(tp, dp, count, bno, w); 2077 if (error) 2078 return error; 2079 2080 /* 2081 * Try mapping it in one filesystem block. 
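*
* We first ask xfs_bmapi_write() for one extent covering the whole
* range (XFS_BMAPI_CONTIG). If that maps nothing and more than one
* block was asked for, the fallback below retries without CONTIG and
* accepts a fragmented mapping, looping until the range is covered.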
2082 */ 2083 nmap = 1; 2084 error = xfs_bmapi_write(tp, dp, *bno, count, 2085 xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA|XFS_BMAPI_CONTIG, 2086 args->total, &map, &nmap); 2087 if (error) 2088 return error; 2089 2090 ASSERT(nmap <= 1); 2091 if (nmap == 1) { 2092 mapp = &map; 2093 mapi = 1; 2094 } else if (nmap == 0 && count > 1) { 2095 xfs_fileoff_t b; 2096 int c; 2097 2098 /* 2099 * If we didn't get it and the block might work if fragmented, 2100 * try without the CONTIG flag. Loop until we get it all. 2101 */ 2102 mapp = kmem_alloc(sizeof(*mapp) * count, KM_SLEEP); 2103 for (b = *bno, mapi = 0; b < *bno + count; ) { 2104 nmap = min(XFS_BMAP_MAX_NMAP, count); 2105 c = (int)(*bno + count - b); 2106 error = xfs_bmapi_write(tp, dp, b, c, 2107 xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA, 2108 args->total, &mapp[mapi], &nmap); 2109 if (error) 2110 goto out_free_map; 2111 if (nmap < 1) 2112 break; 2113 mapi += nmap; 2114 b = mapp[mapi - 1].br_startoff + 2115 mapp[mapi - 1].br_blockcount; 2116 } 2117 } else { 2118 mapi = 0; 2119 mapp = NULL; 2120 } 2121 2122 /* 2123 * Count the blocks we got, make sure it matches the total. 2124 */ 2125 for (i = 0, got = 0; i < mapi; i++) 2126 got += mapp[i].br_blockcount; 2127 if (got != count || mapp[0].br_startoff != *bno || 2128 mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount != 2129 *bno + count) { 2130 error = -ENOSPC; 2131 goto out_free_map; 2132 } 2133 2134 /* account for newly allocated blocks in reserved blocks total */ 2135 args->total -= dp->i_d.di_nblocks - nblks; 2136 2137 out_free_map: 2138 if (mapp != &map) 2139 kmem_free(mapp); 2140 return error; 2141 } 2142 2143 /* 2144 * Add a block to the btree ahead of the file. 2145 * Return the new block number to the caller. 2146 */ 2147 int 2148 xfs_da_grow_inode( 2149 struct xfs_da_args *args, 2150 xfs_dablk_t *new_blkno) 2151 { 2152 xfs_fileoff_t bno; 2153 int error; 2154 2155 trace_xfs_da_grow_inode(args); 2156 2157 bno = args->geo->leafblk; 2158 error = xfs_da_grow_inode_int(args, &bno, args->geo->fsbcount); 2159 if (!error) 2160 *new_blkno = (xfs_dablk_t)bno; 2161 return error; 2162 } 2163 2164 /* 2165 * Ick. We need to always be able to remove a btree block, even 2166 * if there's no space reservation because the filesystem is full. 2167 * This is called if xfs_bunmapi on a btree block fails due to ENOSPC. 2168 * It swaps the target block with the last block in the file. The 2169 * last block in the file can always be removed, since removing it 2170 * cannot cause a bmap btree split.
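*
* In outline, with "dead" the block being freed and "last" the
* last da block in the fork:
*
*	1. copy the contents of "last" over "dead" and log it;
*	2. repoint the forw/back pointers of the old siblings of
*	   "last" at "dead";
*	3. walk down from the root to the parent entry naming
*	   "last" and repoint its "before" field at "dead";
*	4. return the blkno and buffer of "last" so the caller
*	   unmaps that block instead.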
2171 */ 2172 STATIC int 2173 xfs_da3_swap_lastblock( 2174 struct xfs_da_args *args, 2175 xfs_dablk_t *dead_blknop, 2176 struct xfs_buf **dead_bufp) 2177 { 2178 struct xfs_da_blkinfo *dead_info; 2179 struct xfs_da_blkinfo *sib_info; 2180 struct xfs_da_intnode *par_node; 2181 struct xfs_da_intnode *dead_node; 2182 struct xfs_dir2_leaf *dead_leaf2; 2183 struct xfs_da_node_entry *btree; 2184 struct xfs_da3_icnode_hdr par_hdr; 2185 struct xfs_inode *dp; 2186 struct xfs_trans *tp; 2187 struct xfs_mount *mp; 2188 struct xfs_buf *dead_buf; 2189 struct xfs_buf *last_buf; 2190 struct xfs_buf *sib_buf; 2191 struct xfs_buf *par_buf; 2192 xfs_dahash_t dead_hash; 2193 xfs_fileoff_t lastoff; 2194 xfs_dablk_t dead_blkno; 2195 xfs_dablk_t last_blkno; 2196 xfs_dablk_t sib_blkno; 2197 xfs_dablk_t par_blkno; 2198 int error; 2199 int w; 2200 int entno; 2201 int level; 2202 int dead_level; 2203 2204 trace_xfs_da_swap_lastblock(args); 2205 2206 dead_buf = *dead_bufp; 2207 dead_blkno = *dead_blknop; 2208 tp = args->trans; 2209 dp = args->dp; 2210 w = args->whichfork; 2211 ASSERT(w == XFS_DATA_FORK); 2212 mp = dp->i_mount; 2213 lastoff = args->geo->freeblk; 2214 error = xfs_bmap_last_before(tp, dp, &lastoff, w); 2215 if (error) 2216 return error; 2217 if (unlikely(lastoff == 0)) { 2218 XFS_ERROR_REPORT("xfs_da_swap_lastblock(1)", XFS_ERRLEVEL_LOW, 2219 mp); 2220 return -EFSCORRUPTED; 2221 } 2222 /* 2223 * Read the last block in the btree space. 2224 */ 2225 last_blkno = (xfs_dablk_t)lastoff - args->geo->fsbcount; 2226 error = xfs_da3_node_read(tp, dp, last_blkno, -1, &last_buf, w); 2227 if (error) 2228 return error; 2229 /* 2230 * Copy the last block into the dead buffer and log it. 2231 */ 2232 memcpy(dead_buf->b_addr, last_buf->b_addr, args->geo->blksize); 2233 xfs_trans_log_buf(tp, dead_buf, 0, args->geo->blksize - 1); 2234 dead_info = dead_buf->b_addr; 2235 /* 2236 * Get values from the moved block. 2237 */ 2238 if (dead_info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) || 2239 dead_info->magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)) { 2240 struct xfs_dir3_icleaf_hdr leafhdr; 2241 struct xfs_dir2_leaf_entry *ents; 2242 2243 dead_leaf2 = (xfs_dir2_leaf_t *)dead_info; 2244 dp->d_ops->leaf_hdr_from_disk(&leafhdr, dead_leaf2); 2245 ents = dp->d_ops->leaf_ents_p(dead_leaf2); 2246 dead_level = 0; 2247 dead_hash = be32_to_cpu(ents[leafhdr.count - 1].hashval); 2248 } else { 2249 struct xfs_da3_icnode_hdr deadhdr; 2250 2251 dead_node = (xfs_da_intnode_t *)dead_info; 2252 dp->d_ops->node_hdr_from_disk(&deadhdr, dead_node); 2253 btree = dp->d_ops->node_tree_p(dead_node); 2254 dead_level = deadhdr.level; 2255 dead_hash = be32_to_cpu(btree[deadhdr.count - 1].hashval); 2256 } 2257 sib_buf = par_buf = NULL; 2258 /* 2259 * If the moved block has a left sibling, fix up the pointers. 2260 */ 2261 if ((sib_blkno = be32_to_cpu(dead_info->back))) { 2262 error = xfs_da3_node_read(tp, dp, sib_blkno, -1, &sib_buf, w); 2263 if (error) 2264 goto done; 2265 sib_info = sib_buf->b_addr; 2266 if (unlikely( 2267 be32_to_cpu(sib_info->forw) != last_blkno || 2268 sib_info->magic != dead_info->magic)) { 2269 XFS_ERROR_REPORT("xfs_da_swap_lastblock(2)", 2270 XFS_ERRLEVEL_LOW, mp); 2271 error = -EFSCORRUPTED; 2272 goto done; 2273 } 2274 sib_info->forw = cpu_to_be32(dead_blkno); 2275 xfs_trans_log_buf(tp, sib_buf, 2276 XFS_DA_LOGRANGE(sib_info, &sib_info->forw, 2277 sizeof(sib_info->forw))); 2278 sib_buf = NULL; 2279 } 2280 /* 2281 * If the moved block has a right sibling, fix up the pointers. 
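* This is the mirror image of the left sibling fixup above: the
* right sibling's back pointer still names last_blkno, so repoint
* it at dead_blkno.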
2282 */ 2283 if ((sib_blkno = be32_to_cpu(dead_info->forw))) { 2284 error = xfs_da3_node_read(tp, dp, sib_blkno, -1, &sib_buf, w); 2285 if (error) 2286 goto done; 2287 sib_info = sib_buf->b_addr; 2288 if (unlikely( 2289 be32_to_cpu(sib_info->back) != last_blkno || 2290 sib_info->magic != dead_info->magic)) { 2291 XFS_ERROR_REPORT("xfs_da_swap_lastblock(3)", 2292 XFS_ERRLEVEL_LOW, mp); 2293 error = -EFSCORRUPTED; 2294 goto done; 2295 } 2296 sib_info->back = cpu_to_be32(dead_blkno); 2297 xfs_trans_log_buf(tp, sib_buf, 2298 XFS_DA_LOGRANGE(sib_info, &sib_info->back, 2299 sizeof(sib_info->back))); 2300 sib_buf = NULL; 2301 } 2302 par_blkno = args->geo->leafblk; 2303 level = -1; 2304 /* 2305 * Walk down the tree looking for the parent of the moved block. 2306 */ 2307 for (;;) { 2308 error = xfs_da3_node_read(tp, dp, par_blkno, -1, &par_buf, w); 2309 if (error) 2310 goto done; 2311 par_node = par_buf->b_addr; 2312 dp->d_ops->node_hdr_from_disk(&par_hdr, par_node); 2313 if (level >= 0 && level != par_hdr.level + 1) { 2314 XFS_ERROR_REPORT("xfs_da_swap_lastblock(4)", 2315 XFS_ERRLEVEL_LOW, mp); 2316 error = -EFSCORRUPTED; 2317 goto done; 2318 } 2319 level = par_hdr.level; 2320 btree = dp->d_ops->node_tree_p(par_node); 2321 for (entno = 0; 2322 entno < par_hdr.count && 2323 be32_to_cpu(btree[entno].hashval) < dead_hash; 2324 entno++) 2325 continue; 2326 if (entno == par_hdr.count) { 2327 XFS_ERROR_REPORT("xfs_da_swap_lastblock(5)", 2328 XFS_ERRLEVEL_LOW, mp); 2329 error = -EFSCORRUPTED; 2330 goto done; 2331 } 2332 par_blkno = be32_to_cpu(btree[entno].before); 2333 if (level == dead_level + 1) 2334 break; 2335 xfs_trans_brelse(tp, par_buf); 2336 par_buf = NULL; 2337 } 2338 /* 2339 * We're in the right parent block. 2340 * Look for the right entry. 2341 */ 2342 for (;;) { 2343 for (; 2344 entno < par_hdr.count && 2345 be32_to_cpu(btree[entno].before) != last_blkno; 2346 entno++) 2347 continue; 2348 if (entno < par_hdr.count) 2349 break; 2350 par_blkno = par_hdr.forw; 2351 xfs_trans_brelse(tp, par_buf); 2352 par_buf = NULL; 2353 if (unlikely(par_blkno == 0)) { 2354 XFS_ERROR_REPORT("xfs_da_swap_lastblock(6)", 2355 XFS_ERRLEVEL_LOW, mp); 2356 error = -EFSCORRUPTED; 2357 goto done; 2358 } 2359 error = xfs_da3_node_read(tp, dp, par_blkno, -1, &par_buf, w); 2360 if (error) 2361 goto done; 2362 par_node = par_buf->b_addr; 2363 dp->d_ops->node_hdr_from_disk(&par_hdr, par_node); 2364 if (par_hdr.level != level) { 2365 XFS_ERROR_REPORT("xfs_da_swap_lastblock(7)", 2366 XFS_ERRLEVEL_LOW, mp); 2367 error = -EFSCORRUPTED; 2368 goto done; 2369 } 2370 btree = dp->d_ops->node_tree_p(par_node); 2371 entno = 0; 2372 } 2373 /* 2374 * Update the parent entry pointing to the moved block. 2375 */ 2376 btree[entno].before = cpu_to_be32(dead_blkno); 2377 xfs_trans_log_buf(tp, par_buf, 2378 XFS_DA_LOGRANGE(par_node, &btree[entno].before, 2379 sizeof(btree[entno].before))); 2380 *dead_blknop = last_blkno; 2381 *dead_bufp = last_buf; 2382 return 0; 2383 done: 2384 if (par_buf) 2385 xfs_trans_brelse(tp, par_buf); 2386 if (sib_buf) 2387 xfs_trans_brelse(tp, sib_buf); 2388 xfs_trans_brelse(tp, last_buf); 2389 return error; 2390 } 2391 2392 /* 2393 * Remove a btree block from a directory or attribute. 
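*
* xfs_bunmapi() is asked to unmap the block; if that fails with
* -ENOSPC on a directory (data fork), the block is first swapped
* with the last block in the fork via xfs_da3_swap_lastblock() and
* the unmap is retried at the block's new location.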
2394 */ 2395 int 2396 xfs_da_shrink_inode( 2397 struct xfs_da_args *args, 2398 xfs_dablk_t dead_blkno, 2399 struct xfs_buf *dead_buf) 2400 { 2401 struct xfs_inode *dp; 2402 int done, error, w, count; 2403 struct xfs_trans *tp; 2404 2405 trace_xfs_da_shrink_inode(args); 2406 2407 dp = args->dp; 2408 w = args->whichfork; 2409 tp = args->trans; 2410 count = args->geo->fsbcount; 2411 for (;;) { 2412 /* 2413 * Remove extents. If we get ENOSPC for a dir we have to move 2414 * the last block to the place we want to kill. 2415 */ 2416 error = xfs_bunmapi(tp, dp, dead_blkno, count, 2417 xfs_bmapi_aflag(w), 0, &done); 2418 if (error == -ENOSPC) { 2419 if (w != XFS_DATA_FORK) 2420 break; 2421 error = xfs_da3_swap_lastblock(args, &dead_blkno, 2422 &dead_buf); 2423 if (error) 2424 break; 2425 } else { 2426 break; 2427 } 2428 } 2429 xfs_trans_binval(tp, dead_buf); 2430 return error; 2431 } 2432 2433 /* 2434 * See if the mapping(s) for this btree block are valid, i.e. 2435 * don't contain holes, are logically contiguous, and cover the whole range. 2436 */ 2437 STATIC int 2438 xfs_da_map_covers_blocks( 2439 int nmap, 2440 xfs_bmbt_irec_t *mapp, 2441 xfs_dablk_t bno, 2442 int count) 2443 { 2444 int i; 2445 xfs_fileoff_t off; 2446 2447 for (i = 0, off = bno; i < nmap; i++) { 2448 if (mapp[i].br_startblock == HOLESTARTBLOCK || 2449 mapp[i].br_startblock == DELAYSTARTBLOCK) { 2450 return 0; 2451 } 2452 if (off != mapp[i].br_startoff) { 2453 return 0; 2454 } 2455 off += mapp[i].br_blockcount; 2456 } 2457 return off == bno + count; 2458 } 2459 2460 /* 2461 * Convert a struct xfs_bmbt_irec to a struct xfs_buf_map. 2462 * 2463 * For the single map case, it is assumed that the caller has provided a pointer 2464 * to a valid xfs_buf_map. For the multiple map case, this function will 2465 * allocate the xfs_buf_map to hold all the maps and replace the caller's single 2466 * map pointer with the allocated map. 2467 */ 2468 static int 2469 xfs_buf_map_from_irec( 2470 struct xfs_mount *mp, 2471 struct xfs_buf_map **mapp, 2472 int *nmaps, 2473 struct xfs_bmbt_irec *irecs, 2474 int nirecs) 2475 { 2476 struct xfs_buf_map *map; 2477 int i; 2478 2479 ASSERT(*nmaps == 1); 2480 ASSERT(nirecs >= 1); 2481 2482 if (nirecs > 1) { 2483 map = kmem_zalloc(nirecs * sizeof(struct xfs_buf_map), 2484 KM_SLEEP | KM_NOFS); 2485 if (!map) 2486 return -ENOMEM; 2487 *mapp = map; 2488 } 2489 2490 *nmaps = nirecs; 2491 map = *mapp; 2492 for (i = 0; i < *nmaps; i++) { 2493 ASSERT(irecs[i].br_startblock != DELAYSTARTBLOCK && 2494 irecs[i].br_startblock != HOLESTARTBLOCK); 2495 map[i].bm_bn = XFS_FSB_TO_DADDR(mp, irecs[i].br_startblock); 2496 map[i].bm_len = XFS_FSB_TO_BB(mp, irecs[i].br_blockcount); 2497 } 2498 return 0; 2499 } 2500 2501 /* 2502 * Map the block we are given ready for reading. There are three possible return 2503 * values: 2504 * -1 - will be returned if we land in a hole and mappedbno == -2 so the 2505 * caller knows not to execute a subsequent read. 2506 * 0 - if we mapped the block successfully. 2507 * <0 - negative errno if there was an error.
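*
* The mappedbno argument selects the mode: -1 and -2 mean "look the
* dablk up in the bmap tree" (-2 additionally tolerates landing in
* a hole), while any other value is taken as a known disk address
* and used directly for a single mapping.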
2508 */ 2509 static int 2510 xfs_dabuf_map( 2511 struct xfs_inode *dp, 2512 xfs_dablk_t bno, 2513 xfs_daddr_t mappedbno, 2514 int whichfork, 2515 struct xfs_buf_map **map, 2516 int *nmaps) 2517 { 2518 struct xfs_mount *mp = dp->i_mount; 2519 int nfsb; 2520 int error = 0; 2521 struct xfs_bmbt_irec irec; 2522 struct xfs_bmbt_irec *irecs = &irec; 2523 int nirecs; 2524 2525 ASSERT(map && *map); 2526 ASSERT(*nmaps == 1); 2527 2528 if (whichfork == XFS_DATA_FORK) 2529 nfsb = mp->m_dir_geo->fsbcount; 2530 else 2531 nfsb = mp->m_attr_geo->fsbcount; 2532 2533 /* 2534 * Caller doesn't have a mapping. -2 means don't complain 2535 * if we land in a hole. 2536 */ 2537 if (mappedbno == -1 || mappedbno == -2) { 2538 /* 2539 * Optimize the one-block case. 2540 */ 2541 if (nfsb != 1) 2542 irecs = kmem_zalloc(sizeof(irec) * nfsb, 2543 KM_SLEEP | KM_NOFS); 2544 2545 nirecs = nfsb; 2546 error = xfs_bmapi_read(dp, (xfs_fileoff_t)bno, nfsb, irecs, 2547 &nirecs, xfs_bmapi_aflag(whichfork)); 2548 if (error) 2549 goto out; 2550 } else { 2551 irecs->br_startblock = XFS_DADDR_TO_FSB(mp, mappedbno); 2552 irecs->br_startoff = (xfs_fileoff_t)bno; 2553 irecs->br_blockcount = nfsb; 2554 irecs->br_state = 0; 2555 nirecs = 1; 2556 } 2557 2558 if (!xfs_da_map_covers_blocks(nirecs, irecs, bno, nfsb)) { 2559 error = mappedbno == -2 ? -1 : -EFSCORRUPTED; 2560 if (unlikely(error == -EFSCORRUPTED)) { 2561 if (xfs_error_level >= XFS_ERRLEVEL_LOW) { 2562 int i; 2563 xfs_alert(mp, "%s: bno %lld dir: inode %lld", 2564 __func__, (long long)bno, 2565 (long long)dp->i_ino); 2566 for (i = 0; i < nirecs; i++) { 2567 xfs_alert(mp, 2568 "[%02d] br_startoff %lld br_startblock %lld br_blockcount %lld br_state %d", 2569 i, 2570 (long long)irecs[i].br_startoff, 2571 (long long)irecs[i].br_startblock, 2572 (long long)irecs[i].br_blockcount, 2573 irecs[i].br_state); 2574 } 2575 } 2576 XFS_ERROR_REPORT("xfs_da_do_buf(1)", 2577 XFS_ERRLEVEL_LOW, mp); 2578 } 2579 goto out; 2580 } 2581 error = xfs_buf_map_from_irec(mp, map, nmaps, irecs, nirecs); 2582 out: 2583 if (irecs != &irec) 2584 kmem_free(irecs); 2585 return error; 2586 } 2587 2588 /* 2589 * Get a buffer for the dir/attr block. 2590 */ 2591 int 2592 xfs_da_get_buf( 2593 struct xfs_trans *trans, 2594 struct xfs_inode *dp, 2595 xfs_dablk_t bno, 2596 xfs_daddr_t mappedbno, 2597 struct xfs_buf **bpp, 2598 int whichfork) 2599 { 2600 struct xfs_buf *bp; 2601 struct xfs_buf_map map; 2602 struct xfs_buf_map *mapp; 2603 int nmap; 2604 int error; 2605 2606 *bpp = NULL; 2607 mapp = &map; 2608 nmap = 1; 2609 error = xfs_dabuf_map(dp, bno, mappedbno, whichfork, 2610 &mapp, &nmap); 2611 if (error) { 2612 /* mapping a hole is not an error, but we don't continue */ 2613 if (error == -1) 2614 error = 0; 2615 goto out_free; 2616 } 2617 2618 bp = xfs_trans_get_buf_map(trans, dp->i_mount->m_ddev_targp, 2619 mapp, nmap, 0); 2620 error = bp ? bp->b_error : -EIO; 2621 if (error) { 2622 if (bp) 2623 xfs_trans_brelse(trans, bp); 2624 goto out_free; 2625 } 2626 2627 *bpp = bp; 2628 2629 out_free: 2630 if (mapp != &map) 2631 kmem_free(mapp); 2632 2633 return error; 2634 } 2635 2636 /* 2637 * Get a buffer for the dir/attr block, fill in the contents.
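*
* A minimal usage sketch (hypothetical caller, not taken from this
* file), reading a data-fork block through the bmap tree with the
* da node verifier:
*
*	struct xfs_buf	*bp;
*	int		error;
*
*	error = xfs_da_read_buf(tp, dp, blkno, -1, &bp,
*				XFS_DATA_FORK, &xfs_da3_node_buf_ops);
*	if (error)
*		return error;
*	... use bp->b_addr, then xfs_trans_brelse(tp, bp) ...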
2638 */ 2639 int 2640 xfs_da_read_buf( 2641 struct xfs_trans *trans, 2642 struct xfs_inode *dp, 2643 xfs_dablk_t bno, 2644 xfs_daddr_t mappedbno, 2645 struct xfs_buf **bpp, 2646 int whichfork, 2647 const struct xfs_buf_ops *ops) 2648 { 2649 struct xfs_buf *bp; 2650 struct xfs_buf_map map; 2651 struct xfs_buf_map *mapp; 2652 int nmap; 2653 int error; 2654 2655 *bpp = NULL; 2656 mapp = &map; 2657 nmap = 1; 2658 error = xfs_dabuf_map(dp, bno, mappedbno, whichfork, 2659 &mapp, &nmap); 2660 if (error) { 2661 /* mapping a hole is not an error, but we don't continue */ 2662 if (error == -1) 2663 error = 0; 2664 goto out_free; 2665 } 2666 2667 error = xfs_trans_read_buf_map(dp->i_mount, trans, 2668 dp->i_mount->m_ddev_targp, 2669 mapp, nmap, 0, &bp, ops); 2670 if (error) 2671 goto out_free; 2672 2673 if (whichfork == XFS_ATTR_FORK) 2674 xfs_buf_set_ref(bp, XFS_ATTR_BTREE_REF); 2675 else 2676 xfs_buf_set_ref(bp, XFS_DIR_BTREE_REF); 2677 *bpp = bp; 2678 out_free: 2679 if (mapp != &map) 2680 kmem_free(mapp); 2681 2682 return error; 2683 } 2684 2685 /* 2686 * Readahead the dir/attr block. 2687 */ 2688 int 2689 xfs_da_reada_buf( 2690 struct xfs_inode *dp, 2691 xfs_dablk_t bno, 2692 xfs_daddr_t mappedbno, 2693 int whichfork, 2694 const struct xfs_buf_ops *ops) 2695 { 2696 struct xfs_buf_map map; 2697 struct xfs_buf_map *mapp; 2698 int nmap; 2699 int error; 2700 2701 mapp = &map; 2702 nmap = 1; 2703 error = xfs_dabuf_map(dp, bno, mappedbno, whichfork, 2704 &mapp, &nmap); 2705 if (error) { 2706 /* mapping a hole is not an error, but we don't continue */ 2707 if (error == -1) 2708 error = 0; 2709 goto out_free; 2710 } 2711 2712 mappedbno = mapp[0].bm_bn; 2713 xfs_buf_readahead_map(dp->i_mount->m_ddev_targp, mapp, nmap, ops); 2714 2715 out_free: 2716 if (mapp != &map) 2717 kmem_free(mapp); 2718 2719 return error; 2720 } 2721
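/*
 * Illustrative only (no such caller appears in this file): issuing
 * readahead on a da block whose disk address is not yet known,
 * passing mappedbno == -2 so that landing in a hole is silently
 * skipped rather than reported:
 *
 *	(void)xfs_da_reada_buf(dp, bno, -2, XFS_DATA_FORK,
 *			       &xfs_da3_node_buf_ops);
 */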