/*
 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
 * Copyright (c) 2013 Red Hat, Inc.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_mount.h"
#include "xfs_da_format.h"
#include "xfs_da_btree.h"
#include "xfs_dir2.h"
#include "xfs_dir2_priv.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_inode_item.h"
#include "xfs_alloc.h"
#include "xfs_bmap.h"
#include "xfs_attr.h"
#include "xfs_attr_leaf.h"
#include "xfs_error.h"
#include "xfs_trace.h"
#include "xfs_cksum.h"
#include "xfs_buf_item.h"
#include "xfs_log.h"

/*
 * xfs_da_btree.c
 *
 * Routines to implement directories as Btrees of hashed names.
 */

/*========================================================================
 * Function prototypes for the kernel.
 *========================================================================*/

/*
 * Routines used for growing the Btree.
 */
STATIC int xfs_da3_root_split(xfs_da_state_t *state,
					    xfs_da_state_blk_t *existing_root,
					    xfs_da_state_blk_t *new_child);
STATIC int xfs_da3_node_split(xfs_da_state_t *state,
					    xfs_da_state_blk_t *existing_blk,
					    xfs_da_state_blk_t *split_blk,
					    xfs_da_state_blk_t *blk_to_add,
					    int treelevel,
					    int *result);
STATIC void xfs_da3_node_rebalance(xfs_da_state_t *state,
					 xfs_da_state_blk_t *node_blk_1,
					 xfs_da_state_blk_t *node_blk_2);
STATIC void xfs_da3_node_add(xfs_da_state_t *state,
				   xfs_da_state_blk_t *old_node_blk,
				   xfs_da_state_blk_t *new_node_blk);

/*
 * Routines used for shrinking the Btree.
 */
STATIC int xfs_da3_root_join(xfs_da_state_t *state,
					   xfs_da_state_blk_t *root_blk);
STATIC int xfs_da3_node_toosmall(xfs_da_state_t *state, int *retval);
STATIC void xfs_da3_node_remove(xfs_da_state_t *state,
					      xfs_da_state_blk_t *drop_blk);
STATIC void xfs_da3_node_unbalance(xfs_da_state_t *state,
					 xfs_da_state_blk_t *src_node_blk,
					 xfs_da_state_blk_t *dst_node_blk);

/*
 * Utility routines.
 */
STATIC int xfs_da3_blk_unlink(xfs_da_state_t *state,
				    xfs_da_state_blk_t *drop_blk,
				    xfs_da_state_blk_t *save_blk);


kmem_zone_t *xfs_da_state_zone;	/* anchor for state struct zone */

/*
 * Allocate a dir-state structure.
 * We don't put them on the stack since they're large.
 */
xfs_da_state_t *
xfs_da_state_alloc(void)
{
	return kmem_zone_zalloc(xfs_da_state_zone, KM_NOFS);
}
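/*
 * Illustrative usage sketch (not itself part of the tree code): callers
 * pair the allocation with xfs_da_state_free() once the operation is
 * done, e.g.:
 *
 *	state = xfs_da_state_alloc();
 *	state->args = args;
 *	state->mp = args->dp->i_mount;
 *	error = xfs_da3_node_lookup_int(state, &retval);
 *	...
 *	xfs_da_state_free(state);
 */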

/*
 * Kill the altpath contents of a da-state structure.
 */
STATIC void
xfs_da_state_kill_altpath(xfs_da_state_t *state)
{
	int	i;

	for (i = 0; i < state->altpath.active; i++)
		state->altpath.blk[i].bp = NULL;
	state->altpath.active = 0;
}

/*
 * Free a da-state structure.
 */
void
xfs_da_state_free(xfs_da_state_t *state)
{
	xfs_da_state_kill_altpath(state);
#ifdef DEBUG
	memset((char *)state, 0, sizeof(*state));
#endif /* DEBUG */
	kmem_zone_free(xfs_da_state_zone, state);
}

static xfs_failaddr_t
xfs_da3_node_verify(
	struct xfs_buf		*bp)
{
	struct xfs_mount	*mp = bp->b_target->bt_mount;
	struct xfs_da_intnode	*hdr = bp->b_addr;
	struct xfs_da3_icnode_hdr ichdr;
	const struct xfs_dir_ops *ops;

	ops = xfs_dir_get_ops(mp, NULL);

	ops->node_hdr_from_disk(&ichdr, hdr);

	if (xfs_sb_version_hascrc(&mp->m_sb)) {
		struct xfs_da3_node_hdr *hdr3 = bp->b_addr;

		if (ichdr.magic != XFS_DA3_NODE_MAGIC)
			return __this_address;

		if (!uuid_equal(&hdr3->info.uuid, &mp->m_sb.sb_meta_uuid))
			return __this_address;
		if (be64_to_cpu(hdr3->info.blkno) != bp->b_bn)
			return __this_address;
		if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->info.lsn)))
			return __this_address;
	} else {
		if (ichdr.magic != XFS_DA_NODE_MAGIC)
			return __this_address;
	}
	if (ichdr.level == 0)
		return __this_address;
	if (ichdr.level > XFS_DA_NODE_MAXDEPTH)
		return __this_address;
	if (ichdr.count == 0)
		return __this_address;

	/*
	 * we don't know if the node is for an attribute or directory tree,
	 * so only fail if the count is outside both bounds
	 */
	if (ichdr.count > mp->m_dir_geo->node_ents &&
	    ichdr.count > mp->m_attr_geo->node_ents)
		return __this_address;

	/* XXX: hash order check? */

	return NULL;
}

static void
xfs_da3_node_write_verify(
	struct xfs_buf	*bp)
{
	struct xfs_mount	*mp = bp->b_target->bt_mount;
	struct xfs_buf_log_item	*bip = bp->b_log_item;
	struct xfs_da3_node_hdr *hdr3 = bp->b_addr;
	xfs_failaddr_t		fa;

	fa = xfs_da3_node_verify(bp);
	if (fa) {
		xfs_verifier_error(bp, -EFSCORRUPTED, fa);
		return;
	}

	if (!xfs_sb_version_hascrc(&mp->m_sb))
		return;

	if (bip)
		hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn);

	xfs_buf_update_cksum(bp, XFS_DA3_NODE_CRC_OFF);
}

/*
 * leaf/node format detection on trees is sketchy, so a node read can be done on
 * leaf level blocks when detection identifies the tree as a node format tree
 * incorrectly. In this case, we need to swap the verifier to match the correct
 * format of the block being read.
 */
static void
xfs_da3_node_read_verify(
	struct xfs_buf		*bp)
{
	struct xfs_da_blkinfo	*info = bp->b_addr;
	xfs_failaddr_t		fa;

	switch (be16_to_cpu(info->magic)) {
		case XFS_DA3_NODE_MAGIC:
			if (!xfs_buf_verify_cksum(bp, XFS_DA3_NODE_CRC_OFF)) {
				xfs_verifier_error(bp, -EFSBADCRC,
						__this_address);
				break;
			}
			/* fall through */
		case XFS_DA_NODE_MAGIC:
			fa = xfs_da3_node_verify(bp);
			if (fa)
				xfs_verifier_error(bp, -EFSCORRUPTED, fa);
			return;
		case XFS_ATTR_LEAF_MAGIC:
		case XFS_ATTR3_LEAF_MAGIC:
			bp->b_ops = &xfs_attr3_leaf_buf_ops;
			bp->b_ops->verify_read(bp);
			return;
		case XFS_DIR2_LEAFN_MAGIC:
		case XFS_DIR3_LEAFN_MAGIC:
			bp->b_ops = &xfs_dir3_leafn_buf_ops;
			bp->b_ops->verify_read(bp);
			return;
		default:
			xfs_verifier_error(bp, -EFSCORRUPTED, __this_address);
			break;
	}
}

/* Verify the structure of a da3 block. */
static xfs_failaddr_t
xfs_da3_node_verify_struct(
	struct xfs_buf		*bp)
{
	struct xfs_da_blkinfo	*info = bp->b_addr;

	switch (be16_to_cpu(info->magic)) {
	case XFS_DA3_NODE_MAGIC:
	case XFS_DA_NODE_MAGIC:
		return xfs_da3_node_verify(bp);
	case XFS_ATTR_LEAF_MAGIC:
	case XFS_ATTR3_LEAF_MAGIC:
		bp->b_ops = &xfs_attr3_leaf_buf_ops;
		return bp->b_ops->verify_struct(bp);
	case XFS_DIR2_LEAFN_MAGIC:
	case XFS_DIR3_LEAFN_MAGIC:
		bp->b_ops = &xfs_dir3_leafn_buf_ops;
		return bp->b_ops->verify_struct(bp);
	default:
		return __this_address;
	}
}

const struct xfs_buf_ops xfs_da3_node_buf_ops = {
	.name = "xfs_da3_node",
	.verify_read = xfs_da3_node_read_verify,
	.verify_write = xfs_da3_node_write_verify,
	.verify_struct = xfs_da3_node_verify_struct,
};

int
xfs_da3_node_read(
	struct xfs_trans	*tp,
	struct xfs_inode	*dp,
	xfs_dablk_t		bno,
	xfs_daddr_t		mappedbno,
	struct xfs_buf		**bpp,
	int			which_fork)
{
	int			err;

	err = xfs_da_read_buf(tp, dp, bno, mappedbno, bpp,
					which_fork, &xfs_da3_node_buf_ops);
	if (!err && tp && *bpp) {
		struct xfs_da_blkinfo	*info = (*bpp)->b_addr;
		int			type;

		switch (be16_to_cpu(info->magic)) {
		case XFS_DA_NODE_MAGIC:
		case XFS_DA3_NODE_MAGIC:
			type = XFS_BLFT_DA_NODE_BUF;
			break;
		case XFS_ATTR_LEAF_MAGIC:
		case XFS_ATTR3_LEAF_MAGIC:
			type = XFS_BLFT_ATTR_LEAF_BUF;
			break;
		case XFS_DIR2_LEAFN_MAGIC:
		case XFS_DIR3_LEAFN_MAGIC:
			type = XFS_BLFT_DIR_LEAFN_BUF;
			break;
		default:
			type = 0;
			ASSERT(0);
			break;
		}
		xfs_trans_buf_set_type(tp, *bpp, type);
	}
	return err;
}

/*========================================================================
 * Routines used for growing the Btree.
 *========================================================================*/

/*
 * Create the initial contents of an intermediate node.
 */
int
xfs_da3_node_create(
	struct xfs_da_args	*args,
	xfs_dablk_t		blkno,
	int			level,
	struct xfs_buf		**bpp,
	int			whichfork)
{
	struct xfs_da_intnode	*node;
	struct xfs_trans	*tp = args->trans;
	struct xfs_mount	*mp = tp->t_mountp;
	struct xfs_da3_icnode_hdr ichdr = {0};
	struct xfs_buf		*bp;
	int			error;
	struct xfs_inode	*dp = args->dp;

	trace_xfs_da_node_create(args);
	ASSERT(level <= XFS_DA_NODE_MAXDEPTH);

	error = xfs_da_get_buf(tp, dp, blkno, -1, &bp, whichfork);
	if (error)
		return error;
	bp->b_ops = &xfs_da3_node_buf_ops;
	xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DA_NODE_BUF);
	node = bp->b_addr;

	if (xfs_sb_version_hascrc(&mp->m_sb)) {
		struct xfs_da3_node_hdr *hdr3 = bp->b_addr;

		memset(hdr3, 0, sizeof(struct xfs_da3_node_hdr));
		ichdr.magic = XFS_DA3_NODE_MAGIC;
		hdr3->info.blkno = cpu_to_be64(bp->b_bn);
		hdr3->info.owner = cpu_to_be64(args->dp->i_ino);
		uuid_copy(&hdr3->info.uuid, &mp->m_sb.sb_meta_uuid);
	} else {
		ichdr.magic = XFS_DA_NODE_MAGIC;
	}
	ichdr.level = level;

	dp->d_ops->node_hdr_to_disk(node, &ichdr);
	xfs_trans_log_buf(tp, bp,
		XFS_DA_LOGRANGE(node, &node->hdr, dp->d_ops->node_hdr_size));

	*bpp = bp;
	return 0;
}

/*
 * Split a leaf node, rebalance, then possibly split
 * intermediate nodes, rebalance, etc.
 */
int							/* error */
xfs_da3_split(
	struct xfs_da_state	*state)
{
	struct xfs_da_state_blk	*oldblk;
	struct xfs_da_state_blk	*newblk;
	struct xfs_da_state_blk	*addblk;
	struct xfs_da_intnode	*node;
	int			max;
	int			action = 0;
	int			error;
	int			i;

	trace_xfs_da_split(state->args);

	/*
	 * Walk back up the tree splitting/inserting/adjusting as necessary.
	 * If we need to insert and there isn't room, split the node, then
	 * decide which fragment to insert the new block from below into.
	 * Note that we may split the root this way, but we need more fixup.
	 */
	max = state->path.active - 1;
	ASSERT((max >= 0) && (max < XFS_DA_NODE_MAXDEPTH));
	ASSERT(state->path.blk[max].magic == XFS_ATTR_LEAF_MAGIC ||
	       state->path.blk[max].magic == XFS_DIR2_LEAFN_MAGIC);

	addblk = &state->path.blk[max];		/* initial dummy value */
	for (i = max; (i >= 0) && addblk; state->path.active--, i--) {
		oldblk = &state->path.blk[i];
		newblk = &state->altpath.blk[i];

		/*
		 * If a leaf node then
		 *     Allocate a new leaf node, then rebalance across them.
		 * else if an intermediate node then
		 *     We split on the last layer, must we split the node?
		 */
		switch (oldblk->magic) {
		case XFS_ATTR_LEAF_MAGIC:
			error = xfs_attr3_leaf_split(state, oldblk, newblk);
			if ((error != 0) && (error != -ENOSPC)) {
				return error;	/* GROT: attr is inconsistent */
			}
			if (!error) {
				addblk = newblk;
				break;
			}
			/*
			 * Entry wouldn't fit, split the leaf again. The new
			 * extrablk will be consumed by xfs_da3_node_split if
			 * the node is split.
			 */
			state->extravalid = 1;
			if (state->inleaf) {
				state->extraafter = 0;	/* before newblk */
				trace_xfs_attr_leaf_split_before(state->args);
				error = xfs_attr3_leaf_split(state, oldblk,
							    &state->extrablk);
			} else {
				state->extraafter = 1;	/* after newblk */
				trace_xfs_attr_leaf_split_after(state->args);
				error = xfs_attr3_leaf_split(state, newblk,
							    &state->extrablk);
			}
			if (error)
				return error;	/* GROT: attr inconsistent */
			addblk = newblk;
			break;
		case XFS_DIR2_LEAFN_MAGIC:
			error = xfs_dir2_leafn_split(state, oldblk, newblk);
			if (error)
				return error;
			addblk = newblk;
			break;
		case XFS_DA_NODE_MAGIC:
			error = xfs_da3_node_split(state, oldblk, newblk, addblk,
							 max - i, &action);
			addblk->bp = NULL;
			if (error)
				return error;	/* GROT: dir is inconsistent */
			/*
			 * Record the newly split block for the next time thru?
			 */
			if (action)
				addblk = newblk;
			else
				addblk = NULL;
			break;
		}

		/*
		 * Update the btree to show the new hashval for this child.
		 */
		xfs_da3_fixhashpath(state, &state->path);
	}
	if (!addblk)
		return 0;

	/*
	 * xfs_da3_node_split() should have consumed any extra blocks we added
	 * during a double leaf split in the attr fork. This is guaranteed as
	 * we can't be here if the attr fork only has a single leaf block.
	 */
	ASSERT(state->extravalid == 0 ||
	       state->path.blk[max].magic == XFS_DIR2_LEAFN_MAGIC);

	/*
	 * Split the root node.
	 */
	ASSERT(state->path.active == 0);
	oldblk = &state->path.blk[0];
	error = xfs_da3_root_split(state, oldblk, addblk);
	if (error) {
		addblk->bp = NULL;
		return error;	/* GROT: dir is inconsistent */
	}

	/*
	 * Update pointers to the node which used to be block 0 and just got
	 * bumped because of the addition of a new root node. Note that the
	 * original block 0 could be at any position in the list of blocks in
	 * the tree.
	 *
	 * Note: the magic numbers and sibling pointers are in the same physical
	 * place for both v2 and v3 headers (by design). Hence it doesn't matter
	 * which version of the xfs_da_intnode structure we use here as the
	 * result will be the same using either structure.
	 */
	node = oldblk->bp->b_addr;
	if (node->hdr.info.forw) {
		ASSERT(be32_to_cpu(node->hdr.info.forw) == addblk->blkno);
		node = addblk->bp->b_addr;
		node->hdr.info.back = cpu_to_be32(oldblk->blkno);
		xfs_trans_log_buf(state->args->trans, addblk->bp,
				  XFS_DA_LOGRANGE(node, &node->hdr.info,
				  sizeof(node->hdr.info)));
	}
	node = oldblk->bp->b_addr;
	if (node->hdr.info.back) {
		ASSERT(be32_to_cpu(node->hdr.info.back) == addblk->blkno);
		node = addblk->bp->b_addr;
		node->hdr.info.forw = cpu_to_be32(oldblk->blkno);
		xfs_trans_log_buf(state->args->trans, addblk->bp,
				  XFS_DA_LOGRANGE(node, &node->hdr.info,
				  sizeof(node->hdr.info)));
	}
	addblk->bp = NULL;
	return 0;
}

/*
 * Split the root.  We have to create a new root and point to the two
 * parts (the split old root) that we just created.  Copy block zero to
 * the EOF, extending the inode in the process.
 */
STATIC int						/* error */
xfs_da3_root_split(
	struct xfs_da_state	*state,
	struct xfs_da_state_blk	*blk1,
	struct xfs_da_state_blk	*blk2)
{
	struct xfs_da_intnode	*node;
	struct xfs_da_intnode	*oldroot;
	struct xfs_da_node_entry *btree;
	struct xfs_da3_icnode_hdr nodehdr;
	struct xfs_da_args	*args;
	struct xfs_buf		*bp;
	struct xfs_inode	*dp;
	struct xfs_trans	*tp;
	struct xfs_dir2_leaf	*leaf;
	xfs_dablk_t		blkno;
	int			level;
	int			error;
	int			size;

	trace_xfs_da_root_split(state->args);

	/*
	 * Copy the existing (incorrect) block from the root node position
	 * to a free space somewhere.
	 */
	args = state->args;
	error = xfs_da_grow_inode(args, &blkno);
	if (error)
		return error;

	dp = args->dp;
	tp = args->trans;
	error = xfs_da_get_buf(tp, dp, blkno, -1, &bp, args->whichfork);
	if (error)
		return error;
	node = bp->b_addr;
	oldroot = blk1->bp->b_addr;
	if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC) ||
	    oldroot->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC)) {
		struct xfs_da3_icnode_hdr icnodehdr;

		dp->d_ops->node_hdr_from_disk(&icnodehdr, oldroot);
		btree = dp->d_ops->node_tree_p(oldroot);
		size = (int)((char *)&btree[icnodehdr.count] - (char *)oldroot);
		level = icnodehdr.level;

		/*
		 * we are about to copy oldroot to bp, so set up the type
		 * of bp while we know exactly what it will be.
		 */
		xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DA_NODE_BUF);
	} else {
		struct xfs_dir3_icleaf_hdr leafhdr;
		struct xfs_dir2_leaf_entry *ents;

		leaf = (xfs_dir2_leaf_t *)oldroot;
		dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
		ents = dp->d_ops->leaf_ents_p(leaf);

		ASSERT(leafhdr.magic == XFS_DIR2_LEAFN_MAGIC ||
		       leafhdr.magic == XFS_DIR3_LEAFN_MAGIC);
		size = (int)((char *)&ents[leafhdr.count] - (char *)leaf);
		level = 0;

		/*
		 * we are about to copy oldroot to bp, so set up the type
		 * of bp while we know exactly what it will be.
		 */
		xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_LEAFN_BUF);
	}

	/*
	 * we can copy most of the information in the node from one block to
	 * another, but for CRC enabled headers we have to make sure that the
	 * block specific identifiers are kept intact. We update the buffer
	 * directly for this.
	 */
	memcpy(node, oldroot, size);
	if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC) ||
	    oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)) {
		struct xfs_da3_intnode *node3 = (struct xfs_da3_intnode *)node;

		node3->hdr.info.blkno = cpu_to_be64(bp->b_bn);
	}
	xfs_trans_log_buf(tp, bp, 0, size - 1);

	bp->b_ops = blk1->bp->b_ops;
	xfs_trans_buf_copy_type(bp, blk1->bp);
	blk1->bp = bp;
	blk1->blkno = blkno;

	/*
	 * Set up the new root node.
	 */
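	/*
	 * Descriptive note: the root of a da btree always lives at a fixed
	 * da block number -- geo->leafblk for a directory data fork, block
	 * 0 for an attribute fork -- so the new root is created in place
	 * here, now that the old root's contents have been copied out of
	 * the way above.
	 */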
	error = xfs_da3_node_create(args,
		(args->whichfork == XFS_DATA_FORK) ? args->geo->leafblk : 0,
		level + 1, &bp, args->whichfork);
	if (error)
		return error;

	node = bp->b_addr;
	dp->d_ops->node_hdr_from_disk(&nodehdr, node);
	btree = dp->d_ops->node_tree_p(node);
	btree[0].hashval = cpu_to_be32(blk1->hashval);
	btree[0].before = cpu_to_be32(blk1->blkno);
	btree[1].hashval = cpu_to_be32(blk2->hashval);
	btree[1].before = cpu_to_be32(blk2->blkno);
	nodehdr.count = 2;
	dp->d_ops->node_hdr_to_disk(node, &nodehdr);

#ifdef DEBUG
	if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
	    oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)) {
		ASSERT(blk1->blkno >= args->geo->leafblk &&
		       blk1->blkno < args->geo->freeblk);
		ASSERT(blk2->blkno >= args->geo->leafblk &&
		       blk2->blkno < args->geo->freeblk);
	}
#endif

	/* Header is already logged by xfs_da_node_create */
	xfs_trans_log_buf(tp, bp,
		XFS_DA_LOGRANGE(node, btree, sizeof(xfs_da_node_entry_t) * 2));

	return 0;
}

/*
 * Split the node, rebalance, then add the new entry.
 */
STATIC int						/* error */
xfs_da3_node_split(
	struct xfs_da_state	*state,
	struct xfs_da_state_blk	*oldblk,
	struct xfs_da_state_blk	*newblk,
	struct xfs_da_state_blk	*addblk,
	int			treelevel,
	int			*result)
{
	struct xfs_da_intnode	*node;
	struct xfs_da3_icnode_hdr nodehdr;
	xfs_dablk_t		blkno;
	int			newcount;
	int			error;
	int			useextra;
	struct xfs_inode	*dp = state->args->dp;

	trace_xfs_da_node_split(state->args);

	node = oldblk->bp->b_addr;
	dp->d_ops->node_hdr_from_disk(&nodehdr, node);

	/*
	 * With V2 dirs the extra block is data or freespace.
	 */
	useextra = state->extravalid && state->args->whichfork == XFS_ATTR_FORK;
	newcount = 1 + useextra;
	/*
	 * Do we have to split the node?
	 */
	if (nodehdr.count + newcount > state->args->geo->node_ents) {
		/*
		 * Allocate a new node, add to the doubly linked chain of
		 * nodes, then move some of our excess entries into it.
		 */
		error = xfs_da_grow_inode(state->args, &blkno);
		if (error)
			return error;	/* GROT: dir is inconsistent */

		error = xfs_da3_node_create(state->args, blkno, treelevel,
					   &newblk->bp, state->args->whichfork);
		if (error)
			return error;	/* GROT: dir is inconsistent */
		newblk->blkno = blkno;
		newblk->magic = XFS_DA_NODE_MAGIC;
		xfs_da3_node_rebalance(state, oldblk, newblk);
		error = xfs_da3_blk_link(state, oldblk, newblk);
		if (error)
			return error;
		*result = 1;
	} else {
		*result = 0;
	}

	/*
	 * Insert the new entry(s) into the correct block
	 * (updating last hashval in the process).
	 *
	 * xfs_da3_node_add() inserts BEFORE the given index,
	 * and as a result of using node_lookup_int() we always
	 * point to a valid entry (not after one), but a split
	 * operation always results in a new block whose hashvals
	 * FOLLOW the current block.
	 *
	 * If we had double-split op below us, then add the extra block too.
	 */
	node = oldblk->bp->b_addr;
	dp->d_ops->node_hdr_from_disk(&nodehdr, node);
	if (oldblk->index <= nodehdr.count) {
		oldblk->index++;
		xfs_da3_node_add(state, oldblk, addblk);
		if (useextra) {
			if (state->extraafter)
				oldblk->index++;
			xfs_da3_node_add(state, oldblk, &state->extrablk);
			state->extravalid = 0;
		}
	} else {
		newblk->index++;
		xfs_da3_node_add(state, newblk, addblk);
		if (useextra) {
			if (state->extraafter)
				newblk->index++;
			xfs_da3_node_add(state, newblk, &state->extrablk);
			state->extravalid = 0;
		}
	}

	return 0;
}

/*
 * Balance the btree elements between two intermediate nodes,
 * usually one full and one empty.
 *
 * NOTE: if blk2 is empty, then it will get the upper half of blk1.
 */
STATIC void
xfs_da3_node_rebalance(
	struct xfs_da_state	*state,
	struct xfs_da_state_blk	*blk1,
	struct xfs_da_state_blk	*blk2)
{
	struct xfs_da_intnode	*node1;
	struct xfs_da_intnode	*node2;
	struct xfs_da_intnode	*tmpnode;
	struct xfs_da_node_entry *btree1;
	struct xfs_da_node_entry *btree2;
	struct xfs_da_node_entry *btree_s;
	struct xfs_da_node_entry *btree_d;
	struct xfs_da3_icnode_hdr nodehdr1;
	struct xfs_da3_icnode_hdr nodehdr2;
	struct xfs_trans	*tp;
	int			count;
	int			tmp;
	int			swap = 0;
	struct xfs_inode	*dp = state->args->dp;

	trace_xfs_da_node_rebalance(state->args);

	node1 = blk1->bp->b_addr;
	node2 = blk2->bp->b_addr;
	dp->d_ops->node_hdr_from_disk(&nodehdr1, node1);
	dp->d_ops->node_hdr_from_disk(&nodehdr2, node2);
	btree1 = dp->d_ops->node_tree_p(node1);
	btree2 = dp->d_ops->node_tree_p(node2);

	/*
	 * Figure out how many entries need to move, and in which direction.
	 * Swap the nodes around if that makes it simpler.
	 */
	if (nodehdr1.count > 0 && nodehdr2.count > 0 &&
	    ((be32_to_cpu(btree2[0].hashval) < be32_to_cpu(btree1[0].hashval)) ||
	     (be32_to_cpu(btree2[nodehdr2.count - 1].hashval) <
	      be32_to_cpu(btree1[nodehdr1.count - 1].hashval)))) {
		tmpnode = node1;
		node1 = node2;
		node2 = tmpnode;
		dp->d_ops->node_hdr_from_disk(&nodehdr1, node1);
		dp->d_ops->node_hdr_from_disk(&nodehdr2, node2);
		btree1 = dp->d_ops->node_tree_p(node1);
		btree2 = dp->d_ops->node_tree_p(node2);
		swap = 1;
	}

	count = (nodehdr1.count - nodehdr2.count) / 2;
	if (count == 0)
		return;
	tp = state->args->trans;
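	/*
	 * Illustrative example: if node1 holds 10 entries and node2 holds
	 * 2, then count = (10 - 2) / 2 = 4 and the four highest-hashval
	 * entries of node1 are moved to the front of node2, leaving six
	 * entries in each node.
	 */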
	/*
	 * Two cases: high-to-low and low-to-high.
	 */
	if (count > 0) {
		/*
		 * Move elements in node2 up to make a hole.
		 */
		tmp = nodehdr2.count;
		if (tmp > 0) {
			tmp *= (uint)sizeof(xfs_da_node_entry_t);
			btree_s = &btree2[0];
			btree_d = &btree2[count];
			memmove(btree_d, btree_s, tmp);
		}

		/*
		 * Move the req'd B-tree elements from high in node1 to
		 * low in node2.
		 */
		nodehdr2.count += count;
		tmp = count * (uint)sizeof(xfs_da_node_entry_t);
		btree_s = &btree1[nodehdr1.count - count];
		btree_d = &btree2[0];
		memcpy(btree_d, btree_s, tmp);
		nodehdr1.count -= count;
	} else {
		/*
		 * Move the req'd B-tree elements from low in node2 to
		 * high in node1.
		 */
		count = -count;
		tmp = count * (uint)sizeof(xfs_da_node_entry_t);
		btree_s = &btree2[0];
		btree_d = &btree1[nodehdr1.count];
		memcpy(btree_d, btree_s, tmp);
		nodehdr1.count += count;

		xfs_trans_log_buf(tp, blk1->bp,
			XFS_DA_LOGRANGE(node1, btree_d, tmp));

		/*
		 * Move elements in node2 down to fill the hole.
		 */
		tmp = nodehdr2.count - count;
		tmp *= (uint)sizeof(xfs_da_node_entry_t);
		btree_s = &btree2[count];
		btree_d = &btree2[0];
		memmove(btree_d, btree_s, tmp);
		nodehdr2.count -= count;
	}

	/*
	 * Log header of node 1 and all current bits of node 2.
	 */
	dp->d_ops->node_hdr_to_disk(node1, &nodehdr1);
	xfs_trans_log_buf(tp, blk1->bp,
		XFS_DA_LOGRANGE(node1, &node1->hdr, dp->d_ops->node_hdr_size));

	dp->d_ops->node_hdr_to_disk(node2, &nodehdr2);
	xfs_trans_log_buf(tp, blk2->bp,
		XFS_DA_LOGRANGE(node2, &node2->hdr,
				dp->d_ops->node_hdr_size +
				(sizeof(btree2[0]) * nodehdr2.count)));

	/*
	 * Record the last hashval from each block for upward propagation.
	 * (note: don't use the swapped node pointers)
	 */
	if (swap) {
		node1 = blk1->bp->b_addr;
		node2 = blk2->bp->b_addr;
		dp->d_ops->node_hdr_from_disk(&nodehdr1, node1);
		dp->d_ops->node_hdr_from_disk(&nodehdr2, node2);
		btree1 = dp->d_ops->node_tree_p(node1);
		btree2 = dp->d_ops->node_tree_p(node2);
	}
	blk1->hashval = be32_to_cpu(btree1[nodehdr1.count - 1].hashval);
	blk2->hashval = be32_to_cpu(btree2[nodehdr2.count - 1].hashval);

	/*
	 * Adjust the expected index for insertion.
	 */
	if (blk1->index >= nodehdr1.count) {
		blk2->index = blk1->index - nodehdr1.count;
		blk1->index = nodehdr1.count + 1;	/* make it invalid */
	}
}

/*
 * Add a new entry to an intermediate node.
 */
STATIC void
xfs_da3_node_add(
	struct xfs_da_state	*state,
	struct xfs_da_state_blk	*oldblk,
	struct xfs_da_state_blk	*newblk)
{
	struct xfs_da_intnode	*node;
	struct xfs_da3_icnode_hdr nodehdr;
	struct xfs_da_node_entry *btree;
	int			tmp;
	struct xfs_inode	*dp = state->args->dp;

	trace_xfs_da_node_add(state->args);

	node = oldblk->bp->b_addr;
	dp->d_ops->node_hdr_from_disk(&nodehdr, node);
	btree = dp->d_ops->node_tree_p(node);

	ASSERT(oldblk->index >= 0 && oldblk->index <= nodehdr.count);
	ASSERT(newblk->blkno != 0);
	if (state->args->whichfork == XFS_DATA_FORK)
		ASSERT(newblk->blkno >= state->args->geo->leafblk &&
		       newblk->blkno < state->args->geo->freeblk);

	/*
	 * We may need to make some room before we insert the new node.
	 */
	tmp = 0;
	if (oldblk->index < nodehdr.count) {
		tmp = (nodehdr.count - oldblk->index) * (uint)sizeof(*btree);
		memmove(&btree[oldblk->index + 1], &btree[oldblk->index], tmp);
	}
	btree[oldblk->index].hashval = cpu_to_be32(newblk->hashval);
	btree[oldblk->index].before = cpu_to_be32(newblk->blkno);
	xfs_trans_log_buf(state->args->trans, oldblk->bp,
		XFS_DA_LOGRANGE(node, &btree[oldblk->index],
				tmp + sizeof(*btree)));

	nodehdr.count += 1;
	dp->d_ops->node_hdr_to_disk(node, &nodehdr);
	xfs_trans_log_buf(state->args->trans, oldblk->bp,
		XFS_DA_LOGRANGE(node, &node->hdr, dp->d_ops->node_hdr_size));

	/*
	 * Copy the last hash value from the oldblk to propagate upwards.
	 */
	oldblk->hashval = be32_to_cpu(btree[nodehdr.count - 1].hashval);
}

/*========================================================================
 * Routines used for shrinking the Btree.
 *========================================================================*/

/*
 * Deallocate an empty leaf node, remove it from its parent,
 * possibly deallocating that block, etc...
 */
int
xfs_da3_join(
	struct xfs_da_state	*state)
{
	struct xfs_da_state_blk	*drop_blk;
	struct xfs_da_state_blk	*save_blk;
	int			action = 0;
	int			error;

	trace_xfs_da_join(state->args);

	drop_blk = &state->path.blk[ state->path.active-1 ];
	save_blk = &state->altpath.blk[ state->path.active-1 ];
	ASSERT(state->path.blk[0].magic == XFS_DA_NODE_MAGIC);
	ASSERT(drop_blk->magic == XFS_ATTR_LEAF_MAGIC ||
	       drop_blk->magic == XFS_DIR2_LEAFN_MAGIC);

	/*
	 * Walk back up the tree joining/deallocating as necessary.
	 * When we stop dropping blocks, break out.
	 */
	for (  ; state->path.active >= 2; drop_blk--, save_blk--,
		 state->path.active--) {
		/*
		 * See if we can combine the block with a neighbor.
		 *   (action == 0) => no options, just leave
		 *   (action == 1) => coalesce, then unlink
		 *   (action == 2) => block empty, unlink it
		 */
		switch (drop_blk->magic) {
		case XFS_ATTR_LEAF_MAGIC:
			error = xfs_attr3_leaf_toosmall(state, &action);
			if (error)
				return error;
			if (action == 0)
				return 0;
			xfs_attr3_leaf_unbalance(state, drop_blk, save_blk);
			break;
		case XFS_DIR2_LEAFN_MAGIC:
			error = xfs_dir2_leafn_toosmall(state, &action);
			if (error)
				return error;
			if (action == 0)
				return 0;
			xfs_dir2_leafn_unbalance(state, drop_blk, save_blk);
			break;
		case XFS_DA_NODE_MAGIC:
			/*
			 * Remove the offending node, fixup hashvals,
			 * check for a toosmall neighbor.
			 */
			xfs_da3_node_remove(state, drop_blk);
			xfs_da3_fixhashpath(state, &state->path);
			error = xfs_da3_node_toosmall(state, &action);
			if (error)
				return error;
			if (action == 0)
				return 0;
			xfs_da3_node_unbalance(state, drop_blk, save_blk);
			break;
		}
		xfs_da3_fixhashpath(state, &state->altpath);
		error = xfs_da3_blk_unlink(state, drop_blk, save_blk);
		xfs_da_state_kill_altpath(state);
		if (error)
			return error;
		error = xfs_da_shrink_inode(state->args, drop_blk->blkno,
							 drop_blk->bp);
		drop_blk->bp = NULL;
		if (error)
			return error;
	}
	/*
	 * We joined all the way to the top.  If it turns out that
	 * we only have one entry in the root, make the child block
	 * the new root.
	 */
	xfs_da3_node_remove(state, drop_blk);
	xfs_da3_fixhashpath(state, &state->path);
	error = xfs_da3_root_join(state, &state->path.blk[0]);
	return error;
}

#ifdef DEBUG
static void
xfs_da_blkinfo_onlychild_validate(struct xfs_da_blkinfo *blkinfo, __u16 level)
{
	__be16	magic = blkinfo->magic;

	if (level == 1) {
		ASSERT(magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
		       magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC) ||
		       magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC) ||
		       magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC));
	} else {
		ASSERT(magic == cpu_to_be16(XFS_DA_NODE_MAGIC) ||
		       magic == cpu_to_be16(XFS_DA3_NODE_MAGIC));
	}
	ASSERT(!blkinfo->forw);
	ASSERT(!blkinfo->back);
}
#else	/* !DEBUG */
#define	xfs_da_blkinfo_onlychild_validate(blkinfo, level)
#endif	/* !DEBUG */

/*
 * We have only one entry in the root.  Copy the only remaining child of
 * the old root to block 0 as the new root node.
 */
STATIC int
xfs_da3_root_join(
	struct xfs_da_state	*state,
	struct xfs_da_state_blk	*root_blk)
{
	struct xfs_da_intnode	*oldroot;
	struct xfs_da_args	*args;
	xfs_dablk_t		child;
	struct xfs_buf		*bp;
	struct xfs_da3_icnode_hdr oldroothdr;
	struct xfs_da_node_entry *btree;
	int			error;
	struct xfs_inode	*dp = state->args->dp;

	trace_xfs_da_root_join(state->args);

	ASSERT(root_blk->magic == XFS_DA_NODE_MAGIC);

	args = state->args;
	oldroot = root_blk->bp->b_addr;
	dp->d_ops->node_hdr_from_disk(&oldroothdr, oldroot);
	ASSERT(oldroothdr.forw == 0);
	ASSERT(oldroothdr.back == 0);

	/*
	 * If the root has more than one child, then don't do anything.
	 */
	if (oldroothdr.count > 1)
		return 0;

	/*
	 * Read in the (only) child block, then copy those bytes into
	 * the root block's buffer and free the original child block.
	 */
	btree = dp->d_ops->node_tree_p(oldroot);
	child = be32_to_cpu(btree[0].before);
	ASSERT(child != 0);
	error = xfs_da3_node_read(args->trans, dp, child, -1, &bp,
					     args->whichfork);
	if (error)
		return error;
	xfs_da_blkinfo_onlychild_validate(bp->b_addr, oldroothdr.level);

	/*
	 * This could be copying a leaf back into the root block in the case of
	 * there only being a single leaf block left in the tree. Hence we have
	 * to update the b_ops pointer as well to match the buffer type change
	 * that could occur. For dir3 blocks we also need to update the block
	 * number in the buffer header.
	 */
	memcpy(root_blk->bp->b_addr, bp->b_addr, args->geo->blksize);
	root_blk->bp->b_ops = bp->b_ops;
	xfs_trans_buf_copy_type(root_blk->bp, bp);
	if (oldroothdr.magic == XFS_DA3_NODE_MAGIC) {
		struct xfs_da3_blkinfo *da3 = root_blk->bp->b_addr;
		da3->blkno = cpu_to_be64(root_blk->bp->b_bn);
	}
	xfs_trans_log_buf(args->trans, root_blk->bp, 0,
			  args->geo->blksize - 1);
	error = xfs_da_shrink_inode(args, child, bp);
	return error;
}

/*
 * Check a node block and its neighbors to see if the block should be
 * collapsed into one or the other neighbor.  Always keep the block
 * with the smaller block number.
 * If the current block is over 50% full, don't try to join it, return 0.
 * If the block is empty, fill in the state structure and return 2.
 * If it can be collapsed, fill in the state structure and return 1.
 * If nothing can be done, return 0.
 */
STATIC int
xfs_da3_node_toosmall(
	struct xfs_da_state	*state,
	int			*action)
{
	struct xfs_da_intnode	*node;
	struct xfs_da_state_blk	*blk;
	struct xfs_da_blkinfo	*info;
	xfs_dablk_t		blkno;
	struct xfs_buf		*bp;
	struct xfs_da3_icnode_hdr nodehdr;
	int			count;
	int			forward;
	int			error;
	int			retval;
	int			i;
	struct xfs_inode	*dp = state->args->dp;

	trace_xfs_da_node_toosmall(state->args);

	/*
	 * Check for the degenerate case of the block being over 50% full.
	 * If so, it's not worth even looking to see if we might be able
	 * to coalesce with a sibling.
	 */
	blk = &state->path.blk[ state->path.active-1 ];
	info = blk->bp->b_addr;
	node = (xfs_da_intnode_t *)info;
	dp->d_ops->node_hdr_from_disk(&nodehdr, node);
	if (nodehdr.count > (state->args->geo->node_ents >> 1)) {
		*action = 0;	/* blk over 50%, don't try to join */
		return 0;	/* blk over 50%, don't try to join */
	}

	/*
	 * Check for the degenerate case of the block being empty.
	 * If the block is empty, we'll simply delete it, no need to
	 * coalesce it with a sibling block.  We choose (arbitrarily)
	 * to merge with the forward block unless it is NULL.
	 */
	if (nodehdr.count == 0) {
		/*
		 * Make altpath point to the block we want to keep and
		 * path point to the block we want to drop (this one).
		 */
		forward = (info->forw != 0);
		memcpy(&state->altpath, &state->path, sizeof(state->path));
		error = xfs_da3_path_shift(state, &state->altpath, forward,
						 0, &retval);
		if (error)
			return error;
		if (retval) {
			*action = 0;
		} else {
			*action = 2;
		}
		return 0;
	}

	/*
	 * Examine each sibling block to see if we can coalesce with
	 * at least 25% free space to spare.  We need to figure out
	 * whether to merge with the forward or the backward block.
	 * We prefer coalescing with the lower numbered sibling so as
	 * to shrink a directory over time.
	 */
	count  = state->args->geo->node_ents;
	count -= state->args->geo->node_ents >> 2;
	count -= nodehdr.count;
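	/*
	 * Illustrative example: with node_ents = 64, count is now
	 * 64 - 16 - nodehdr.count, so a sibling "fits" in the check below
	 * only when the combined entry count would stay at or under 75%
	 * of a full node.
	 */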

	/* start with smaller blk num */
	forward = nodehdr.forw < nodehdr.back;
	for (i = 0; i < 2; forward = !forward, i++) {
		struct xfs_da3_icnode_hdr thdr;
		if (forward)
			blkno = nodehdr.forw;
		else
			blkno = nodehdr.back;
		if (blkno == 0)
			continue;
		error = xfs_da3_node_read(state->args->trans, dp,
					blkno, -1, &bp, state->args->whichfork);
		if (error)
			return error;

		node = bp->b_addr;
		dp->d_ops->node_hdr_from_disk(&thdr, node);
		xfs_trans_brelse(state->args->trans, bp);

		if (count - thdr.count >= 0)
			break;	/* fits with at least 25% to spare */
	}
	if (i >= 2) {
		*action = 0;
		return 0;
	}

	/*
	 * Make altpath point to the block we want to keep (the lower
	 * numbered block) and path point to the block we want to drop.
	 */
	memcpy(&state->altpath, &state->path, sizeof(state->path));
	if (blkno < blk->blkno) {
		error = xfs_da3_path_shift(state, &state->altpath, forward,
						 0, &retval);
	} else {
		error = xfs_da3_path_shift(state, &state->path, forward,
						 0, &retval);
	}
	if (error)
		return error;
	if (retval) {
		*action = 0;
		return 0;
	}
	*action = 1;
	return 0;
}

/*
 * Pick up the last hashvalue from an intermediate node.
 */
STATIC uint
xfs_da3_node_lasthash(
	struct xfs_inode	*dp,
	struct xfs_buf		*bp,
	int			*count)
{
	struct xfs_da_intnode	*node;
	struct xfs_da_node_entry *btree;
	struct xfs_da3_icnode_hdr nodehdr;

	node = bp->b_addr;
	dp->d_ops->node_hdr_from_disk(&nodehdr, node);
	if (count)
		*count = nodehdr.count;
	if (!nodehdr.count)
		return 0;
	btree = dp->d_ops->node_tree_p(node);
	return be32_to_cpu(btree[nodehdr.count - 1].hashval);
}

/*
 * Walk back up the tree adjusting hash values as necessary,
 * when we stop making changes, return.
 */
void
xfs_da3_fixhashpath(
	struct xfs_da_state	*state,
	struct xfs_da_state_path *path)
{
	struct xfs_da_state_blk	*blk;
	struct xfs_da_intnode	*node;
	struct xfs_da_node_entry *btree;
	xfs_dahash_t		lasthash=0;
	int			level;
	int			count;
	struct xfs_inode	*dp = state->args->dp;

	trace_xfs_da_fixhashpath(state->args);

	level = path->active-1;
	blk = &path->blk[ level ];
	switch (blk->magic) {
	case XFS_ATTR_LEAF_MAGIC:
		lasthash = xfs_attr_leaf_lasthash(blk->bp, &count);
		if (count == 0)
			return;
		break;
	case XFS_DIR2_LEAFN_MAGIC:
		lasthash = xfs_dir2_leaf_lasthash(dp, blk->bp, &count);
		if (count == 0)
			return;
		break;
	case XFS_DA_NODE_MAGIC:
		lasthash = xfs_da3_node_lasthash(dp, blk->bp, &count);
		if (count == 0)
			return;
		break;
	}
	for (blk--, level--; level >= 0; blk--, level--) {
		struct xfs_da3_icnode_hdr nodehdr;

		node = blk->bp->b_addr;
		dp->d_ops->node_hdr_from_disk(&nodehdr, node);
		btree = dp->d_ops->node_tree_p(node);
		if (be32_to_cpu(btree[blk->index].hashval) == lasthash)
			break;
		blk->hashval = lasthash;
		btree[blk->index].hashval = cpu_to_be32(lasthash);
		xfs_trans_log_buf(state->args->trans, blk->bp,
				  XFS_DA_LOGRANGE(node, &btree[blk->index],
						  sizeof(*btree)));

		lasthash = be32_to_cpu(btree[nodehdr.count - 1].hashval);
	}
}

/*
 * Remove an entry from an intermediate node.
 */
STATIC void
xfs_da3_node_remove(
	struct xfs_da_state	*state,
	struct xfs_da_state_blk	*drop_blk)
{
	struct xfs_da_intnode	*node;
	struct xfs_da3_icnode_hdr nodehdr;
	struct xfs_da_node_entry *btree;
	int			index;
	int			tmp;
	struct xfs_inode	*dp = state->args->dp;

	trace_xfs_da_node_remove(state->args);

	node = drop_blk->bp->b_addr;
	dp->d_ops->node_hdr_from_disk(&nodehdr, node);
	ASSERT(drop_blk->index < nodehdr.count);
	ASSERT(drop_blk->index >= 0);

	/*
	 * Copy over the offending entry, or just zero it out.
	 */
	index = drop_blk->index;
	btree = dp->d_ops->node_tree_p(node);
	if (index < nodehdr.count - 1) {
		tmp  = nodehdr.count - index - 1;
		tmp *= (uint)sizeof(xfs_da_node_entry_t);
		memmove(&btree[index], &btree[index + 1], tmp);
		xfs_trans_log_buf(state->args->trans, drop_blk->bp,
		    XFS_DA_LOGRANGE(node, &btree[index], tmp));
		index = nodehdr.count - 1;
	}
	memset(&btree[index], 0, sizeof(xfs_da_node_entry_t));
	xfs_trans_log_buf(state->args->trans, drop_blk->bp,
	    XFS_DA_LOGRANGE(node, &btree[index], sizeof(btree[index])));
	nodehdr.count -= 1;
	dp->d_ops->node_hdr_to_disk(node, &nodehdr);
	xfs_trans_log_buf(state->args->trans, drop_blk->bp,
	    XFS_DA_LOGRANGE(node, &node->hdr, dp->d_ops->node_hdr_size));

	/*
	 * Copy the last hash value from the block to propagate upwards.
	 */
	drop_blk->hashval = be32_to_cpu(btree[index - 1].hashval);
}

/*
 * Unbalance the elements between two intermediate nodes,
 * move all Btree elements from one node into another.
 */
STATIC void
xfs_da3_node_unbalance(
	struct xfs_da_state	*state,
	struct xfs_da_state_blk	*drop_blk,
	struct xfs_da_state_blk	*save_blk)
{
	struct xfs_da_intnode	*drop_node;
	struct xfs_da_intnode	*save_node;
	struct xfs_da_node_entry *drop_btree;
	struct xfs_da_node_entry *save_btree;
	struct xfs_da3_icnode_hdr drop_hdr;
	struct xfs_da3_icnode_hdr save_hdr;
	struct xfs_trans	*tp;
	int			sindex;
	int			tmp;
	struct xfs_inode	*dp = state->args->dp;

	trace_xfs_da_node_unbalance(state->args);

	drop_node = drop_blk->bp->b_addr;
	save_node = save_blk->bp->b_addr;
	dp->d_ops->node_hdr_from_disk(&drop_hdr, drop_node);
	dp->d_ops->node_hdr_from_disk(&save_hdr, save_node);
	drop_btree = dp->d_ops->node_tree_p(drop_node);
	save_btree = dp->d_ops->node_tree_p(save_node);
	tp = state->args->trans;

	/*
	 * If the dying block has lower hashvals, then move all the
	 * elements in the remaining block up to make a hole.
	 */
	if ((be32_to_cpu(drop_btree[0].hashval) <
			be32_to_cpu(save_btree[0].hashval)) ||
	    (be32_to_cpu(drop_btree[drop_hdr.count - 1].hashval) <
			be32_to_cpu(save_btree[save_hdr.count - 1].hashval))) {
		/* XXX: check this - is memmove dst correct? */
		tmp = save_hdr.count * sizeof(xfs_da_node_entry_t);
		memmove(&save_btree[drop_hdr.count], &save_btree[0], tmp);

		sindex = 0;
		xfs_trans_log_buf(tp, save_blk->bp,
			XFS_DA_LOGRANGE(save_node, &save_btree[0],
				(save_hdr.count + drop_hdr.count) *
						sizeof(xfs_da_node_entry_t)));
	} else {
		sindex = save_hdr.count;
		xfs_trans_log_buf(tp, save_blk->bp,
			XFS_DA_LOGRANGE(save_node, &save_btree[sindex],
				drop_hdr.count * sizeof(xfs_da_node_entry_t)));
	}

	/*
	 * Move all the B-tree elements from drop_blk to save_blk.
	 */
	tmp = drop_hdr.count * (uint)sizeof(xfs_da_node_entry_t);
	memcpy(&save_btree[sindex], &drop_btree[0], tmp);
	save_hdr.count += drop_hdr.count;

	dp->d_ops->node_hdr_to_disk(save_node, &save_hdr);
	xfs_trans_log_buf(tp, save_blk->bp,
		XFS_DA_LOGRANGE(save_node, &save_node->hdr,
				dp->d_ops->node_hdr_size));

	/*
	 * Save the last hashval in the remaining block for upward propagation.
	 */
	save_blk->hashval = be32_to_cpu(save_btree[save_hdr.count - 1].hashval);
}

/*========================================================================
 * Routines used for finding things in the Btree.
 *========================================================================*/

/*
 * Walk down the Btree looking for a particular filename, filling
 * in the state structure as we go.
 *
 * We will set the state structure to point to each of the elements
 * in each of the nodes where either the hashval is or should be.
 *
 * We support duplicate hashval's so for each entry in the current
 * node that could contain the desired hashval, descend.  This is a
 * pruned depth-first tree search.
 */
int							/* error */
xfs_da3_node_lookup_int(
	struct xfs_da_state	*state,
	int			*result)
{
	struct xfs_da_state_blk	*blk;
	struct xfs_da_blkinfo	*curr;
	struct xfs_da_intnode	*node;
	struct xfs_da_node_entry *btree;
	struct xfs_da3_icnode_hdr nodehdr;
	struct xfs_da_args	*args;
	xfs_dablk_t		blkno;
	xfs_dahash_t		hashval;
	xfs_dahash_t		btreehashval;
	int			probe;
	int			span;
	int			max;
	int			error;
	int			retval;
	unsigned int		expected_level = 0;
	struct xfs_inode	*dp = state->args->dp;

	args = state->args;

	/*
	 * Descend thru the B-tree searching each level for the right
	 * node to use, until the right hashval is found.
	 */
	blkno = args->geo->leafblk;
	for (blk = &state->path.blk[0], state->path.active = 1;
			 state->path.active <= XFS_DA_NODE_MAXDEPTH;
			 blk++, state->path.active++) {
		/*
		 * Read the next node down in the tree.
		 */
		blk->blkno = blkno;
		error = xfs_da3_node_read(args->trans, args->dp, blkno,
					-1, &blk->bp, args->whichfork);
		if (error) {
			blk->blkno = 0;
			state->path.active--;
			return error;
		}
		curr = blk->bp->b_addr;
		blk->magic = be16_to_cpu(curr->magic);

		if (blk->magic == XFS_ATTR_LEAF_MAGIC ||
		    blk->magic == XFS_ATTR3_LEAF_MAGIC) {
			blk->magic = XFS_ATTR_LEAF_MAGIC;
			blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL);
			break;
		}

		if (blk->magic == XFS_DIR2_LEAFN_MAGIC ||
		    blk->magic == XFS_DIR3_LEAFN_MAGIC) {
			blk->magic = XFS_DIR2_LEAFN_MAGIC;
			blk->hashval = xfs_dir2_leaf_lasthash(args->dp,
							      blk->bp, NULL);
			break;
		}

		blk->magic = XFS_DA_NODE_MAGIC;


		/*
		 * Search an intermediate node for a match.
		 */
		node = blk->bp->b_addr;
		dp->d_ops->node_hdr_from_disk(&nodehdr, node);
		btree = dp->d_ops->node_tree_p(node);

		/* Tree taller than we can handle; bail out! */
		if (nodehdr.level >= XFS_DA_NODE_MAXDEPTH)
			return -EFSCORRUPTED;

		/* Check the level from the root. */
		if (blkno == args->geo->leafblk)
			expected_level = nodehdr.level - 1;
		else if (expected_level != nodehdr.level)
			return -EFSCORRUPTED;
		else
			expected_level--;

		max = nodehdr.count;
		blk->hashval = be32_to_cpu(btree[max - 1].hashval);
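		/*
		 * Illustrative note: duplicate hashvals are legal, so the
		 * scan below always descends at the first entry whose
		 * hashval is >= the search value.  E.g. for the entries
		 * [10, 20, 20, 20, 30] a lookup of 20 must descend at
		 * index 1, regardless of where the binary search stopped.
		 */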
		/*
		 * Binary search.  (note: small blocks will skip this loop)
		 */
		probe = span = max / 2;
		hashval = args->hashval;
		while (span > 4) {
			span /= 2;
			btreehashval = be32_to_cpu(btree[probe].hashval);
			if (btreehashval < hashval)
				probe += span;
			else if (btreehashval > hashval)
				probe -= span;
			else
				break;
		}
		ASSERT((probe >= 0) && (probe < max));
		ASSERT((span <= 4) ||
			(be32_to_cpu(btree[probe].hashval) == hashval));

		/*
		 * Since we may have duplicate hashval's, find the first
		 * matching hashval in the node.
		 */
		while (probe > 0 &&
		       be32_to_cpu(btree[probe].hashval) >= hashval) {
			probe--;
		}
		while (probe < max &&
		       be32_to_cpu(btree[probe].hashval) < hashval) {
			probe++;
		}

		/*
		 * Pick the right block to descend on.
		 */
		if (probe == max) {
			blk->index = max - 1;
			blkno = be32_to_cpu(btree[max - 1].before);
		} else {
			blk->index = probe;
			blkno = be32_to_cpu(btree[probe].before);
		}

		/* We can't point back to the root. */
		if (blkno == args->geo->leafblk)
			return -EFSCORRUPTED;
	}

	if (expected_level != 0)
		return -EFSCORRUPTED;

	/*
	 * A leaf block that ends in the hashval that we are interested in
	 * (final hashval == search hashval) means that the next block may
	 * contain more entries with the same hashval, shift upward to the
	 * next leaf and keep searching.
	 */
	for (;;) {
		if (blk->magic == XFS_DIR2_LEAFN_MAGIC) {
			retval = xfs_dir2_leafn_lookup_int(blk->bp, args,
							&blk->index, state);
		} else if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
			retval = xfs_attr3_leaf_lookup_int(blk->bp, args);
			blk->index = args->index;
			args->blkno = blk->blkno;
		} else {
			ASSERT(0);
			return -EFSCORRUPTED;
		}
		if (((retval == -ENOENT) || (retval == -ENOATTR)) &&
		    (blk->hashval == args->hashval)) {
			error = xfs_da3_path_shift(state, &state->path, 1, 1,
							 &retval);
			if (error)
				return error;
			if (retval == 0) {
				continue;
			} else if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
				/* path_shift() gives ENOENT */
				retval = -ENOATTR;
			}
		}
		break;
	}
	*result = retval;
	return 0;
}

/*========================================================================
 * Utility routines.
 *========================================================================*/

/*
 * Compare two intermediate nodes for "order".
 */
STATIC int
xfs_da3_node_order(
	struct xfs_inode *dp,
	struct xfs_buf	*node1_bp,
	struct xfs_buf	*node2_bp)
{
	struct xfs_da_intnode	*node1;
	struct xfs_da_intnode	*node2;
	struct xfs_da_node_entry *btree1;
	struct xfs_da_node_entry *btree2;
	struct xfs_da3_icnode_hdr node1hdr;
	struct xfs_da3_icnode_hdr node2hdr;

	node1 = node1_bp->b_addr;
	node2 = node2_bp->b_addr;
	dp->d_ops->node_hdr_from_disk(&node1hdr, node1);
	dp->d_ops->node_hdr_from_disk(&node2hdr, node2);
	btree1 = dp->d_ops->node_tree_p(node1);
	btree2 = dp->d_ops->node_tree_p(node2);

	if (node1hdr.count > 0 && node2hdr.count > 0 &&
	    ((be32_to_cpu(btree2[0].hashval) < be32_to_cpu(btree1[0].hashval)) ||
	     (be32_to_cpu(btree2[node2hdr.count - 1].hashval) <
	      be32_to_cpu(btree1[node1hdr.count - 1].hashval)))) {
		return 1;
	}
	return 0;
}

/*
 * Link a new block into a doubly linked list of blocks (of whatever type).
 */
int							/* error */
xfs_da3_blk_link(
	struct xfs_da_state	*state,
	struct xfs_da_state_blk	*old_blk,
	struct xfs_da_state_blk	*new_blk)
{
	struct xfs_da_blkinfo	*old_info;
	struct xfs_da_blkinfo	*new_info;
	struct xfs_da_blkinfo	*tmp_info;
	struct xfs_da_args	*args;
	struct xfs_buf		*bp;
	int			before = 0;
	int			error;
	struct xfs_inode	*dp = state->args->dp;

	/*
	 * Set up environment.
	 */
	args = state->args;
	ASSERT(args != NULL);
	old_info = old_blk->bp->b_addr;
	new_info = new_blk->bp->b_addr;
	ASSERT(old_blk->magic == XFS_DA_NODE_MAGIC ||
	       old_blk->magic == XFS_DIR2_LEAFN_MAGIC ||
	       old_blk->magic == XFS_ATTR_LEAF_MAGIC);

	switch (old_blk->magic) {
	case XFS_ATTR_LEAF_MAGIC:
		before = xfs_attr_leaf_order(old_blk->bp, new_blk->bp);
		break;
	case XFS_DIR2_LEAFN_MAGIC:
		before = xfs_dir2_leafn_order(dp, old_blk->bp, new_blk->bp);
		break;
	case XFS_DA_NODE_MAGIC:
		before = xfs_da3_node_order(dp, old_blk->bp, new_blk->bp);
		break;
	}
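	/*
	 * Illustrative sketch: when the new block sorts before the old one,
	 * the sibling chain is updated as
	 *
	 *	before:	... <-> back <-> old <-> ...
	 *	after:	... <-> back <-> new <-> old <-> ...
	 *
	 * The mirror-image update below handles the new block sorting
	 * after the old one.
	 */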
1752 */ 1753 trace_xfs_da_link_after(args); 1754 new_info->forw = old_info->forw; 1755 new_info->back = cpu_to_be32(old_blk->blkno); 1756 if (old_info->forw) { 1757 error = xfs_da3_node_read(args->trans, dp, 1758 be32_to_cpu(old_info->forw), 1759 -1, &bp, args->whichfork); 1760 if (error) 1761 return error; 1762 ASSERT(bp != NULL); 1763 tmp_info = bp->b_addr; 1764 ASSERT(tmp_info->magic == old_info->magic); 1765 ASSERT(be32_to_cpu(tmp_info->back) == old_blk->blkno); 1766 tmp_info->back = cpu_to_be32(new_blk->blkno); 1767 xfs_trans_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1); 1768 } 1769 old_info->forw = cpu_to_be32(new_blk->blkno); 1770 } 1771 1772 xfs_trans_log_buf(args->trans, old_blk->bp, 0, sizeof(*tmp_info) - 1); 1773 xfs_trans_log_buf(args->trans, new_blk->bp, 0, sizeof(*tmp_info) - 1); 1774 return 0; 1775 } 1776 1777 /* 1778 * Unlink a block from a doubly linked list of blocks. 1779 */ 1780 STATIC int /* error */ 1781 xfs_da3_blk_unlink( 1782 struct xfs_da_state *state, 1783 struct xfs_da_state_blk *drop_blk, 1784 struct xfs_da_state_blk *save_blk) 1785 { 1786 struct xfs_da_blkinfo *drop_info; 1787 struct xfs_da_blkinfo *save_info; 1788 struct xfs_da_blkinfo *tmp_info; 1789 struct xfs_da_args *args; 1790 struct xfs_buf *bp; 1791 int error; 1792 1793 /* 1794 * Set up environment. 1795 */ 1796 args = state->args; 1797 ASSERT(args != NULL); 1798 save_info = save_blk->bp->b_addr; 1799 drop_info = drop_blk->bp->b_addr; 1800 ASSERT(save_blk->magic == XFS_DA_NODE_MAGIC || 1801 save_blk->magic == XFS_DIR2_LEAFN_MAGIC || 1802 save_blk->magic == XFS_ATTR_LEAF_MAGIC); 1803 ASSERT(save_blk->magic == drop_blk->magic); 1804 ASSERT((be32_to_cpu(save_info->forw) == drop_blk->blkno) || 1805 (be32_to_cpu(save_info->back) == drop_blk->blkno)); 1806 ASSERT((be32_to_cpu(drop_info->forw) == save_blk->blkno) || 1807 (be32_to_cpu(drop_info->back) == save_blk->blkno)); 1808 1809 /* 1810 * Unlink the leaf block from the doubly linked chain of leaves. 1811 */ 1812 if (be32_to_cpu(save_info->back) == drop_blk->blkno) { 1813 trace_xfs_da_unlink_back(args); 1814 save_info->back = drop_info->back; 1815 if (drop_info->back) { 1816 error = xfs_da3_node_read(args->trans, args->dp, 1817 be32_to_cpu(drop_info->back), 1818 -1, &bp, args->whichfork); 1819 if (error) 1820 return error; 1821 ASSERT(bp != NULL); 1822 tmp_info = bp->b_addr; 1823 ASSERT(tmp_info->magic == save_info->magic); 1824 ASSERT(be32_to_cpu(tmp_info->forw) == drop_blk->blkno); 1825 tmp_info->forw = cpu_to_be32(save_blk->blkno); 1826 xfs_trans_log_buf(args->trans, bp, 0, 1827 sizeof(*tmp_info) - 1); 1828 } 1829 } else { 1830 trace_xfs_da_unlink_forward(args); 1831 save_info->forw = drop_info->forw; 1832 if (drop_info->forw) { 1833 error = xfs_da3_node_read(args->trans, args->dp, 1834 be32_to_cpu(drop_info->forw), 1835 -1, &bp, args->whichfork); 1836 if (error) 1837 return error; 1838 ASSERT(bp != NULL); 1839 tmp_info = bp->b_addr; 1840 ASSERT(tmp_info->magic == save_info->magic); 1841 ASSERT(be32_to_cpu(tmp_info->back) == drop_blk->blkno); 1842 tmp_info->back = cpu_to_be32(save_blk->blkno); 1843 xfs_trans_log_buf(args->trans, bp, 0, 1844 sizeof(*tmp_info) - 1); 1845 } 1846 } 1847 1848 xfs_trans_log_buf(args->trans, save_blk->bp, 0, sizeof(*save_info) - 1); 1849 return 0; 1850 } 1851 1852 /* 1853 * Move a path "forward" or "!forward" one block at the current level. 
1854 * 1855 * This routine will adjust a "path" to point to the next block 1856 * "forward" (higher hashvalues) or "!forward" (lower hashvals) in the 1857 * Btree, including updating pointers to the intermediate nodes between 1858 * the new bottom and the root. 1859 */ 1860 int /* error */ 1861 xfs_da3_path_shift( 1862 struct xfs_da_state *state, 1863 struct xfs_da_state_path *path, 1864 int forward, 1865 int release, 1866 int *result) 1867 { 1868 struct xfs_da_state_blk *blk; 1869 struct xfs_da_blkinfo *info; 1870 struct xfs_da_intnode *node; 1871 struct xfs_da_args *args; 1872 struct xfs_da_node_entry *btree; 1873 struct xfs_da3_icnode_hdr nodehdr; 1874 struct xfs_buf *bp; 1875 xfs_dablk_t blkno = 0; 1876 int level; 1877 int error; 1878 struct xfs_inode *dp = state->args->dp; 1879 1880 trace_xfs_da_path_shift(state->args); 1881 1882 /* 1883 * Roll up the Btree looking for the first block where our 1884 * current index is not at the edge of the block. Note that 1885 * we skip the bottom layer because we want the sibling block. 1886 */ 1887 args = state->args; 1888 ASSERT(args != NULL); 1889 ASSERT(path != NULL); 1890 ASSERT((path->active > 0) && (path->active < XFS_DA_NODE_MAXDEPTH)); 1891 level = (path->active-1) - 1; /* skip bottom layer in path */ 1892 for (blk = &path->blk[level]; level >= 0; blk--, level--) { 1893 node = blk->bp->b_addr; 1894 dp->d_ops->node_hdr_from_disk(&nodehdr, node); 1895 btree = dp->d_ops->node_tree_p(node); 1896 1897 if (forward && (blk->index < nodehdr.count - 1)) { 1898 blk->index++; 1899 blkno = be32_to_cpu(btree[blk->index].before); 1900 break; 1901 } else if (!forward && (blk->index > 0)) { 1902 blk->index--; 1903 blkno = be32_to_cpu(btree[blk->index].before); 1904 break; 1905 } 1906 } 1907 if (level < 0) { 1908 *result = -ENOENT; /* we're out of our tree */ 1909 ASSERT(args->op_flags & XFS_DA_OP_OKNOENT); 1910 return 0; 1911 } 1912 1913 /* 1914 * Roll down the edge of the subtree until we reach the 1915 * same depth we were at originally. 1916 */ 1917 for (blk++, level++; level < path->active; blk++, level++) { 1918 /* 1919 * Read the next child block into a local buffer. 1920 */ 1921 error = xfs_da3_node_read(args->trans, dp, blkno, -1, &bp, 1922 args->whichfork); 1923 if (error) 1924 return error; 1925 1926 /* 1927 * Release the old block (if it's dirty, the trans doesn't 1928 * actually let go) and swap the local buffer into the path 1929 * structure. This ensures failure of the above read doesn't set 1930 * a NULL buffer in an active slot in the path. 1931 */ 1932 if (release) 1933 xfs_trans_brelse(args->trans, blk->bp); 1934 blk->blkno = blkno; 1935 blk->bp = bp; 1936 1937 info = blk->bp->b_addr; 1938 ASSERT(info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC) || 1939 info->magic == cpu_to_be16(XFS_DA3_NODE_MAGIC) || 1940 info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) || 1941 info->magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC) || 1942 info->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC) || 1943 info->magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC)); 1944 1945 1946 /* 1947 * Note: we flatten the magic number to a single type so we 1948 * don't have to compare against crc/non-crc types elsewhere. 
/*========================================================================
 * Utility routines.
 *========================================================================*/

/*
 * Implement a simple hash on a character string.
 * Rotate the hash value by 7 bits, then XOR each character in.
 * This is implemented with some source-level loop unrolling.
 */
xfs_dahash_t
xfs_da_hashname(const uint8_t *name, int namelen)
{
	xfs_dahash_t	hash;

	/*
	 * Do four characters at a time as long as we can.
	 */
	for (hash = 0; namelen >= 4; namelen -= 4, name += 4)
		hash = (name[0] << 21) ^ (name[1] << 14) ^ (name[2] << 7) ^
		       (name[3] << 0) ^ rol32(hash, 7 * 4);

	/*
	 * Now do the rest of the characters.
	 */
	switch (namelen) {
	case 3:
		return (name[0] << 14) ^ (name[1] << 7) ^ (name[2] << 0) ^
		       rol32(hash, 7 * 3);
	case 2:
		return (name[0] << 7) ^ (name[1] << 0) ^ rol32(hash, 7 * 2);
	case 1:
		return (name[0] << 0) ^ rol32(hash, 7 * 1);
	default: /* case 0: */
		return hash;
	}
}
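
/*
 * Editor's note: the unrolled loop above computes the same value as
 * folding in one character per step, because for an 8-bit value c and
 * a shift s <= 24, (c << s) == rol32(c, s).  A byte-at-a-time
 * reference version (illustrative only, not part of the original
 * source) would be:
 *
 *	xfs_dahash_t	hash = 0;
 *
 *	while (namelen--)
 *		hash = *name++ ^ rol32(hash, 7);
 *	return hash;
 */
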
enum xfs_dacmp
xfs_da_compname(
	struct xfs_da_args *args,
	const unsigned char *name,
	int		len)
{
	return (args->namelen == len && memcmp(args->name, name, len) == 0) ?
					XFS_CMP_EXACT : XFS_CMP_DIFFERENT;
}

static xfs_dahash_t
xfs_default_hashname(
	struct xfs_name	*name)
{
	return xfs_da_hashname(name->name, name->len);
}

const struct xfs_nameops xfs_default_nameops = {
	.hashname	= xfs_default_hashname,
	.compname	= xfs_da_compname
};

int
xfs_da_grow_inode_int(
	struct xfs_da_args	*args,
	xfs_fileoff_t		*bno,
	int			count)
{
	struct xfs_trans	*tp = args->trans;
	struct xfs_inode	*dp = args->dp;
	int			w = args->whichfork;
	xfs_rfsblock_t		nblks = dp->i_d.di_nblocks;
	struct xfs_bmbt_irec	map, *mapp;
	int			nmap, error, got, i, mapi;

	/*
	 * Find a spot in the file space to put the new block.
	 */
	error = xfs_bmap_first_unused(tp, dp, count, bno, w);
	if (error)
		return error;

	/*
	 * Try mapping it in one filesystem block.
	 */
	nmap = 1;
	ASSERT(args->firstblock != NULL);
	error = xfs_bmapi_write(tp, dp, *bno, count,
			xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA|XFS_BMAPI_CONTIG,
			args->firstblock, args->total, &map, &nmap,
			args->dfops);
	if (error)
		return error;

	ASSERT(nmap <= 1);
	if (nmap == 1) {
		mapp = &map;
		mapi = 1;
	} else if (nmap == 0 && count > 1) {
		xfs_fileoff_t		b;
		int			c;

		/*
		 * If we didn't get it and the block might work if fragmented,
		 * try without the CONTIG flag.  Loop until we get it all.
		 */
		mapp = kmem_alloc(sizeof(*mapp) * count, KM_SLEEP);
		for (b = *bno, mapi = 0; b < *bno + count; ) {
			nmap = min(XFS_BMAP_MAX_NMAP, count);
			c = (int)(*bno + count - b);
			error = xfs_bmapi_write(tp, dp, b, c,
					xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA,
					args->firstblock, args->total,
					&mapp[mapi], &nmap, args->dfops);
			if (error)
				goto out_free_map;
			if (nmap < 1)
				break;
			mapi += nmap;
			b = mapp[mapi - 1].br_startoff +
			    mapp[mapi - 1].br_blockcount;
		}
	} else {
		mapi = 0;
		mapp = NULL;
	}

	/*
	 * Count the blocks we got, make sure it matches the total.
	 */
	for (i = 0, got = 0; i < mapi; i++)
		got += mapp[i].br_blockcount;
	if (got != count || mapp[0].br_startoff != *bno ||
	    mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount !=
	    *bno + count) {
		error = -ENOSPC;
		goto out_free_map;
	}

	/* account for newly allocated blocks in reserved blocks total */
	args->total -= dp->i_d.di_nblocks - nblks;

out_free_map:
	if (mapp != &map)
		kmem_free(mapp);
	return error;
}
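
/*
 * Editor's worked example for the check above (hypothetical numbers,
 * not from the original source): a request for count = 8 blocks at
 * *bno = 32 that comes back as the mappings {startoff 32, 3 blocks}
 * {35, 4} {39, 1} passes: got = 3 + 4 + 1 = 8, the first mapping
 * starts at 32, and the last one ends at 40 = 32 + 8.  If the
 * allocator could only produce {32, 3} {35, 4}, then got = 7 != 8 and
 * the function backs out with -ENOSPC, since a partially mapped btree
 * block is of no use.
 */
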
/*
 * Add a block to the btree ahead of the file.
 * Return the new block number to the caller.
 */
int
xfs_da_grow_inode(
	struct xfs_da_args	*args,
	xfs_dablk_t		*new_blkno)
{
	xfs_fileoff_t		bno;
	int			error;

	trace_xfs_da_grow_inode(args);

	bno = args->geo->leafblk;
	error = xfs_da_grow_inode_int(args, &bno, args->geo->fsbcount);
	if (!error)
		*new_blkno = (xfs_dablk_t)bno;
	return error;
}

/*
 * Ick.  We need to always be able to remove a btree block, even
 * if there's no space reservation because the filesystem is full.
 * This is called if xfs_bunmapi on a btree block fails due to ENOSPC.
 * It swaps the target block with the last block in the file, because
 * the last block in the file can always be removed: doing so can't
 * cause a bmap btree split.
 */
STATIC int
xfs_da3_swap_lastblock(
	struct xfs_da_args	*args,
	xfs_dablk_t		*dead_blknop,
	struct xfs_buf		**dead_bufp)
{
	struct xfs_da_blkinfo	*dead_info;
	struct xfs_da_blkinfo	*sib_info;
	struct xfs_da_intnode	*par_node;
	struct xfs_da_intnode	*dead_node;
	struct xfs_dir2_leaf	*dead_leaf2;
	struct xfs_da_node_entry *btree;
	struct xfs_da3_icnode_hdr par_hdr;
	struct xfs_inode	*dp;
	struct xfs_trans	*tp;
	struct xfs_mount	*mp;
	struct xfs_buf		*dead_buf;
	struct xfs_buf		*last_buf;
	struct xfs_buf		*sib_buf;
	struct xfs_buf		*par_buf;
	xfs_dahash_t		dead_hash;
	xfs_fileoff_t		lastoff;
	xfs_dablk_t		dead_blkno;
	xfs_dablk_t		last_blkno;
	xfs_dablk_t		sib_blkno;
	xfs_dablk_t		par_blkno;
	int			error;
	int			w;
	int			entno;
	int			level;
	int			dead_level;

	trace_xfs_da_swap_lastblock(args);

	dead_buf = *dead_bufp;
	dead_blkno = *dead_blknop;
	tp = args->trans;
	dp = args->dp;
	w = args->whichfork;
	ASSERT(w == XFS_DATA_FORK);
	mp = dp->i_mount;
	lastoff = args->geo->freeblk;
	error = xfs_bmap_last_before(tp, dp, &lastoff, w);
	if (error)
		return error;
	if (unlikely(lastoff == 0)) {
		XFS_ERROR_REPORT("xfs_da_swap_lastblock(1)", XFS_ERRLEVEL_LOW,
				 mp);
		return -EFSCORRUPTED;
	}
	/*
	 * Read the last block in the btree space.
	 */
	last_blkno = (xfs_dablk_t)lastoff - args->geo->fsbcount;
	error = xfs_da3_node_read(tp, dp, last_blkno, -1, &last_buf, w);
	if (error)
		return error;
	/*
	 * Copy the last block into the dead buffer and log it.
	 */
	memcpy(dead_buf->b_addr, last_buf->b_addr, args->geo->blksize);
	xfs_trans_log_buf(tp, dead_buf, 0, args->geo->blksize - 1);
	dead_info = dead_buf->b_addr;
	/*
	 * Get values from the moved block.
	 */
	if (dead_info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
	    dead_info->magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)) {
		struct xfs_dir3_icleaf_hdr leafhdr;
		struct xfs_dir2_leaf_entry *ents;

		dead_leaf2 = (xfs_dir2_leaf_t *)dead_info;
		dp->d_ops->leaf_hdr_from_disk(&leafhdr, dead_leaf2);
		ents = dp->d_ops->leaf_ents_p(dead_leaf2);
		dead_level = 0;
		dead_hash = be32_to_cpu(ents[leafhdr.count - 1].hashval);
	} else {
		struct xfs_da3_icnode_hdr deadhdr;

		dead_node = (xfs_da_intnode_t *)dead_info;
		dp->d_ops->node_hdr_from_disk(&deadhdr, dead_node);
		btree = dp->d_ops->node_tree_p(dead_node);
		dead_level = deadhdr.level;
		dead_hash = be32_to_cpu(btree[deadhdr.count - 1].hashval);
	}
	sib_buf = par_buf = NULL;
	/*
	 * If the moved block has a left sibling, fix up the pointers.
	 */
	if ((sib_blkno = be32_to_cpu(dead_info->back))) {
		error = xfs_da3_node_read(tp, dp, sib_blkno, -1, &sib_buf, w);
		if (error)
			goto done;
		sib_info = sib_buf->b_addr;
		if (unlikely(
		    be32_to_cpu(sib_info->forw) != last_blkno ||
		    sib_info->magic != dead_info->magic)) {
			XFS_ERROR_REPORT("xfs_da_swap_lastblock(2)",
					 XFS_ERRLEVEL_LOW, mp);
			error = -EFSCORRUPTED;
			goto done;
		}
		sib_info->forw = cpu_to_be32(dead_blkno);
		xfs_trans_log_buf(tp, sib_buf,
			XFS_DA_LOGRANGE(sib_info, &sib_info->forw,
					sizeof(sib_info->forw)));
		sib_buf = NULL;
	}
	/*
	 * If the moved block has a right sibling, fix up the pointers.
	 */
	if ((sib_blkno = be32_to_cpu(dead_info->forw))) {
		error = xfs_da3_node_read(tp, dp, sib_blkno, -1, &sib_buf, w);
		if (error)
			goto done;
		sib_info = sib_buf->b_addr;
		if (unlikely(
		    be32_to_cpu(sib_info->back) != last_blkno ||
		    sib_info->magic != dead_info->magic)) {
			XFS_ERROR_REPORT("xfs_da_swap_lastblock(3)",
					 XFS_ERRLEVEL_LOW, mp);
			error = -EFSCORRUPTED;
			goto done;
		}
		sib_info->back = cpu_to_be32(dead_blkno);
		xfs_trans_log_buf(tp, sib_buf,
			XFS_DA_LOGRANGE(sib_info, &sib_info->back,
					sizeof(sib_info->back)));
		sib_buf = NULL;
	}
	par_blkno = args->geo->leafblk;
	level = -1;
	/*
	 * Walk down the tree looking for the parent of the moved block.
	 */
	for (;;) {
		error = xfs_da3_node_read(tp, dp, par_blkno, -1, &par_buf, w);
		if (error)
			goto done;
		par_node = par_buf->b_addr;
		dp->d_ops->node_hdr_from_disk(&par_hdr, par_node);
		if (level >= 0 && level != par_hdr.level + 1) {
			XFS_ERROR_REPORT("xfs_da_swap_lastblock(4)",
					 XFS_ERRLEVEL_LOW, mp);
			error = -EFSCORRUPTED;
			goto done;
		}
		level = par_hdr.level;
		btree = dp->d_ops->node_tree_p(par_node);
		for (entno = 0;
		     entno < par_hdr.count &&
		     be32_to_cpu(btree[entno].hashval) < dead_hash;
		     entno++)
			continue;
		if (entno == par_hdr.count) {
			XFS_ERROR_REPORT("xfs_da_swap_lastblock(5)",
					 XFS_ERRLEVEL_LOW, mp);
			error = -EFSCORRUPTED;
			goto done;
		}
		par_blkno = be32_to_cpu(btree[entno].before);
		if (level == dead_level + 1)
			break;
		xfs_trans_brelse(tp, par_buf);
		par_buf = NULL;
	}
	/*
	 * We're in the right parent block.
	 * Look for the right entry.
	 */
	for (;;) {
		for (;
		     entno < par_hdr.count &&
		     be32_to_cpu(btree[entno].before) != last_blkno;
		     entno++)
			continue;
		if (entno < par_hdr.count)
			break;
		par_blkno = par_hdr.forw;
		xfs_trans_brelse(tp, par_buf);
		par_buf = NULL;
		if (unlikely(par_blkno == 0)) {
			XFS_ERROR_REPORT("xfs_da_swap_lastblock(6)",
					 XFS_ERRLEVEL_LOW, mp);
			error = -EFSCORRUPTED;
			goto done;
		}
		error = xfs_da3_node_read(tp, dp, par_blkno, -1, &par_buf, w);
		if (error)
			goto done;
		par_node = par_buf->b_addr;
		dp->d_ops->node_hdr_from_disk(&par_hdr, par_node);
		if (par_hdr.level != level) {
			XFS_ERROR_REPORT("xfs_da_swap_lastblock(7)",
					 XFS_ERRLEVEL_LOW, mp);
			error = -EFSCORRUPTED;
			goto done;
		}
		btree = dp->d_ops->node_tree_p(par_node);
		entno = 0;
	}
	/*
	 * Update the parent entry pointing to the moved block.
	 */
	btree[entno].before = cpu_to_be32(dead_blkno);
	xfs_trans_log_buf(tp, par_buf,
		XFS_DA_LOGRANGE(par_node, &btree[entno].before,
				sizeof(btree[entno].before)));
	*dead_blknop = last_blkno;
	*dead_bufp = last_buf;
	return 0;
done:
	if (par_buf)
		xfs_trans_brelse(tp, par_buf);
	if (sib_buf)
		xfs_trans_brelse(tp, sib_buf);
	xfs_trans_brelse(tp, last_buf);
	return error;
}
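
/*
 * Editor's summary of the swap above (added commentary): (1) copy the
 * last block's contents over the victim block and log it; (2) repoint
 * both siblings of the copied block at the victim's block number;
 * (3) walk down from the root to the parent node naming the old last
 * block and repoint that entry too; (4) hand the old last block back
 * to the caller as the one to actually unmap, which is safe because
 * trimming the last block off the file cannot split the bmap btree.
 */
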
/*
 * Remove a btree block from a directory or attribute.
 */
int
xfs_da_shrink_inode(
	xfs_da_args_t	*args,
	xfs_dablk_t	dead_blkno,
	struct xfs_buf	*dead_buf)
{
	xfs_inode_t *dp;
	int done, error, w, count;
	xfs_trans_t *tp;

	trace_xfs_da_shrink_inode(args);

	dp = args->dp;
	w = args->whichfork;
	tp = args->trans;
	count = args->geo->fsbcount;
	for (;;) {
		/*
		 * Remove extents.  If we get ENOSPC for a dir we have to move
		 * the last block to the place we want to kill.
		 */
		error = xfs_bunmapi(tp, dp, dead_blkno, count,
				    xfs_bmapi_aflag(w), 0, args->firstblock,
				    args->dfops, &done);
		if (error == -ENOSPC) {
			if (w != XFS_DATA_FORK)
				break;
			error = xfs_da3_swap_lastblock(args, &dead_blkno,
						       &dead_buf);
			if (error)
				break;
		} else {
			break;
		}
	}
	xfs_trans_binval(tp, dead_buf);
	return error;
}

/*
 * See if the mapping(s) for this btree block are valid, i.e.
 * don't contain holes, are logically contiguous, and cover the whole range.
 */
STATIC int
xfs_da_map_covers_blocks(
	int		nmap,
	xfs_bmbt_irec_t	*mapp,
	xfs_dablk_t	bno,
	int		count)
{
	int		i;
	xfs_fileoff_t	off;

	for (i = 0, off = bno; i < nmap; i++) {
		if (mapp[i].br_startblock == HOLESTARTBLOCK ||
		    mapp[i].br_startblock == DELAYSTARTBLOCK) {
			return 0;
		}
		if (off != mapp[i].br_startoff) {
			return 0;
		}
		off += mapp[i].br_blockcount;
	}
	return off == bno + count;
}
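
/*
 * Editor's worked example (hypothetical numbers, written as
 * {br_startoff, br_blockcount}): for bno = 64 and count = 3, the
 * mappings {64, 2} {66, 1} pass: off advances 64 -> 66 -> 67 and
 * 67 == 64 + 3.  The mappings {64, 2} {67, 1} fail the
 * off != br_startoff test because of the hole at 66, and a single
 * short mapping {64, 2} fails the final coverage test, 66 != 67.
 */
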
/*
 * Convert a struct xfs_bmbt_irec to a struct xfs_buf_map.
 *
 * For the single map case, it is assumed that the caller has provided a pointer
 * to a valid xfs_buf_map.  For the multiple map case, this function will
 * allocate the xfs_buf_map to hold all the maps and replace the caller's single
 * map pointer with the allocated map.
 */
static int
xfs_buf_map_from_irec(
	struct xfs_mount	*mp,
	struct xfs_buf_map	**mapp,
	int			*nmaps,
	struct xfs_bmbt_irec	*irecs,
	int			nirecs)
{
	struct xfs_buf_map	*map;
	int			i;

	ASSERT(*nmaps == 1);
	ASSERT(nirecs >= 1);

	if (nirecs > 1) {
		map = kmem_zalloc(nirecs * sizeof(struct xfs_buf_map),
				  KM_SLEEP | KM_NOFS);
		if (!map)
			return -ENOMEM;
		*mapp = map;
	}

	*nmaps = nirecs;
	map = *mapp;
	for (i = 0; i < *nmaps; i++) {
		ASSERT(irecs[i].br_startblock != DELAYSTARTBLOCK &&
		       irecs[i].br_startblock != HOLESTARTBLOCK);
		map[i].bm_bn = XFS_FSB_TO_DADDR(mp, irecs[i].br_startblock);
		map[i].bm_len = XFS_FSB_TO_BB(mp, irecs[i].br_blockcount);
	}
	return 0;
}

/*
 * Map the block we are given ready for reading. There are three possible return
 * values:
 *	-1 - will be returned if we land in a hole and mappedbno == -2 so the
 *	     caller knows not to execute a subsequent read.
 *	 0 - if we mapped the block successfully
 *	<0 - negative errno if there was an error.
 */
static int
xfs_dabuf_map(
	struct xfs_inode	*dp,
	xfs_dablk_t		bno,
	xfs_daddr_t		mappedbno,
	int			whichfork,
	struct xfs_buf_map	**map,
	int			*nmaps)
{
	struct xfs_mount	*mp = dp->i_mount;
	int			nfsb;
	int			error = 0;
	struct xfs_bmbt_irec	irec;
	struct xfs_bmbt_irec	*irecs = &irec;
	int			nirecs;

	ASSERT(map && *map);
	ASSERT(*nmaps == 1);

	if (whichfork == XFS_DATA_FORK)
		nfsb = mp->m_dir_geo->fsbcount;
	else
		nfsb = mp->m_attr_geo->fsbcount;

	/*
	 * Caller doesn't have a mapping.  -2 means don't complain
	 * if we land in a hole.
	 */
	if (mappedbno == -1 || mappedbno == -2) {
		/*
		 * Optimize the one-block case.
		 */
		if (nfsb != 1)
			irecs = kmem_zalloc(sizeof(irec) * nfsb,
					    KM_SLEEP | KM_NOFS);

		nirecs = nfsb;
		error = xfs_bmapi_read(dp, (xfs_fileoff_t)bno, nfsb, irecs,
				       &nirecs, xfs_bmapi_aflag(whichfork));
		if (error)
			goto out;
	} else {
		irecs->br_startblock = XFS_DADDR_TO_FSB(mp, mappedbno);
		irecs->br_startoff = (xfs_fileoff_t)bno;
		irecs->br_blockcount = nfsb;
		irecs->br_state = 0;
		nirecs = 1;
	}

	if (!xfs_da_map_covers_blocks(nirecs, irecs, bno, nfsb)) {
		error = mappedbno == -2 ? -1 : -EFSCORRUPTED;
		if (unlikely(error == -EFSCORRUPTED)) {
			if (xfs_error_level >= XFS_ERRLEVEL_LOW) {
				int i;
				xfs_alert(mp, "%s: bno %lld dir: inode %lld",
					__func__, (long long)bno,
					(long long)dp->i_ino);
				for (i = 0; i < *nmaps; i++) {
					xfs_alert(mp,
"[%02d] br_startoff %lld br_startblock %lld br_blockcount %lld br_state %d",
						i,
						(long long)irecs[i].br_startoff,
						(long long)irecs[i].br_startblock,
						(long long)irecs[i].br_blockcount,
						irecs[i].br_state);
				}
			}
			XFS_ERROR_REPORT("xfs_da_do_buf(1)",
					 XFS_ERRLEVEL_LOW, mp);
		}
		goto out;
	}
	error = xfs_buf_map_from_irec(mp, map, nmaps, irecs, nirecs);
out:
	if (irecs != &irec)
		kmem_free(irecs);
	return error;
}
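
/*
 * Editor's note on the calling convention above (added commentary):
 * all three callers below start with a single struct xfs_buf_map on
 * the stack and nmap = 1.  If the block spans more than one extent,
 * xfs_buf_map_from_irec() replaces the caller's map pointer with a
 * kmem_zalloc()ed array, so every caller must finish with the same
 * cleanup:
 *
 *	if (mapp != &map)
 *		kmem_free(mapp);
 */
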
/*
 * Get a buffer for the dir/attr block.
 */
int
xfs_da_get_buf(
	struct xfs_trans	*trans,
	struct xfs_inode	*dp,
	xfs_dablk_t		bno,
	xfs_daddr_t		mappedbno,
	struct xfs_buf		**bpp,
	int			whichfork)
{
	struct xfs_buf		*bp;
	struct xfs_buf_map	map;
	struct xfs_buf_map	*mapp;
	int			nmap;
	int			error;

	*bpp = NULL;
	mapp = &map;
	nmap = 1;
	error = xfs_dabuf_map(dp, bno, mappedbno, whichfork,
			      &mapp, &nmap);
	if (error) {
		/* mapping a hole is not an error, but we don't continue */
		if (error == -1)
			error = 0;
		goto out_free;
	}

	bp = xfs_trans_get_buf_map(trans, dp->i_mount->m_ddev_targp,
				   mapp, nmap, 0);
	error = bp ? bp->b_error : -EIO;
	if (error) {
		if (bp)
			xfs_trans_brelse(trans, bp);
		goto out_free;
	}

	*bpp = bp;

out_free:
	if (mapp != &map)
		kmem_free(mapp);

	return error;
}

/*
 * Get a buffer for the dir/attr block, fill in the contents.
 */
int
xfs_da_read_buf(
	struct xfs_trans	*trans,
	struct xfs_inode	*dp,
	xfs_dablk_t		bno,
	xfs_daddr_t		mappedbno,
	struct xfs_buf		**bpp,
	int			whichfork,
	const struct xfs_buf_ops *ops)
{
	struct xfs_buf		*bp;
	struct xfs_buf_map	map;
	struct xfs_buf_map	*mapp;
	int			nmap;
	int			error;

	*bpp = NULL;
	mapp = &map;
	nmap = 1;
	error = xfs_dabuf_map(dp, bno, mappedbno, whichfork,
			      &mapp, &nmap);
	if (error) {
		/* mapping a hole is not an error, but we don't continue */
		if (error == -1)
			error = 0;
		goto out_free;
	}

	error = xfs_trans_read_buf_map(dp->i_mount, trans,
				       dp->i_mount->m_ddev_targp,
				       mapp, nmap, 0, &bp, ops);
	if (error)
		goto out_free;

	if (whichfork == XFS_ATTR_FORK)
		xfs_buf_set_ref(bp, XFS_ATTR_BTREE_REF);
	else
		xfs_buf_set_ref(bp, XFS_DIR_BTREE_REF);
	*bpp = bp;
out_free:
	if (mapp != &map)
		kmem_free(mapp);

	return error;
}

/*
 * Readahead the dir/attr block.
 */
int
xfs_da_reada_buf(
	struct xfs_inode	*dp,
	xfs_dablk_t		bno,
	xfs_daddr_t		mappedbno,
	int			whichfork,
	const struct xfs_buf_ops *ops)
{
	struct xfs_buf_map	map;
	struct xfs_buf_map	*mapp;
	int			nmap;
	int			error;

	mapp = &map;
	nmap = 1;
	error = xfs_dabuf_map(dp, bno, mappedbno, whichfork,
			      &mapp, &nmap);
	if (error) {
		/* mapping a hole is not an error, but we don't continue */
		if (error == -1)
			error = 0;
		goto out_free;
	}

	mappedbno = mapp[0].bm_bn;
	xfs_buf_readahead_map(dp->i_mount->m_ddev_targp, mapp, nmap, ops);

out_free:
	if (mapp != &map)
		kmem_free(mapp);

	return error;
}
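
/*
 * Editor's usage sketch (hypothetical caller, not from the original
 * source): the mappedbno argument threads straight through to
 * xfs_dabuf_map(), so a non-negative value is trusted as the known
 * disk address, -1 means "map the block for me", and -2 means "map it,
 * and treat landing in a hole as success".  In the -2 case
 * xfs_da_get_buf() and xfs_da_read_buf() return 0 with *bpp still
 * NULL:
 *
 *	struct xfs_buf	*bp;
 *	int		error;
 *
 *	error = xfs_da_read_buf(tp, dp, blkno, -2, &bp, XFS_DATA_FORK,
 *				ops);
 *	if (error)
 *		return error;
 *	if (!bp)
 *		return 0;	(nothing allocated at blkno)
 */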