1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc. 4 * All Rights Reserved. 5 */ 6 #include "xfs.h" 7 #include "xfs_fs.h" 8 #include "xfs_shared.h" 9 #include "xfs_format.h" 10 #include "xfs_log_format.h" 11 #include "xfs_trans_resv.h" 12 #include "xfs_bit.h" 13 #include "xfs_mount.h" 14 #include "xfs_btree.h" 15 #include "xfs_ialloc.h" 16 #include "xfs_ialloc_btree.h" 17 #include "xfs_alloc.h" 18 #include "xfs_error.h" 19 #include "xfs_trace.h" 20 #include "xfs_trans.h" 21 #include "xfs_rmap.h" 22 23 24 STATIC int 25 xfs_inobt_get_minrecs( 26 struct xfs_btree_cur *cur, 27 int level) 28 { 29 return M_IGEO(cur->bc_mp)->inobt_mnr[level != 0]; 30 } 31 32 STATIC struct xfs_btree_cur * 33 xfs_inobt_dup_cursor( 34 struct xfs_btree_cur *cur) 35 { 36 return xfs_inobt_init_cursor(cur->bc_mp, cur->bc_tp, 37 cur->bc_private.a.agbp, cur->bc_private.a.agno, 38 cur->bc_btnum); 39 } 40 41 STATIC void 42 xfs_inobt_set_root( 43 struct xfs_btree_cur *cur, 44 union xfs_btree_ptr *nptr, 45 int inc) /* level change */ 46 { 47 struct xfs_buf *agbp = cur->bc_private.a.agbp; 48 struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); 49 50 agi->agi_root = nptr->s; 51 be32_add_cpu(&agi->agi_level, inc); 52 xfs_ialloc_log_agi(cur->bc_tp, agbp, XFS_AGI_ROOT | XFS_AGI_LEVEL); 53 } 54 55 STATIC void 56 xfs_finobt_set_root( 57 struct xfs_btree_cur *cur, 58 union xfs_btree_ptr *nptr, 59 int inc) /* level change */ 60 { 61 struct xfs_buf *agbp = cur->bc_private.a.agbp; 62 struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); 63 64 agi->agi_free_root = nptr->s; 65 be32_add_cpu(&agi->agi_free_level, inc); 66 xfs_ialloc_log_agi(cur->bc_tp, agbp, 67 XFS_AGI_FREE_ROOT | XFS_AGI_FREE_LEVEL); 68 } 69 70 STATIC int 71 __xfs_inobt_alloc_block( 72 struct xfs_btree_cur *cur, 73 union xfs_btree_ptr *start, 74 union xfs_btree_ptr *new, 75 int *stat, 76 enum xfs_ag_resv_type resv) 77 { 78 xfs_alloc_arg_t args; /* block allocation args */ 79 int error; /* error return value */ 80 xfs_agblock_t sbno = be32_to_cpu(start->s); 81 82 memset(&args, 0, sizeof(args)); 83 args.tp = cur->bc_tp; 84 args.mp = cur->bc_mp; 85 args.oinfo = XFS_RMAP_OINFO_INOBT; 86 args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.a.agno, sbno); 87 args.minlen = 1; 88 args.maxlen = 1; 89 args.prod = 1; 90 args.type = XFS_ALLOCTYPE_NEAR_BNO; 91 args.resv = resv; 92 93 error = xfs_alloc_vextent(&args); 94 if (error) 95 return error; 96 97 if (args.fsbno == NULLFSBLOCK) { 98 *stat = 0; 99 return 0; 100 } 101 ASSERT(args.len == 1); 102 103 new->s = cpu_to_be32(XFS_FSB_TO_AGBNO(args.mp, args.fsbno)); 104 *stat = 1; 105 return 0; 106 } 107 108 STATIC int 109 xfs_inobt_alloc_block( 110 struct xfs_btree_cur *cur, 111 union xfs_btree_ptr *start, 112 union xfs_btree_ptr *new, 113 int *stat) 114 { 115 return __xfs_inobt_alloc_block(cur, start, new, stat, XFS_AG_RESV_NONE); 116 } 117 118 STATIC int 119 xfs_finobt_alloc_block( 120 struct xfs_btree_cur *cur, 121 union xfs_btree_ptr *start, 122 union xfs_btree_ptr *new, 123 int *stat) 124 { 125 if (cur->bc_mp->m_finobt_nores) 126 return xfs_inobt_alloc_block(cur, start, new, stat); 127 return __xfs_inobt_alloc_block(cur, start, new, stat, 128 XFS_AG_RESV_METADATA); 129 } 130 131 STATIC int 132 __xfs_inobt_free_block( 133 struct xfs_btree_cur *cur, 134 struct xfs_buf *bp, 135 enum xfs_ag_resv_type resv) 136 { 137 return xfs_free_extent(cur->bc_tp, 138 XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(bp)), 1, 139 &XFS_RMAP_OINFO_INOBT, resv); 140 } 141 142 STATIC int 143 xfs_inobt_free_block( 144 struct xfs_btree_cur *cur, 145 struct xfs_buf *bp) 146 { 147 return __xfs_inobt_free_block(cur, bp, XFS_AG_RESV_NONE); 148 } 149 150 STATIC int 151 xfs_finobt_free_block( 152 struct xfs_btree_cur *cur, 153 struct xfs_buf *bp) 154 { 155 if (cur->bc_mp->m_finobt_nores) 156 return xfs_inobt_free_block(cur, bp); 157 return __xfs_inobt_free_block(cur, bp, XFS_AG_RESV_METADATA); 158 } 159 160 STATIC int 161 xfs_inobt_get_maxrecs( 162 struct xfs_btree_cur *cur, 163 int level) 164 { 165 return M_IGEO(cur->bc_mp)->inobt_mxr[level != 0]; 166 } 167 168 STATIC void 169 xfs_inobt_init_key_from_rec( 170 union xfs_btree_key *key, 171 union xfs_btree_rec *rec) 172 { 173 key->inobt.ir_startino = rec->inobt.ir_startino; 174 } 175 176 STATIC void 177 xfs_inobt_init_high_key_from_rec( 178 union xfs_btree_key *key, 179 union xfs_btree_rec *rec) 180 { 181 __u32 x; 182 183 x = be32_to_cpu(rec->inobt.ir_startino); 184 x += XFS_INODES_PER_CHUNK - 1; 185 key->inobt.ir_startino = cpu_to_be32(x); 186 } 187 188 STATIC void 189 xfs_inobt_init_rec_from_cur( 190 struct xfs_btree_cur *cur, 191 union xfs_btree_rec *rec) 192 { 193 rec->inobt.ir_startino = cpu_to_be32(cur->bc_rec.i.ir_startino); 194 if (xfs_sb_version_hassparseinodes(&cur->bc_mp->m_sb)) { 195 rec->inobt.ir_u.sp.ir_holemask = 196 cpu_to_be16(cur->bc_rec.i.ir_holemask); 197 rec->inobt.ir_u.sp.ir_count = cur->bc_rec.i.ir_count; 198 rec->inobt.ir_u.sp.ir_freecount = cur->bc_rec.i.ir_freecount; 199 } else { 200 /* ir_holemask/ir_count not supported on-disk */ 201 rec->inobt.ir_u.f.ir_freecount = 202 cpu_to_be32(cur->bc_rec.i.ir_freecount); 203 } 204 rec->inobt.ir_free = cpu_to_be64(cur->bc_rec.i.ir_free); 205 } 206 207 /* 208 * initial value of ptr for lookup 209 */ 210 STATIC void 211 xfs_inobt_init_ptr_from_cur( 212 struct xfs_btree_cur *cur, 213 union xfs_btree_ptr *ptr) 214 { 215 struct xfs_agi *agi = XFS_BUF_TO_AGI(cur->bc_private.a.agbp); 216 217 ASSERT(cur->bc_private.a.agno == be32_to_cpu(agi->agi_seqno)); 218 219 ptr->s = agi->agi_root; 220 } 221 222 STATIC void 223 xfs_finobt_init_ptr_from_cur( 224 struct xfs_btree_cur *cur, 225 union xfs_btree_ptr *ptr) 226 { 227 struct xfs_agi *agi = XFS_BUF_TO_AGI(cur->bc_private.a.agbp); 228 229 ASSERT(cur->bc_private.a.agno == be32_to_cpu(agi->agi_seqno)); 230 ptr->s = agi->agi_free_root; 231 } 232 233 STATIC int64_t 234 xfs_inobt_key_diff( 235 struct xfs_btree_cur *cur, 236 union xfs_btree_key *key) 237 { 238 return (int64_t)be32_to_cpu(key->inobt.ir_startino) - 239 cur->bc_rec.i.ir_startino; 240 } 241 242 STATIC int64_t 243 xfs_inobt_diff_two_keys( 244 struct xfs_btree_cur *cur, 245 union xfs_btree_key *k1, 246 union xfs_btree_key *k2) 247 { 248 return (int64_t)be32_to_cpu(k1->inobt.ir_startino) - 249 be32_to_cpu(k2->inobt.ir_startino); 250 } 251 252 static xfs_failaddr_t 253 xfs_inobt_verify( 254 struct xfs_buf *bp) 255 { 256 struct xfs_mount *mp = bp->b_mount; 257 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 258 xfs_failaddr_t fa; 259 unsigned int level; 260 261 if (!xfs_verify_magic(bp, block->bb_magic)) 262 return __this_address; 263 264 /* 265 * During growfs operations, we can't verify the exact owner as the 266 * perag is not fully initialised and hence not attached to the buffer. 267 * 268 * Similarly, during log recovery we will have a perag structure 269 * attached, but the agi information will not yet have been initialised 270 * from the on disk AGI. We don't currently use any of this information, 271 * but beware of the landmine (i.e. need to check pag->pagi_init) if we 272 * ever do. 273 */ 274 if (xfs_sb_version_hascrc(&mp->m_sb)) { 275 fa = xfs_btree_sblock_v5hdr_verify(bp); 276 if (fa) 277 return fa; 278 } 279 280 /* level verification */ 281 level = be16_to_cpu(block->bb_level); 282 if (level >= M_IGEO(mp)->inobt_maxlevels) 283 return __this_address; 284 285 return xfs_btree_sblock_verify(bp, 286 M_IGEO(mp)->inobt_mxr[level != 0]); 287 } 288 289 static void 290 xfs_inobt_read_verify( 291 struct xfs_buf *bp) 292 { 293 xfs_failaddr_t fa; 294 295 if (!xfs_btree_sblock_verify_crc(bp)) 296 xfs_verifier_error(bp, -EFSBADCRC, __this_address); 297 else { 298 fa = xfs_inobt_verify(bp); 299 if (fa) 300 xfs_verifier_error(bp, -EFSCORRUPTED, fa); 301 } 302 303 if (bp->b_error) 304 trace_xfs_btree_corrupt(bp, _RET_IP_); 305 } 306 307 static void 308 xfs_inobt_write_verify( 309 struct xfs_buf *bp) 310 { 311 xfs_failaddr_t fa; 312 313 fa = xfs_inobt_verify(bp); 314 if (fa) { 315 trace_xfs_btree_corrupt(bp, _RET_IP_); 316 xfs_verifier_error(bp, -EFSCORRUPTED, fa); 317 return; 318 } 319 xfs_btree_sblock_calc_crc(bp); 320 321 } 322 323 const struct xfs_buf_ops xfs_inobt_buf_ops = { 324 .name = "xfs_inobt", 325 .magic = { cpu_to_be32(XFS_IBT_MAGIC), cpu_to_be32(XFS_IBT_CRC_MAGIC) }, 326 .verify_read = xfs_inobt_read_verify, 327 .verify_write = xfs_inobt_write_verify, 328 .verify_struct = xfs_inobt_verify, 329 }; 330 331 const struct xfs_buf_ops xfs_finobt_buf_ops = { 332 .name = "xfs_finobt", 333 .magic = { cpu_to_be32(XFS_FIBT_MAGIC), 334 cpu_to_be32(XFS_FIBT_CRC_MAGIC) }, 335 .verify_read = xfs_inobt_read_verify, 336 .verify_write = xfs_inobt_write_verify, 337 .verify_struct = xfs_inobt_verify, 338 }; 339 340 STATIC int 341 xfs_inobt_keys_inorder( 342 struct xfs_btree_cur *cur, 343 union xfs_btree_key *k1, 344 union xfs_btree_key *k2) 345 { 346 return be32_to_cpu(k1->inobt.ir_startino) < 347 be32_to_cpu(k2->inobt.ir_startino); 348 } 349 350 STATIC int 351 xfs_inobt_recs_inorder( 352 struct xfs_btree_cur *cur, 353 union xfs_btree_rec *r1, 354 union xfs_btree_rec *r2) 355 { 356 return be32_to_cpu(r1->inobt.ir_startino) + XFS_INODES_PER_CHUNK <= 357 be32_to_cpu(r2->inobt.ir_startino); 358 } 359 360 static const struct xfs_btree_ops xfs_inobt_ops = { 361 .rec_len = sizeof(xfs_inobt_rec_t), 362 .key_len = sizeof(xfs_inobt_key_t), 363 364 .dup_cursor = xfs_inobt_dup_cursor, 365 .set_root = xfs_inobt_set_root, 366 .alloc_block = xfs_inobt_alloc_block, 367 .free_block = xfs_inobt_free_block, 368 .get_minrecs = xfs_inobt_get_minrecs, 369 .get_maxrecs = xfs_inobt_get_maxrecs, 370 .init_key_from_rec = xfs_inobt_init_key_from_rec, 371 .init_high_key_from_rec = xfs_inobt_init_high_key_from_rec, 372 .init_rec_from_cur = xfs_inobt_init_rec_from_cur, 373 .init_ptr_from_cur = xfs_inobt_init_ptr_from_cur, 374 .key_diff = xfs_inobt_key_diff, 375 .buf_ops = &xfs_inobt_buf_ops, 376 .diff_two_keys = xfs_inobt_diff_two_keys, 377 .keys_inorder = xfs_inobt_keys_inorder, 378 .recs_inorder = xfs_inobt_recs_inorder, 379 }; 380 381 static const struct xfs_btree_ops xfs_finobt_ops = { 382 .rec_len = sizeof(xfs_inobt_rec_t), 383 .key_len = sizeof(xfs_inobt_key_t), 384 385 .dup_cursor = xfs_inobt_dup_cursor, 386 .set_root = xfs_finobt_set_root, 387 .alloc_block = xfs_finobt_alloc_block, 388 .free_block = xfs_finobt_free_block, 389 .get_minrecs = xfs_inobt_get_minrecs, 390 .get_maxrecs = xfs_inobt_get_maxrecs, 391 .init_key_from_rec = xfs_inobt_init_key_from_rec, 392 .init_high_key_from_rec = xfs_inobt_init_high_key_from_rec, 393 .init_rec_from_cur = xfs_inobt_init_rec_from_cur, 394 .init_ptr_from_cur = xfs_finobt_init_ptr_from_cur, 395 .key_diff = xfs_inobt_key_diff, 396 .buf_ops = &xfs_finobt_buf_ops, 397 .diff_two_keys = xfs_inobt_diff_two_keys, 398 .keys_inorder = xfs_inobt_keys_inorder, 399 .recs_inorder = xfs_inobt_recs_inorder, 400 }; 401 402 /* 403 * Allocate a new inode btree cursor. 404 */ 405 struct xfs_btree_cur * /* new inode btree cursor */ 406 xfs_inobt_init_cursor( 407 struct xfs_mount *mp, /* file system mount point */ 408 struct xfs_trans *tp, /* transaction pointer */ 409 struct xfs_buf *agbp, /* buffer for agi structure */ 410 xfs_agnumber_t agno, /* allocation group number */ 411 xfs_btnum_t btnum) /* ialloc or free ino btree */ 412 { 413 struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); 414 struct xfs_btree_cur *cur; 415 416 cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_NOFS); 417 418 cur->bc_tp = tp; 419 cur->bc_mp = mp; 420 cur->bc_btnum = btnum; 421 if (btnum == XFS_BTNUM_INO) { 422 cur->bc_nlevels = be32_to_cpu(agi->agi_level); 423 cur->bc_ops = &xfs_inobt_ops; 424 cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_ibt_2); 425 } else { 426 cur->bc_nlevels = be32_to_cpu(agi->agi_free_level); 427 cur->bc_ops = &xfs_finobt_ops; 428 cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_fibt_2); 429 } 430 431 cur->bc_blocklog = mp->m_sb.sb_blocklog; 432 433 if (xfs_sb_version_hascrc(&mp->m_sb)) 434 cur->bc_flags |= XFS_BTREE_CRC_BLOCKS; 435 436 cur->bc_private.a.agbp = agbp; 437 cur->bc_private.a.agno = agno; 438 439 return cur; 440 } 441 442 /* 443 * Calculate number of records in an inobt btree block. 444 */ 445 int 446 xfs_inobt_maxrecs( 447 struct xfs_mount *mp, 448 int blocklen, 449 int leaf) 450 { 451 blocklen -= XFS_INOBT_BLOCK_LEN(mp); 452 453 if (leaf) 454 return blocklen / sizeof(xfs_inobt_rec_t); 455 return blocklen / (sizeof(xfs_inobt_key_t) + sizeof(xfs_inobt_ptr_t)); 456 } 457 458 /* 459 * Convert the inode record holemask to an inode allocation bitmap. The inode 460 * allocation bitmap is inode granularity and specifies whether an inode is 461 * physically allocated on disk (not whether the inode is considered allocated 462 * or free by the fs). 463 * 464 * A bit value of 1 means the inode is allocated, a value of 0 means it is free. 465 */ 466 uint64_t 467 xfs_inobt_irec_to_allocmask( 468 struct xfs_inobt_rec_incore *rec) 469 { 470 uint64_t bitmap = 0; 471 uint64_t inodespbit; 472 int nextbit; 473 uint allocbitmap; 474 475 /* 476 * The holemask has 16-bits for a 64 inode record. Therefore each 477 * holemask bit represents multiple inodes. Create a mask of bits to set 478 * in the allocmask for each holemask bit. 479 */ 480 inodespbit = (1 << XFS_INODES_PER_HOLEMASK_BIT) - 1; 481 482 /* 483 * Allocated inodes are represented by 0 bits in holemask. Invert the 0 484 * bits to 1 and convert to a uint so we can use xfs_next_bit(). Mask 485 * anything beyond the 16 holemask bits since this casts to a larger 486 * type. 487 */ 488 allocbitmap = ~rec->ir_holemask & ((1 << XFS_INOBT_HOLEMASK_BITS) - 1); 489 490 /* 491 * allocbitmap is the inverted holemask so every set bit represents 492 * allocated inodes. To expand from 16-bit holemask granularity to 493 * 64-bit (e.g., bit-per-inode), set inodespbit bits in the target 494 * bitmap for every holemask bit. 495 */ 496 nextbit = xfs_next_bit(&allocbitmap, 1, 0); 497 while (nextbit != -1) { 498 ASSERT(nextbit < (sizeof(rec->ir_holemask) * NBBY)); 499 500 bitmap |= (inodespbit << 501 (nextbit * XFS_INODES_PER_HOLEMASK_BIT)); 502 503 nextbit = xfs_next_bit(&allocbitmap, 1, nextbit + 1); 504 } 505 506 return bitmap; 507 } 508 509 #if defined(DEBUG) || defined(XFS_WARN) 510 /* 511 * Verify that an in-core inode record has a valid inode count. 512 */ 513 int 514 xfs_inobt_rec_check_count( 515 struct xfs_mount *mp, 516 struct xfs_inobt_rec_incore *rec) 517 { 518 int inocount = 0; 519 int nextbit = 0; 520 uint64_t allocbmap; 521 int wordsz; 522 523 wordsz = sizeof(allocbmap) / sizeof(unsigned int); 524 allocbmap = xfs_inobt_irec_to_allocmask(rec); 525 526 nextbit = xfs_next_bit((uint *) &allocbmap, wordsz, nextbit); 527 while (nextbit != -1) { 528 inocount++; 529 nextbit = xfs_next_bit((uint *) &allocbmap, wordsz, 530 nextbit + 1); 531 } 532 533 if (inocount != rec->ir_count) 534 return -EFSCORRUPTED; 535 536 return 0; 537 } 538 #endif /* DEBUG */ 539 540 static xfs_extlen_t 541 xfs_inobt_max_size( 542 struct xfs_mount *mp, 543 xfs_agnumber_t agno) 544 { 545 xfs_agblock_t agblocks = xfs_ag_block_count(mp, agno); 546 547 /* Bail out if we're uninitialized, which can happen in mkfs. */ 548 if (M_IGEO(mp)->inobt_mxr[0] == 0) 549 return 0; 550 551 /* 552 * The log is permanently allocated, so the space it occupies will 553 * never be available for the kinds of things that would require btree 554 * expansion. We therefore can pretend the space isn't there. 555 */ 556 if (mp->m_sb.sb_logstart && 557 XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart) == agno) 558 agblocks -= mp->m_sb.sb_logblocks; 559 560 return xfs_btree_calc_size(M_IGEO(mp)->inobt_mnr, 561 (uint64_t)agblocks * mp->m_sb.sb_inopblock / 562 XFS_INODES_PER_CHUNK); 563 } 564 565 /* Read AGI and create inobt cursor. */ 566 int 567 xfs_inobt_cur( 568 struct xfs_mount *mp, 569 struct xfs_trans *tp, 570 xfs_agnumber_t agno, 571 xfs_btnum_t which, 572 struct xfs_btree_cur **curpp, 573 struct xfs_buf **agi_bpp) 574 { 575 struct xfs_btree_cur *cur; 576 int error; 577 578 ASSERT(*agi_bpp == NULL); 579 ASSERT(*curpp == NULL); 580 581 error = xfs_ialloc_read_agi(mp, tp, agno, agi_bpp); 582 if (error) 583 return error; 584 585 cur = xfs_inobt_init_cursor(mp, tp, *agi_bpp, agno, which); 586 if (!cur) { 587 xfs_trans_brelse(tp, *agi_bpp); 588 *agi_bpp = NULL; 589 return -ENOMEM; 590 } 591 *curpp = cur; 592 return 0; 593 } 594 595 static int 596 xfs_inobt_count_blocks( 597 struct xfs_mount *mp, 598 struct xfs_trans *tp, 599 xfs_agnumber_t agno, 600 xfs_btnum_t btnum, 601 xfs_extlen_t *tree_blocks) 602 { 603 struct xfs_buf *agbp = NULL; 604 struct xfs_btree_cur *cur = NULL; 605 int error; 606 607 error = xfs_inobt_cur(mp, tp, agno, btnum, &cur, &agbp); 608 if (error) 609 return error; 610 611 error = xfs_btree_count_blocks(cur, tree_blocks); 612 xfs_btree_del_cursor(cur, error); 613 xfs_trans_brelse(tp, agbp); 614 615 return error; 616 } 617 618 /* 619 * Figure out how many blocks to reserve and how many are used by this btree. 620 */ 621 int 622 xfs_finobt_calc_reserves( 623 struct xfs_mount *mp, 624 struct xfs_trans *tp, 625 xfs_agnumber_t agno, 626 xfs_extlen_t *ask, 627 xfs_extlen_t *used) 628 { 629 xfs_extlen_t tree_len = 0; 630 int error; 631 632 if (!xfs_sb_version_hasfinobt(&mp->m_sb)) 633 return 0; 634 635 error = xfs_inobt_count_blocks(mp, tp, agno, XFS_BTNUM_FINO, &tree_len); 636 if (error) 637 return error; 638 639 *ask += xfs_inobt_max_size(mp, agno); 640 *used += tree_len; 641 return 0; 642 } 643 644 /* Calculate the inobt btree size for some records. */ 645 xfs_extlen_t 646 xfs_iallocbt_calc_size( 647 struct xfs_mount *mp, 648 unsigned long long len) 649 { 650 return xfs_btree_calc_size(M_IGEO(mp)->inobt_mnr, len); 651 } 652