1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc. 4 * All Rights Reserved. 5 */ 6 #include "xfs.h" 7 #include "xfs_fs.h" 8 #include "xfs_shared.h" 9 #include "xfs_format.h" 10 #include "xfs_log_format.h" 11 #include "xfs_trans_resv.h" 12 #include "xfs_bit.h" 13 #include "xfs_mount.h" 14 #include "xfs_inode.h" 15 #include "xfs_btree.h" 16 #include "xfs_ialloc.h" 17 #include "xfs_ialloc_btree.h" 18 #include "xfs_alloc.h" 19 #include "xfs_error.h" 20 #include "xfs_trace.h" 21 #include "xfs_cksum.h" 22 #include "xfs_trans.h" 23 #include "xfs_rmap.h" 24 25 26 STATIC int 27 xfs_inobt_get_minrecs( 28 struct xfs_btree_cur *cur, 29 int level) 30 { 31 return cur->bc_mp->m_inobt_mnr[level != 0]; 32 } 33 34 STATIC struct xfs_btree_cur * 35 xfs_inobt_dup_cursor( 36 struct xfs_btree_cur *cur) 37 { 38 return xfs_inobt_init_cursor(cur->bc_mp, cur->bc_tp, 39 cur->bc_private.a.agbp, cur->bc_private.a.agno, 40 cur->bc_btnum); 41 } 42 43 STATIC void 44 xfs_inobt_set_root( 45 struct xfs_btree_cur *cur, 46 union xfs_btree_ptr *nptr, 47 int inc) /* level change */ 48 { 49 struct xfs_buf *agbp = cur->bc_private.a.agbp; 50 struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); 51 52 agi->agi_root = nptr->s; 53 be32_add_cpu(&agi->agi_level, inc); 54 xfs_ialloc_log_agi(cur->bc_tp, agbp, XFS_AGI_ROOT | XFS_AGI_LEVEL); 55 } 56 57 STATIC void 58 xfs_finobt_set_root( 59 struct xfs_btree_cur *cur, 60 union xfs_btree_ptr *nptr, 61 int inc) /* level change */ 62 { 63 struct xfs_buf *agbp = cur->bc_private.a.agbp; 64 struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); 65 66 agi->agi_free_root = nptr->s; 67 be32_add_cpu(&agi->agi_free_level, inc); 68 xfs_ialloc_log_agi(cur->bc_tp, agbp, 69 XFS_AGI_FREE_ROOT | XFS_AGI_FREE_LEVEL); 70 } 71 72 STATIC int 73 __xfs_inobt_alloc_block( 74 struct xfs_btree_cur *cur, 75 union xfs_btree_ptr *start, 76 union xfs_btree_ptr *new, 77 int *stat, 78 enum xfs_ag_resv_type resv) 79 { 80 xfs_alloc_arg_t args; /* block allocation args */ 81 int error; /* error return value */ 82 xfs_agblock_t sbno = be32_to_cpu(start->s); 83 84 memset(&args, 0, sizeof(args)); 85 args.tp = cur->bc_tp; 86 args.mp = cur->bc_mp; 87 args.oinfo = XFS_RMAP_OINFO_INOBT; 88 args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.a.agno, sbno); 89 args.minlen = 1; 90 args.maxlen = 1; 91 args.prod = 1; 92 args.type = XFS_ALLOCTYPE_NEAR_BNO; 93 args.resv = resv; 94 95 error = xfs_alloc_vextent(&args); 96 if (error) 97 return error; 98 99 if (args.fsbno == NULLFSBLOCK) { 100 *stat = 0; 101 return 0; 102 } 103 ASSERT(args.len == 1); 104 105 new->s = cpu_to_be32(XFS_FSB_TO_AGBNO(args.mp, args.fsbno)); 106 *stat = 1; 107 return 0; 108 } 109 110 STATIC int 111 xfs_inobt_alloc_block( 112 struct xfs_btree_cur *cur, 113 union xfs_btree_ptr *start, 114 union xfs_btree_ptr *new, 115 int *stat) 116 { 117 return __xfs_inobt_alloc_block(cur, start, new, stat, XFS_AG_RESV_NONE); 118 } 119 120 STATIC int 121 xfs_finobt_alloc_block( 122 struct xfs_btree_cur *cur, 123 union xfs_btree_ptr *start, 124 union xfs_btree_ptr *new, 125 int *stat) 126 { 127 if (cur->bc_mp->m_finobt_nores) 128 return xfs_inobt_alloc_block(cur, start, new, stat); 129 return __xfs_inobt_alloc_block(cur, start, new, stat, 130 XFS_AG_RESV_METADATA); 131 } 132 133 STATIC int 134 __xfs_inobt_free_block( 135 struct xfs_btree_cur *cur, 136 struct xfs_buf *bp, 137 enum xfs_ag_resv_type resv) 138 { 139 return xfs_free_extent(cur->bc_tp, 140 XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(bp)), 1, 141 &XFS_RMAP_OINFO_INOBT, resv); 142 } 143 144 STATIC int 145 xfs_inobt_free_block( 146 struct xfs_btree_cur *cur, 147 struct xfs_buf *bp) 148 { 149 return __xfs_inobt_free_block(cur, bp, XFS_AG_RESV_NONE); 150 } 151 152 STATIC int 153 xfs_finobt_free_block( 154 struct xfs_btree_cur *cur, 155 struct xfs_buf *bp) 156 { 157 if (cur->bc_mp->m_finobt_nores) 158 return xfs_inobt_free_block(cur, bp); 159 return __xfs_inobt_free_block(cur, bp, XFS_AG_RESV_METADATA); 160 } 161 162 STATIC int 163 xfs_inobt_get_maxrecs( 164 struct xfs_btree_cur *cur, 165 int level) 166 { 167 return cur->bc_mp->m_inobt_mxr[level != 0]; 168 } 169 170 STATIC void 171 xfs_inobt_init_key_from_rec( 172 union xfs_btree_key *key, 173 union xfs_btree_rec *rec) 174 { 175 key->inobt.ir_startino = rec->inobt.ir_startino; 176 } 177 178 STATIC void 179 xfs_inobt_init_high_key_from_rec( 180 union xfs_btree_key *key, 181 union xfs_btree_rec *rec) 182 { 183 __u32 x; 184 185 x = be32_to_cpu(rec->inobt.ir_startino); 186 x += XFS_INODES_PER_CHUNK - 1; 187 key->inobt.ir_startino = cpu_to_be32(x); 188 } 189 190 STATIC void 191 xfs_inobt_init_rec_from_cur( 192 struct xfs_btree_cur *cur, 193 union xfs_btree_rec *rec) 194 { 195 rec->inobt.ir_startino = cpu_to_be32(cur->bc_rec.i.ir_startino); 196 if (xfs_sb_version_hassparseinodes(&cur->bc_mp->m_sb)) { 197 rec->inobt.ir_u.sp.ir_holemask = 198 cpu_to_be16(cur->bc_rec.i.ir_holemask); 199 rec->inobt.ir_u.sp.ir_count = cur->bc_rec.i.ir_count; 200 rec->inobt.ir_u.sp.ir_freecount = cur->bc_rec.i.ir_freecount; 201 } else { 202 /* ir_holemask/ir_count not supported on-disk */ 203 rec->inobt.ir_u.f.ir_freecount = 204 cpu_to_be32(cur->bc_rec.i.ir_freecount); 205 } 206 rec->inobt.ir_free = cpu_to_be64(cur->bc_rec.i.ir_free); 207 } 208 209 /* 210 * initial value of ptr for lookup 211 */ 212 STATIC void 213 xfs_inobt_init_ptr_from_cur( 214 struct xfs_btree_cur *cur, 215 union xfs_btree_ptr *ptr) 216 { 217 struct xfs_agi *agi = XFS_BUF_TO_AGI(cur->bc_private.a.agbp); 218 219 ASSERT(cur->bc_private.a.agno == be32_to_cpu(agi->agi_seqno)); 220 221 ptr->s = agi->agi_root; 222 } 223 224 STATIC void 225 xfs_finobt_init_ptr_from_cur( 226 struct xfs_btree_cur *cur, 227 union xfs_btree_ptr *ptr) 228 { 229 struct xfs_agi *agi = XFS_BUF_TO_AGI(cur->bc_private.a.agbp); 230 231 ASSERT(cur->bc_private.a.agno == be32_to_cpu(agi->agi_seqno)); 232 ptr->s = agi->agi_free_root; 233 } 234 235 STATIC int64_t 236 xfs_inobt_key_diff( 237 struct xfs_btree_cur *cur, 238 union xfs_btree_key *key) 239 { 240 return (int64_t)be32_to_cpu(key->inobt.ir_startino) - 241 cur->bc_rec.i.ir_startino; 242 } 243 244 STATIC int64_t 245 xfs_inobt_diff_two_keys( 246 struct xfs_btree_cur *cur, 247 union xfs_btree_key *k1, 248 union xfs_btree_key *k2) 249 { 250 return (int64_t)be32_to_cpu(k1->inobt.ir_startino) - 251 be32_to_cpu(k2->inobt.ir_startino); 252 } 253 254 static xfs_failaddr_t 255 xfs_inobt_verify( 256 struct xfs_buf *bp) 257 { 258 struct xfs_mount *mp = bp->b_target->bt_mount; 259 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 260 xfs_failaddr_t fa; 261 unsigned int level; 262 263 if (!xfs_verify_magic(bp, block->bb_magic)) 264 return __this_address; 265 266 /* 267 * During growfs operations, we can't verify the exact owner as the 268 * perag is not fully initialised and hence not attached to the buffer. 269 * 270 * Similarly, during log recovery we will have a perag structure 271 * attached, but the agi information will not yet have been initialised 272 * from the on disk AGI. We don't currently use any of this information, 273 * but beware of the landmine (i.e. need to check pag->pagi_init) if we 274 * ever do. 275 */ 276 if (xfs_sb_version_hascrc(&mp->m_sb)) { 277 fa = xfs_btree_sblock_v5hdr_verify(bp); 278 if (fa) 279 return fa; 280 } 281 282 /* level verification */ 283 level = be16_to_cpu(block->bb_level); 284 if (level >= mp->m_in_maxlevels) 285 return __this_address; 286 287 return xfs_btree_sblock_verify(bp, mp->m_inobt_mxr[level != 0]); 288 } 289 290 static void 291 xfs_inobt_read_verify( 292 struct xfs_buf *bp) 293 { 294 xfs_failaddr_t fa; 295 296 if (!xfs_btree_sblock_verify_crc(bp)) 297 xfs_verifier_error(bp, -EFSBADCRC, __this_address); 298 else { 299 fa = xfs_inobt_verify(bp); 300 if (fa) 301 xfs_verifier_error(bp, -EFSCORRUPTED, fa); 302 } 303 304 if (bp->b_error) 305 trace_xfs_btree_corrupt(bp, _RET_IP_); 306 } 307 308 static void 309 xfs_inobt_write_verify( 310 struct xfs_buf *bp) 311 { 312 xfs_failaddr_t fa; 313 314 fa = xfs_inobt_verify(bp); 315 if (fa) { 316 trace_xfs_btree_corrupt(bp, _RET_IP_); 317 xfs_verifier_error(bp, -EFSCORRUPTED, fa); 318 return; 319 } 320 xfs_btree_sblock_calc_crc(bp); 321 322 } 323 324 const struct xfs_buf_ops xfs_inobt_buf_ops = { 325 .name = "xfs_inobt", 326 .magic = { cpu_to_be32(XFS_IBT_MAGIC), cpu_to_be32(XFS_IBT_CRC_MAGIC) }, 327 .verify_read = xfs_inobt_read_verify, 328 .verify_write = xfs_inobt_write_verify, 329 .verify_struct = xfs_inobt_verify, 330 }; 331 332 const struct xfs_buf_ops xfs_finobt_buf_ops = { 333 .name = "xfs_finobt", 334 .magic = { cpu_to_be32(XFS_FIBT_MAGIC), 335 cpu_to_be32(XFS_FIBT_CRC_MAGIC) }, 336 .verify_read = xfs_inobt_read_verify, 337 .verify_write = xfs_inobt_write_verify, 338 .verify_struct = xfs_inobt_verify, 339 }; 340 341 STATIC int 342 xfs_inobt_keys_inorder( 343 struct xfs_btree_cur *cur, 344 union xfs_btree_key *k1, 345 union xfs_btree_key *k2) 346 { 347 return be32_to_cpu(k1->inobt.ir_startino) < 348 be32_to_cpu(k2->inobt.ir_startino); 349 } 350 351 STATIC int 352 xfs_inobt_recs_inorder( 353 struct xfs_btree_cur *cur, 354 union xfs_btree_rec *r1, 355 union xfs_btree_rec *r2) 356 { 357 return be32_to_cpu(r1->inobt.ir_startino) + XFS_INODES_PER_CHUNK <= 358 be32_to_cpu(r2->inobt.ir_startino); 359 } 360 361 static const struct xfs_btree_ops xfs_inobt_ops = { 362 .rec_len = sizeof(xfs_inobt_rec_t), 363 .key_len = sizeof(xfs_inobt_key_t), 364 365 .dup_cursor = xfs_inobt_dup_cursor, 366 .set_root = xfs_inobt_set_root, 367 .alloc_block = xfs_inobt_alloc_block, 368 .free_block = xfs_inobt_free_block, 369 .get_minrecs = xfs_inobt_get_minrecs, 370 .get_maxrecs = xfs_inobt_get_maxrecs, 371 .init_key_from_rec = xfs_inobt_init_key_from_rec, 372 .init_high_key_from_rec = xfs_inobt_init_high_key_from_rec, 373 .init_rec_from_cur = xfs_inobt_init_rec_from_cur, 374 .init_ptr_from_cur = xfs_inobt_init_ptr_from_cur, 375 .key_diff = xfs_inobt_key_diff, 376 .buf_ops = &xfs_inobt_buf_ops, 377 .diff_two_keys = xfs_inobt_diff_two_keys, 378 .keys_inorder = xfs_inobt_keys_inorder, 379 .recs_inorder = xfs_inobt_recs_inorder, 380 }; 381 382 static const struct xfs_btree_ops xfs_finobt_ops = { 383 .rec_len = sizeof(xfs_inobt_rec_t), 384 .key_len = sizeof(xfs_inobt_key_t), 385 386 .dup_cursor = xfs_inobt_dup_cursor, 387 .set_root = xfs_finobt_set_root, 388 .alloc_block = xfs_finobt_alloc_block, 389 .free_block = xfs_finobt_free_block, 390 .get_minrecs = xfs_inobt_get_minrecs, 391 .get_maxrecs = xfs_inobt_get_maxrecs, 392 .init_key_from_rec = xfs_inobt_init_key_from_rec, 393 .init_high_key_from_rec = xfs_inobt_init_high_key_from_rec, 394 .init_rec_from_cur = xfs_inobt_init_rec_from_cur, 395 .init_ptr_from_cur = xfs_finobt_init_ptr_from_cur, 396 .key_diff = xfs_inobt_key_diff, 397 .buf_ops = &xfs_finobt_buf_ops, 398 .diff_two_keys = xfs_inobt_diff_two_keys, 399 .keys_inorder = xfs_inobt_keys_inorder, 400 .recs_inorder = xfs_inobt_recs_inorder, 401 }; 402 403 /* 404 * Allocate a new inode btree cursor. 405 */ 406 struct xfs_btree_cur * /* new inode btree cursor */ 407 xfs_inobt_init_cursor( 408 struct xfs_mount *mp, /* file system mount point */ 409 struct xfs_trans *tp, /* transaction pointer */ 410 struct xfs_buf *agbp, /* buffer for agi structure */ 411 xfs_agnumber_t agno, /* allocation group number */ 412 xfs_btnum_t btnum) /* ialloc or free ino btree */ 413 { 414 struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp); 415 struct xfs_btree_cur *cur; 416 417 cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_NOFS); 418 419 cur->bc_tp = tp; 420 cur->bc_mp = mp; 421 cur->bc_btnum = btnum; 422 if (btnum == XFS_BTNUM_INO) { 423 cur->bc_nlevels = be32_to_cpu(agi->agi_level); 424 cur->bc_ops = &xfs_inobt_ops; 425 cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_ibt_2); 426 } else { 427 cur->bc_nlevels = be32_to_cpu(agi->agi_free_level); 428 cur->bc_ops = &xfs_finobt_ops; 429 cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_fibt_2); 430 } 431 432 cur->bc_blocklog = mp->m_sb.sb_blocklog; 433 434 if (xfs_sb_version_hascrc(&mp->m_sb)) 435 cur->bc_flags |= XFS_BTREE_CRC_BLOCKS; 436 437 cur->bc_private.a.agbp = agbp; 438 cur->bc_private.a.agno = agno; 439 440 return cur; 441 } 442 443 /* 444 * Calculate number of records in an inobt btree block. 445 */ 446 int 447 xfs_inobt_maxrecs( 448 struct xfs_mount *mp, 449 int blocklen, 450 int leaf) 451 { 452 blocklen -= XFS_INOBT_BLOCK_LEN(mp); 453 454 if (leaf) 455 return blocklen / sizeof(xfs_inobt_rec_t); 456 return blocklen / (sizeof(xfs_inobt_key_t) + sizeof(xfs_inobt_ptr_t)); 457 } 458 459 /* 460 * Convert the inode record holemask to an inode allocation bitmap. The inode 461 * allocation bitmap is inode granularity and specifies whether an inode is 462 * physically allocated on disk (not whether the inode is considered allocated 463 * or free by the fs). 464 * 465 * A bit value of 1 means the inode is allocated, a value of 0 means it is free. 466 */ 467 uint64_t 468 xfs_inobt_irec_to_allocmask( 469 struct xfs_inobt_rec_incore *rec) 470 { 471 uint64_t bitmap = 0; 472 uint64_t inodespbit; 473 int nextbit; 474 uint allocbitmap; 475 476 /* 477 * The holemask has 16-bits for a 64 inode record. Therefore each 478 * holemask bit represents multiple inodes. Create a mask of bits to set 479 * in the allocmask for each holemask bit. 480 */ 481 inodespbit = (1 << XFS_INODES_PER_HOLEMASK_BIT) - 1; 482 483 /* 484 * Allocated inodes are represented by 0 bits in holemask. Invert the 0 485 * bits to 1 and convert to a uint so we can use xfs_next_bit(). Mask 486 * anything beyond the 16 holemask bits since this casts to a larger 487 * type. 488 */ 489 allocbitmap = ~rec->ir_holemask & ((1 << XFS_INOBT_HOLEMASK_BITS) - 1); 490 491 /* 492 * allocbitmap is the inverted holemask so every set bit represents 493 * allocated inodes. To expand from 16-bit holemask granularity to 494 * 64-bit (e.g., bit-per-inode), set inodespbit bits in the target 495 * bitmap for every holemask bit. 496 */ 497 nextbit = xfs_next_bit(&allocbitmap, 1, 0); 498 while (nextbit != -1) { 499 ASSERT(nextbit < (sizeof(rec->ir_holemask) * NBBY)); 500 501 bitmap |= (inodespbit << 502 (nextbit * XFS_INODES_PER_HOLEMASK_BIT)); 503 504 nextbit = xfs_next_bit(&allocbitmap, 1, nextbit + 1); 505 } 506 507 return bitmap; 508 } 509 510 #if defined(DEBUG) || defined(XFS_WARN) 511 /* 512 * Verify that an in-core inode record has a valid inode count. 513 */ 514 int 515 xfs_inobt_rec_check_count( 516 struct xfs_mount *mp, 517 struct xfs_inobt_rec_incore *rec) 518 { 519 int inocount = 0; 520 int nextbit = 0; 521 uint64_t allocbmap; 522 int wordsz; 523 524 wordsz = sizeof(allocbmap) / sizeof(unsigned int); 525 allocbmap = xfs_inobt_irec_to_allocmask(rec); 526 527 nextbit = xfs_next_bit((uint *) &allocbmap, wordsz, nextbit); 528 while (nextbit != -1) { 529 inocount++; 530 nextbit = xfs_next_bit((uint *) &allocbmap, wordsz, 531 nextbit + 1); 532 } 533 534 if (inocount != rec->ir_count) 535 return -EFSCORRUPTED; 536 537 return 0; 538 } 539 #endif /* DEBUG */ 540 541 static xfs_extlen_t 542 xfs_inobt_max_size( 543 struct xfs_mount *mp, 544 xfs_agnumber_t agno) 545 { 546 xfs_agblock_t agblocks = xfs_ag_block_count(mp, agno); 547 548 /* Bail out if we're uninitialized, which can happen in mkfs. */ 549 if (mp->m_inobt_mxr[0] == 0) 550 return 0; 551 552 /* 553 * The log is permanently allocated, so the space it occupies will 554 * never be available for the kinds of things that would require btree 555 * expansion. We therefore can pretend the space isn't there. 556 */ 557 if (mp->m_sb.sb_logstart && 558 XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart) == agno) 559 agblocks -= mp->m_sb.sb_logblocks; 560 561 return xfs_btree_calc_size(mp->m_inobt_mnr, 562 (uint64_t)agblocks * mp->m_sb.sb_inopblock / 563 XFS_INODES_PER_CHUNK); 564 } 565 566 static int 567 xfs_inobt_count_blocks( 568 struct xfs_mount *mp, 569 struct xfs_trans *tp, 570 xfs_agnumber_t agno, 571 xfs_btnum_t btnum, 572 xfs_extlen_t *tree_blocks) 573 { 574 struct xfs_buf *agbp; 575 struct xfs_btree_cur *cur; 576 int error; 577 578 error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); 579 if (error) 580 return error; 581 582 cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, btnum); 583 error = xfs_btree_count_blocks(cur, tree_blocks); 584 xfs_btree_del_cursor(cur, error); 585 xfs_trans_brelse(tp, agbp); 586 587 return error; 588 } 589 590 /* 591 * Figure out how many blocks to reserve and how many are used by this btree. 592 */ 593 int 594 xfs_finobt_calc_reserves( 595 struct xfs_mount *mp, 596 struct xfs_trans *tp, 597 xfs_agnumber_t agno, 598 xfs_extlen_t *ask, 599 xfs_extlen_t *used) 600 { 601 xfs_extlen_t tree_len = 0; 602 int error; 603 604 if (!xfs_sb_version_hasfinobt(&mp->m_sb)) 605 return 0; 606 607 error = xfs_inobt_count_blocks(mp, tp, agno, XFS_BTNUM_FINO, &tree_len); 608 if (error) 609 return error; 610 611 *ask += xfs_inobt_max_size(mp, agno); 612 *used += tree_len; 613 return 0; 614 } 615 616 /* Calculate the inobt btree size for some records. */ 617 xfs_extlen_t 618 xfs_iallocbt_calc_size( 619 struct xfs_mount *mp, 620 unsigned long long len) 621 { 622 return xfs_btree_calc_size(mp->m_inobt_mnr, len); 623 } 624