1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. 4 * All Rights Reserved. 5 */ 6 #include "xfs.h" 7 #include "xfs_fs.h" 8 #include "xfs_format.h" 9 #include "xfs_log_format.h" 10 #include "xfs_shared.h" 11 #include "xfs_trans_resv.h" 12 #include "xfs_bit.h" 13 #include "xfs_sb.h" 14 #include "xfs_mount.h" 15 #include "xfs_defer.h" 16 #include "xfs_btree.h" 17 #include "xfs_rmap.h" 18 #include "xfs_alloc_btree.h" 19 #include "xfs_alloc.h" 20 #include "xfs_extent_busy.h" 21 #include "xfs_errortag.h" 22 #include "xfs_error.h" 23 #include "xfs_trace.h" 24 #include "xfs_trans.h" 25 #include "xfs_buf_item.h" 26 #include "xfs_log.h" 27 #include "xfs_ag_resv.h" 28 #include "xfs_bmap.h" 29 30 extern kmem_zone_t *xfs_bmap_free_item_zone; 31 32 struct workqueue_struct *xfs_alloc_wq; 33 34 #define XFS_ABSDIFF(a,b) (((a) <= (b)) ? ((b) - (a)) : ((a) - (b))) 35 36 #define XFSA_FIXUP_BNO_OK 1 37 #define XFSA_FIXUP_CNT_OK 2 38 39 STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *); 40 STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *); 41 STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *); 42 43 /* 44 * Size of the AGFL. For CRC-enabled filesystes we steal a couple of slots in 45 * the beginning of the block for a proper header with the location information 46 * and CRC. 47 */ 48 unsigned int 49 xfs_agfl_size( 50 struct xfs_mount *mp) 51 { 52 unsigned int size = mp->m_sb.sb_sectsize; 53 54 if (xfs_sb_version_hascrc(&mp->m_sb)) 55 size -= sizeof(struct xfs_agfl); 56 57 return size / sizeof(xfs_agblock_t); 58 } 59 60 unsigned int 61 xfs_refc_block( 62 struct xfs_mount *mp) 63 { 64 if (xfs_sb_version_hasrmapbt(&mp->m_sb)) 65 return XFS_RMAP_BLOCK(mp) + 1; 66 if (xfs_sb_version_hasfinobt(&mp->m_sb)) 67 return XFS_FIBT_BLOCK(mp) + 1; 68 return XFS_IBT_BLOCK(mp) + 1; 69 } 70 71 xfs_extlen_t 72 xfs_prealloc_blocks( 73 struct xfs_mount *mp) 74 { 75 if (xfs_sb_version_hasreflink(&mp->m_sb)) 76 return xfs_refc_block(mp) + 1; 77 if (xfs_sb_version_hasrmapbt(&mp->m_sb)) 78 return XFS_RMAP_BLOCK(mp) + 1; 79 if (xfs_sb_version_hasfinobt(&mp->m_sb)) 80 return XFS_FIBT_BLOCK(mp) + 1; 81 return XFS_IBT_BLOCK(mp) + 1; 82 } 83 84 /* 85 * In order to avoid ENOSPC-related deadlock caused by out-of-order locking of 86 * AGF buffer (PV 947395), we place constraints on the relationship among 87 * actual allocations for data blocks, freelist blocks, and potential file data 88 * bmap btree blocks. However, these restrictions may result in no actual space 89 * allocated for a delayed extent, for example, a data block in a certain AG is 90 * allocated but there is no additional block for the additional bmap btree 91 * block due to a split of the bmap btree of the file. The result of this may 92 * lead to an infinite loop when the file gets flushed to disk and all delayed 93 * extents need to be actually allocated. To get around this, we explicitly set 94 * aside a few blocks which will not be reserved in delayed allocation. 95 * 96 * We need to reserve 4 fsbs _per AG_ for the freelist and 4 more to handle a 97 * potential split of the file's bmap btree. 98 */ 99 unsigned int 100 xfs_alloc_set_aside( 101 struct xfs_mount *mp) 102 { 103 return mp->m_sb.sb_agcount * (XFS_ALLOC_AGFL_RESERVE + 4); 104 } 105 106 /* 107 * When deciding how much space to allocate out of an AG, we limit the 108 * allocation maximum size to the size the AG. However, we cannot use all the 109 * blocks in the AG - some are permanently used by metadata. These 110 * blocks are generally: 111 * - the AG superblock, AGF, AGI and AGFL 112 * - the AGF (bno and cnt) and AGI btree root blocks, and optionally 113 * the AGI free inode and rmap btree root blocks. 114 * - blocks on the AGFL according to xfs_alloc_set_aside() limits 115 * - the rmapbt root block 116 * 117 * The AG headers are sector sized, so the amount of space they take up is 118 * dependent on filesystem geometry. The others are all single blocks. 119 */ 120 unsigned int 121 xfs_alloc_ag_max_usable( 122 struct xfs_mount *mp) 123 { 124 unsigned int blocks; 125 126 blocks = XFS_BB_TO_FSB(mp, XFS_FSS_TO_BB(mp, 4)); /* ag headers */ 127 blocks += XFS_ALLOC_AGFL_RESERVE; 128 blocks += 3; /* AGF, AGI btree root blocks */ 129 if (xfs_sb_version_hasfinobt(&mp->m_sb)) 130 blocks++; /* finobt root block */ 131 if (xfs_sb_version_hasrmapbt(&mp->m_sb)) 132 blocks++; /* rmap root block */ 133 if (xfs_sb_version_hasreflink(&mp->m_sb)) 134 blocks++; /* refcount root block */ 135 136 return mp->m_sb.sb_agblocks - blocks; 137 } 138 139 /* 140 * Lookup the record equal to [bno, len] in the btree given by cur. 141 */ 142 STATIC int /* error */ 143 xfs_alloc_lookup_eq( 144 struct xfs_btree_cur *cur, /* btree cursor */ 145 xfs_agblock_t bno, /* starting block of extent */ 146 xfs_extlen_t len, /* length of extent */ 147 int *stat) /* success/failure */ 148 { 149 int error; 150 151 cur->bc_rec.a.ar_startblock = bno; 152 cur->bc_rec.a.ar_blockcount = len; 153 error = xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat); 154 cur->bc_private.a.priv.abt.active = (*stat == 1); 155 return error; 156 } 157 158 /* 159 * Lookup the first record greater than or equal to [bno, len] 160 * in the btree given by cur. 161 */ 162 int /* error */ 163 xfs_alloc_lookup_ge( 164 struct xfs_btree_cur *cur, /* btree cursor */ 165 xfs_agblock_t bno, /* starting block of extent */ 166 xfs_extlen_t len, /* length of extent */ 167 int *stat) /* success/failure */ 168 { 169 int error; 170 171 cur->bc_rec.a.ar_startblock = bno; 172 cur->bc_rec.a.ar_blockcount = len; 173 error = xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat); 174 cur->bc_private.a.priv.abt.active = (*stat == 1); 175 return error; 176 } 177 178 /* 179 * Lookup the first record less than or equal to [bno, len] 180 * in the btree given by cur. 181 */ 182 int /* error */ 183 xfs_alloc_lookup_le( 184 struct xfs_btree_cur *cur, /* btree cursor */ 185 xfs_agblock_t bno, /* starting block of extent */ 186 xfs_extlen_t len, /* length of extent */ 187 int *stat) /* success/failure */ 188 { 189 int error; 190 cur->bc_rec.a.ar_startblock = bno; 191 cur->bc_rec.a.ar_blockcount = len; 192 error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat); 193 cur->bc_private.a.priv.abt.active = (*stat == 1); 194 return error; 195 } 196 197 static inline bool 198 xfs_alloc_cur_active( 199 struct xfs_btree_cur *cur) 200 { 201 return cur && cur->bc_private.a.priv.abt.active; 202 } 203 204 /* 205 * Update the record referred to by cur to the value given 206 * by [bno, len]. 207 * This either works (return 0) or gets an EFSCORRUPTED error. 208 */ 209 STATIC int /* error */ 210 xfs_alloc_update( 211 struct xfs_btree_cur *cur, /* btree cursor */ 212 xfs_agblock_t bno, /* starting block of extent */ 213 xfs_extlen_t len) /* length of extent */ 214 { 215 union xfs_btree_rec rec; 216 217 rec.alloc.ar_startblock = cpu_to_be32(bno); 218 rec.alloc.ar_blockcount = cpu_to_be32(len); 219 return xfs_btree_update(cur, &rec); 220 } 221 222 /* 223 * Get the data from the pointed-to record. 224 */ 225 int /* error */ 226 xfs_alloc_get_rec( 227 struct xfs_btree_cur *cur, /* btree cursor */ 228 xfs_agblock_t *bno, /* output: starting block of extent */ 229 xfs_extlen_t *len, /* output: length of extent */ 230 int *stat) /* output: success/failure */ 231 { 232 struct xfs_mount *mp = cur->bc_mp; 233 xfs_agnumber_t agno = cur->bc_private.a.agno; 234 union xfs_btree_rec *rec; 235 int error; 236 237 error = xfs_btree_get_rec(cur, &rec, stat); 238 if (error || !(*stat)) 239 return error; 240 241 *bno = be32_to_cpu(rec->alloc.ar_startblock); 242 *len = be32_to_cpu(rec->alloc.ar_blockcount); 243 244 if (*len == 0) 245 goto out_bad_rec; 246 247 /* check for valid extent range, including overflow */ 248 if (!xfs_verify_agbno(mp, agno, *bno)) 249 goto out_bad_rec; 250 if (*bno > *bno + *len) 251 goto out_bad_rec; 252 if (!xfs_verify_agbno(mp, agno, *bno + *len - 1)) 253 goto out_bad_rec; 254 255 return 0; 256 257 out_bad_rec: 258 xfs_warn(mp, 259 "%s Freespace BTree record corruption in AG %d detected!", 260 cur->bc_btnum == XFS_BTNUM_BNO ? "Block" : "Size", agno); 261 xfs_warn(mp, 262 "start block 0x%x block count 0x%x", *bno, *len); 263 return -EFSCORRUPTED; 264 } 265 266 /* 267 * Compute aligned version of the found extent. 268 * Takes alignment and min length into account. 269 */ 270 STATIC bool 271 xfs_alloc_compute_aligned( 272 xfs_alloc_arg_t *args, /* allocation argument structure */ 273 xfs_agblock_t foundbno, /* starting block in found extent */ 274 xfs_extlen_t foundlen, /* length in found extent */ 275 xfs_agblock_t *resbno, /* result block number */ 276 xfs_extlen_t *reslen, /* result length */ 277 unsigned *busy_gen) 278 { 279 xfs_agblock_t bno = foundbno; 280 xfs_extlen_t len = foundlen; 281 xfs_extlen_t diff; 282 bool busy; 283 284 /* Trim busy sections out of found extent */ 285 busy = xfs_extent_busy_trim(args, &bno, &len, busy_gen); 286 287 /* 288 * If we have a largish extent that happens to start before min_agbno, 289 * see if we can shift it into range... 290 */ 291 if (bno < args->min_agbno && bno + len > args->min_agbno) { 292 diff = args->min_agbno - bno; 293 if (len > diff) { 294 bno += diff; 295 len -= diff; 296 } 297 } 298 299 if (args->alignment > 1 && len >= args->minlen) { 300 xfs_agblock_t aligned_bno = roundup(bno, args->alignment); 301 302 diff = aligned_bno - bno; 303 304 *resbno = aligned_bno; 305 *reslen = diff >= len ? 0 : len - diff; 306 } else { 307 *resbno = bno; 308 *reslen = len; 309 } 310 311 return busy; 312 } 313 314 /* 315 * Compute best start block and diff for "near" allocations. 316 * freelen >= wantlen already checked by caller. 317 */ 318 STATIC xfs_extlen_t /* difference value (absolute) */ 319 xfs_alloc_compute_diff( 320 xfs_agblock_t wantbno, /* target starting block */ 321 xfs_extlen_t wantlen, /* target length */ 322 xfs_extlen_t alignment, /* target alignment */ 323 int datatype, /* are we allocating data? */ 324 xfs_agblock_t freebno, /* freespace's starting block */ 325 xfs_extlen_t freelen, /* freespace's length */ 326 xfs_agblock_t *newbnop) /* result: best start block from free */ 327 { 328 xfs_agblock_t freeend; /* end of freespace extent */ 329 xfs_agblock_t newbno1; /* return block number */ 330 xfs_agblock_t newbno2; /* other new block number */ 331 xfs_extlen_t newlen1=0; /* length with newbno1 */ 332 xfs_extlen_t newlen2=0; /* length with newbno2 */ 333 xfs_agblock_t wantend; /* end of target extent */ 334 bool userdata = datatype & XFS_ALLOC_USERDATA; 335 336 ASSERT(freelen >= wantlen); 337 freeend = freebno + freelen; 338 wantend = wantbno + wantlen; 339 /* 340 * We want to allocate from the start of a free extent if it is past 341 * the desired block or if we are allocating user data and the free 342 * extent is before desired block. The second case is there to allow 343 * for contiguous allocation from the remaining free space if the file 344 * grows in the short term. 345 */ 346 if (freebno >= wantbno || (userdata && freeend < wantend)) { 347 if ((newbno1 = roundup(freebno, alignment)) >= freeend) 348 newbno1 = NULLAGBLOCK; 349 } else if (freeend >= wantend && alignment > 1) { 350 newbno1 = roundup(wantbno, alignment); 351 newbno2 = newbno1 - alignment; 352 if (newbno1 >= freeend) 353 newbno1 = NULLAGBLOCK; 354 else 355 newlen1 = XFS_EXTLEN_MIN(wantlen, freeend - newbno1); 356 if (newbno2 < freebno) 357 newbno2 = NULLAGBLOCK; 358 else 359 newlen2 = XFS_EXTLEN_MIN(wantlen, freeend - newbno2); 360 if (newbno1 != NULLAGBLOCK && newbno2 != NULLAGBLOCK) { 361 if (newlen1 < newlen2 || 362 (newlen1 == newlen2 && 363 XFS_ABSDIFF(newbno1, wantbno) > 364 XFS_ABSDIFF(newbno2, wantbno))) 365 newbno1 = newbno2; 366 } else if (newbno2 != NULLAGBLOCK) 367 newbno1 = newbno2; 368 } else if (freeend >= wantend) { 369 newbno1 = wantbno; 370 } else if (alignment > 1) { 371 newbno1 = roundup(freeend - wantlen, alignment); 372 if (newbno1 > freeend - wantlen && 373 newbno1 - alignment >= freebno) 374 newbno1 -= alignment; 375 else if (newbno1 >= freeend) 376 newbno1 = NULLAGBLOCK; 377 } else 378 newbno1 = freeend - wantlen; 379 *newbnop = newbno1; 380 return newbno1 == NULLAGBLOCK ? 0 : XFS_ABSDIFF(newbno1, wantbno); 381 } 382 383 /* 384 * Fix up the length, based on mod and prod. 385 * len should be k * prod + mod for some k. 386 * If len is too small it is returned unchanged. 387 * If len hits maxlen it is left alone. 388 */ 389 STATIC void 390 xfs_alloc_fix_len( 391 xfs_alloc_arg_t *args) /* allocation argument structure */ 392 { 393 xfs_extlen_t k; 394 xfs_extlen_t rlen; 395 396 ASSERT(args->mod < args->prod); 397 rlen = args->len; 398 ASSERT(rlen >= args->minlen); 399 ASSERT(rlen <= args->maxlen); 400 if (args->prod <= 1 || rlen < args->mod || rlen == args->maxlen || 401 (args->mod == 0 && rlen < args->prod)) 402 return; 403 k = rlen % args->prod; 404 if (k == args->mod) 405 return; 406 if (k > args->mod) 407 rlen = rlen - (k - args->mod); 408 else 409 rlen = rlen - args->prod + (args->mod - k); 410 /* casts to (int) catch length underflows */ 411 if ((int)rlen < (int)args->minlen) 412 return; 413 ASSERT(rlen >= args->minlen && rlen <= args->maxlen); 414 ASSERT(rlen % args->prod == args->mod); 415 ASSERT(args->pag->pagf_freeblks + args->pag->pagf_flcount >= 416 rlen + args->minleft); 417 args->len = rlen; 418 } 419 420 /* 421 * Update the two btrees, logically removing from freespace the extent 422 * starting at rbno, rlen blocks. The extent is contained within the 423 * actual (current) free extent fbno for flen blocks. 424 * Flags are passed in indicating whether the cursors are set to the 425 * relevant records. 426 */ 427 STATIC int /* error code */ 428 xfs_alloc_fixup_trees( 429 xfs_btree_cur_t *cnt_cur, /* cursor for by-size btree */ 430 xfs_btree_cur_t *bno_cur, /* cursor for by-block btree */ 431 xfs_agblock_t fbno, /* starting block of free extent */ 432 xfs_extlen_t flen, /* length of free extent */ 433 xfs_agblock_t rbno, /* starting block of returned extent */ 434 xfs_extlen_t rlen, /* length of returned extent */ 435 int flags) /* flags, XFSA_FIXUP_... */ 436 { 437 int error; /* error code */ 438 int i; /* operation results */ 439 xfs_agblock_t nfbno1; /* first new free startblock */ 440 xfs_agblock_t nfbno2; /* second new free startblock */ 441 xfs_extlen_t nflen1=0; /* first new free length */ 442 xfs_extlen_t nflen2=0; /* second new free length */ 443 struct xfs_mount *mp; 444 445 mp = cnt_cur->bc_mp; 446 447 /* 448 * Look up the record in the by-size tree if necessary. 449 */ 450 if (flags & XFSA_FIXUP_CNT_OK) { 451 #ifdef DEBUG 452 if ((error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i))) 453 return error; 454 if (XFS_IS_CORRUPT(mp, 455 i != 1 || 456 nfbno1 != fbno || 457 nflen1 != flen)) 458 return -EFSCORRUPTED; 459 #endif 460 } else { 461 if ((error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i))) 462 return error; 463 if (XFS_IS_CORRUPT(mp, i != 1)) 464 return -EFSCORRUPTED; 465 } 466 /* 467 * Look up the record in the by-block tree if necessary. 468 */ 469 if (flags & XFSA_FIXUP_BNO_OK) { 470 #ifdef DEBUG 471 if ((error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i))) 472 return error; 473 if (XFS_IS_CORRUPT(mp, 474 i != 1 || 475 nfbno1 != fbno || 476 nflen1 != flen)) 477 return -EFSCORRUPTED; 478 #endif 479 } else { 480 if ((error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i))) 481 return error; 482 if (XFS_IS_CORRUPT(mp, i != 1)) 483 return -EFSCORRUPTED; 484 } 485 486 #ifdef DEBUG 487 if (bno_cur->bc_nlevels == 1 && cnt_cur->bc_nlevels == 1) { 488 struct xfs_btree_block *bnoblock; 489 struct xfs_btree_block *cntblock; 490 491 bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_bufs[0]); 492 cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_bufs[0]); 493 494 if (XFS_IS_CORRUPT(mp, 495 bnoblock->bb_numrecs != 496 cntblock->bb_numrecs)) 497 return -EFSCORRUPTED; 498 } 499 #endif 500 501 /* 502 * Deal with all four cases: the allocated record is contained 503 * within the freespace record, so we can have new freespace 504 * at either (or both) end, or no freespace remaining. 505 */ 506 if (rbno == fbno && rlen == flen) 507 nfbno1 = nfbno2 = NULLAGBLOCK; 508 else if (rbno == fbno) { 509 nfbno1 = rbno + rlen; 510 nflen1 = flen - rlen; 511 nfbno2 = NULLAGBLOCK; 512 } else if (rbno + rlen == fbno + flen) { 513 nfbno1 = fbno; 514 nflen1 = flen - rlen; 515 nfbno2 = NULLAGBLOCK; 516 } else { 517 nfbno1 = fbno; 518 nflen1 = rbno - fbno; 519 nfbno2 = rbno + rlen; 520 nflen2 = (fbno + flen) - nfbno2; 521 } 522 /* 523 * Delete the entry from the by-size btree. 524 */ 525 if ((error = xfs_btree_delete(cnt_cur, &i))) 526 return error; 527 if (XFS_IS_CORRUPT(mp, i != 1)) 528 return -EFSCORRUPTED; 529 /* 530 * Add new by-size btree entry(s). 531 */ 532 if (nfbno1 != NULLAGBLOCK) { 533 if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i))) 534 return error; 535 if (XFS_IS_CORRUPT(mp, i != 0)) 536 return -EFSCORRUPTED; 537 if ((error = xfs_btree_insert(cnt_cur, &i))) 538 return error; 539 if (XFS_IS_CORRUPT(mp, i != 1)) 540 return -EFSCORRUPTED; 541 } 542 if (nfbno2 != NULLAGBLOCK) { 543 if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i))) 544 return error; 545 if (XFS_IS_CORRUPT(mp, i != 0)) 546 return -EFSCORRUPTED; 547 if ((error = xfs_btree_insert(cnt_cur, &i))) 548 return error; 549 if (XFS_IS_CORRUPT(mp, i != 1)) 550 return -EFSCORRUPTED; 551 } 552 /* 553 * Fix up the by-block btree entry(s). 554 */ 555 if (nfbno1 == NULLAGBLOCK) { 556 /* 557 * No remaining freespace, just delete the by-block tree entry. 558 */ 559 if ((error = xfs_btree_delete(bno_cur, &i))) 560 return error; 561 if (XFS_IS_CORRUPT(mp, i != 1)) 562 return -EFSCORRUPTED; 563 } else { 564 /* 565 * Update the by-block entry to start later|be shorter. 566 */ 567 if ((error = xfs_alloc_update(bno_cur, nfbno1, nflen1))) 568 return error; 569 } 570 if (nfbno2 != NULLAGBLOCK) { 571 /* 572 * 2 resulting free entries, need to add one. 573 */ 574 if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i))) 575 return error; 576 if (XFS_IS_CORRUPT(mp, i != 0)) 577 return -EFSCORRUPTED; 578 if ((error = xfs_btree_insert(bno_cur, &i))) 579 return error; 580 if (XFS_IS_CORRUPT(mp, i != 1)) 581 return -EFSCORRUPTED; 582 } 583 return 0; 584 } 585 586 static xfs_failaddr_t 587 xfs_agfl_verify( 588 struct xfs_buf *bp) 589 { 590 struct xfs_mount *mp = bp->b_mount; 591 struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp); 592 int i; 593 594 /* 595 * There is no verification of non-crc AGFLs because mkfs does not 596 * initialise the AGFL to zero or NULL. Hence the only valid part of the 597 * AGFL is what the AGF says is active. We can't get to the AGF, so we 598 * can't verify just those entries are valid. 599 */ 600 if (!xfs_sb_version_hascrc(&mp->m_sb)) 601 return NULL; 602 603 if (!xfs_verify_magic(bp, agfl->agfl_magicnum)) 604 return __this_address; 605 if (!uuid_equal(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid)) 606 return __this_address; 607 /* 608 * during growfs operations, the perag is not fully initialised, 609 * so we can't use it for any useful checking. growfs ensures we can't 610 * use it by using uncached buffers that don't have the perag attached 611 * so we can detect and avoid this problem. 612 */ 613 if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno) 614 return __this_address; 615 616 for (i = 0; i < xfs_agfl_size(mp); i++) { 617 if (be32_to_cpu(agfl->agfl_bno[i]) != NULLAGBLOCK && 618 be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks) 619 return __this_address; 620 } 621 622 if (!xfs_log_check_lsn(mp, be64_to_cpu(XFS_BUF_TO_AGFL(bp)->agfl_lsn))) 623 return __this_address; 624 return NULL; 625 } 626 627 static void 628 xfs_agfl_read_verify( 629 struct xfs_buf *bp) 630 { 631 struct xfs_mount *mp = bp->b_mount; 632 xfs_failaddr_t fa; 633 634 /* 635 * There is no verification of non-crc AGFLs because mkfs does not 636 * initialise the AGFL to zero or NULL. Hence the only valid part of the 637 * AGFL is what the AGF says is active. We can't get to the AGF, so we 638 * can't verify just those entries are valid. 639 */ 640 if (!xfs_sb_version_hascrc(&mp->m_sb)) 641 return; 642 643 if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF)) 644 xfs_verifier_error(bp, -EFSBADCRC, __this_address); 645 else { 646 fa = xfs_agfl_verify(bp); 647 if (fa) 648 xfs_verifier_error(bp, -EFSCORRUPTED, fa); 649 } 650 } 651 652 static void 653 xfs_agfl_write_verify( 654 struct xfs_buf *bp) 655 { 656 struct xfs_mount *mp = bp->b_mount; 657 struct xfs_buf_log_item *bip = bp->b_log_item; 658 xfs_failaddr_t fa; 659 660 /* no verification of non-crc AGFLs */ 661 if (!xfs_sb_version_hascrc(&mp->m_sb)) 662 return; 663 664 fa = xfs_agfl_verify(bp); 665 if (fa) { 666 xfs_verifier_error(bp, -EFSCORRUPTED, fa); 667 return; 668 } 669 670 if (bip) 671 XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn); 672 673 xfs_buf_update_cksum(bp, XFS_AGFL_CRC_OFF); 674 } 675 676 const struct xfs_buf_ops xfs_agfl_buf_ops = { 677 .name = "xfs_agfl", 678 .magic = { cpu_to_be32(XFS_AGFL_MAGIC), cpu_to_be32(XFS_AGFL_MAGIC) }, 679 .verify_read = xfs_agfl_read_verify, 680 .verify_write = xfs_agfl_write_verify, 681 .verify_struct = xfs_agfl_verify, 682 }; 683 684 /* 685 * Read in the allocation group free block array. 686 */ 687 int /* error */ 688 xfs_alloc_read_agfl( 689 xfs_mount_t *mp, /* mount point structure */ 690 xfs_trans_t *tp, /* transaction pointer */ 691 xfs_agnumber_t agno, /* allocation group number */ 692 xfs_buf_t **bpp) /* buffer for the ag free block array */ 693 { 694 xfs_buf_t *bp; /* return value */ 695 int error; 696 697 ASSERT(agno != NULLAGNUMBER); 698 error = xfs_trans_read_buf( 699 mp, tp, mp->m_ddev_targp, 700 XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)), 701 XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_agfl_buf_ops); 702 if (error) 703 return error; 704 xfs_buf_set_ref(bp, XFS_AGFL_REF); 705 *bpp = bp; 706 return 0; 707 } 708 709 STATIC int 710 xfs_alloc_update_counters( 711 struct xfs_trans *tp, 712 struct xfs_perag *pag, 713 struct xfs_buf *agbp, 714 long len) 715 { 716 struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); 717 718 pag->pagf_freeblks += len; 719 be32_add_cpu(&agf->agf_freeblks, len); 720 721 xfs_trans_agblocks_delta(tp, len); 722 if (unlikely(be32_to_cpu(agf->agf_freeblks) > 723 be32_to_cpu(agf->agf_length))) { 724 xfs_buf_corruption_error(agbp); 725 return -EFSCORRUPTED; 726 } 727 728 xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS); 729 return 0; 730 } 731 732 /* 733 * Block allocation algorithm and data structures. 734 */ 735 struct xfs_alloc_cur { 736 struct xfs_btree_cur *cnt; /* btree cursors */ 737 struct xfs_btree_cur *bnolt; 738 struct xfs_btree_cur *bnogt; 739 xfs_extlen_t cur_len;/* current search length */ 740 xfs_agblock_t rec_bno;/* extent startblock */ 741 xfs_extlen_t rec_len;/* extent length */ 742 xfs_agblock_t bno; /* alloc bno */ 743 xfs_extlen_t len; /* alloc len */ 744 xfs_extlen_t diff; /* diff from search bno */ 745 unsigned int busy_gen;/* busy state */ 746 bool busy; 747 }; 748 749 /* 750 * Set up cursors, etc. in the extent allocation cursor. This function can be 751 * called multiple times to reset an initialized structure without having to 752 * reallocate cursors. 753 */ 754 static int 755 xfs_alloc_cur_setup( 756 struct xfs_alloc_arg *args, 757 struct xfs_alloc_cur *acur) 758 { 759 int error; 760 int i; 761 762 ASSERT(args->alignment == 1 || args->type != XFS_ALLOCTYPE_THIS_BNO); 763 764 acur->cur_len = args->maxlen; 765 acur->rec_bno = 0; 766 acur->rec_len = 0; 767 acur->bno = 0; 768 acur->len = 0; 769 acur->diff = -1; 770 acur->busy = false; 771 acur->busy_gen = 0; 772 773 /* 774 * Perform an initial cntbt lookup to check for availability of maxlen 775 * extents. If this fails, we'll return -ENOSPC to signal the caller to 776 * attempt a small allocation. 777 */ 778 if (!acur->cnt) 779 acur->cnt = xfs_allocbt_init_cursor(args->mp, args->tp, 780 args->agbp, args->agno, XFS_BTNUM_CNT); 781 error = xfs_alloc_lookup_ge(acur->cnt, 0, args->maxlen, &i); 782 if (error) 783 return error; 784 785 /* 786 * Allocate the bnobt left and right search cursors. 787 */ 788 if (!acur->bnolt) 789 acur->bnolt = xfs_allocbt_init_cursor(args->mp, args->tp, 790 args->agbp, args->agno, XFS_BTNUM_BNO); 791 if (!acur->bnogt) 792 acur->bnogt = xfs_allocbt_init_cursor(args->mp, args->tp, 793 args->agbp, args->agno, XFS_BTNUM_BNO); 794 return i == 1 ? 0 : -ENOSPC; 795 } 796 797 static void 798 xfs_alloc_cur_close( 799 struct xfs_alloc_cur *acur, 800 bool error) 801 { 802 int cur_error = XFS_BTREE_NOERROR; 803 804 if (error) 805 cur_error = XFS_BTREE_ERROR; 806 807 if (acur->cnt) 808 xfs_btree_del_cursor(acur->cnt, cur_error); 809 if (acur->bnolt) 810 xfs_btree_del_cursor(acur->bnolt, cur_error); 811 if (acur->bnogt) 812 xfs_btree_del_cursor(acur->bnogt, cur_error); 813 acur->cnt = acur->bnolt = acur->bnogt = NULL; 814 } 815 816 /* 817 * Check an extent for allocation and track the best available candidate in the 818 * allocation structure. The cursor is deactivated if it has entered an out of 819 * range state based on allocation arguments. Optionally return the extent 820 * extent geometry and allocation status if requested by the caller. 821 */ 822 static int 823 xfs_alloc_cur_check( 824 struct xfs_alloc_arg *args, 825 struct xfs_alloc_cur *acur, 826 struct xfs_btree_cur *cur, 827 int *new) 828 { 829 int error, i; 830 xfs_agblock_t bno, bnoa, bnew; 831 xfs_extlen_t len, lena, diff = -1; 832 bool busy; 833 unsigned busy_gen = 0; 834 bool deactivate = false; 835 bool isbnobt = cur->bc_btnum == XFS_BTNUM_BNO; 836 837 *new = 0; 838 839 error = xfs_alloc_get_rec(cur, &bno, &len, &i); 840 if (error) 841 return error; 842 if (XFS_IS_CORRUPT(args->mp, i != 1)) 843 return -EFSCORRUPTED; 844 845 /* 846 * Check minlen and deactivate a cntbt cursor if out of acceptable size 847 * range (i.e., walking backwards looking for a minlen extent). 848 */ 849 if (len < args->minlen) { 850 deactivate = !isbnobt; 851 goto out; 852 } 853 854 busy = xfs_alloc_compute_aligned(args, bno, len, &bnoa, &lena, 855 &busy_gen); 856 acur->busy |= busy; 857 if (busy) 858 acur->busy_gen = busy_gen; 859 /* deactivate a bnobt cursor outside of locality range */ 860 if (bnoa < args->min_agbno || bnoa > args->max_agbno) { 861 deactivate = isbnobt; 862 goto out; 863 } 864 if (lena < args->minlen) 865 goto out; 866 867 args->len = XFS_EXTLEN_MIN(lena, args->maxlen); 868 xfs_alloc_fix_len(args); 869 ASSERT(args->len >= args->minlen); 870 if (args->len < acur->len) 871 goto out; 872 873 /* 874 * We have an aligned record that satisfies minlen and beats or matches 875 * the candidate extent size. Compare locality for near allocation mode. 876 */ 877 ASSERT(args->type == XFS_ALLOCTYPE_NEAR_BNO); 878 diff = xfs_alloc_compute_diff(args->agbno, args->len, 879 args->alignment, args->datatype, 880 bnoa, lena, &bnew); 881 if (bnew == NULLAGBLOCK) 882 goto out; 883 884 /* 885 * Deactivate a bnobt cursor with worse locality than the current best. 886 */ 887 if (diff > acur->diff) { 888 deactivate = isbnobt; 889 goto out; 890 } 891 892 ASSERT(args->len > acur->len || 893 (args->len == acur->len && diff <= acur->diff)); 894 acur->rec_bno = bno; 895 acur->rec_len = len; 896 acur->bno = bnew; 897 acur->len = args->len; 898 acur->diff = diff; 899 *new = 1; 900 901 /* 902 * We're done if we found a perfect allocation. This only deactivates 903 * the current cursor, but this is just an optimization to terminate a 904 * cntbt search that otherwise runs to the edge of the tree. 905 */ 906 if (acur->diff == 0 && acur->len == args->maxlen) 907 deactivate = true; 908 out: 909 if (deactivate) 910 cur->bc_private.a.priv.abt.active = false; 911 trace_xfs_alloc_cur_check(args->mp, cur->bc_btnum, bno, len, diff, 912 *new); 913 return 0; 914 } 915 916 /* 917 * Complete an allocation of a candidate extent. Remove the extent from both 918 * trees and update the args structure. 919 */ 920 STATIC int 921 xfs_alloc_cur_finish( 922 struct xfs_alloc_arg *args, 923 struct xfs_alloc_cur *acur) 924 { 925 int error; 926 927 ASSERT(acur->cnt && acur->bnolt); 928 ASSERT(acur->bno >= acur->rec_bno); 929 ASSERT(acur->bno + acur->len <= acur->rec_bno + acur->rec_len); 930 ASSERT(acur->rec_bno + acur->rec_len <= 931 be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); 932 933 error = xfs_alloc_fixup_trees(acur->cnt, acur->bnolt, acur->rec_bno, 934 acur->rec_len, acur->bno, acur->len, 0); 935 if (error) 936 return error; 937 938 args->agbno = acur->bno; 939 args->len = acur->len; 940 args->wasfromfl = 0; 941 942 trace_xfs_alloc_cur(args); 943 return 0; 944 } 945 946 /* 947 * Locality allocation lookup algorithm. This expects a cntbt cursor and uses 948 * bno optimized lookup to search for extents with ideal size and locality. 949 */ 950 STATIC int 951 xfs_alloc_cntbt_iter( 952 struct xfs_alloc_arg *args, 953 struct xfs_alloc_cur *acur) 954 { 955 struct xfs_btree_cur *cur = acur->cnt; 956 xfs_agblock_t bno; 957 xfs_extlen_t len, cur_len; 958 int error; 959 int i; 960 961 if (!xfs_alloc_cur_active(cur)) 962 return 0; 963 964 /* locality optimized lookup */ 965 cur_len = acur->cur_len; 966 error = xfs_alloc_lookup_ge(cur, args->agbno, cur_len, &i); 967 if (error) 968 return error; 969 if (i == 0) 970 return 0; 971 error = xfs_alloc_get_rec(cur, &bno, &len, &i); 972 if (error) 973 return error; 974 975 /* check the current record and update search length from it */ 976 error = xfs_alloc_cur_check(args, acur, cur, &i); 977 if (error) 978 return error; 979 ASSERT(len >= acur->cur_len); 980 acur->cur_len = len; 981 982 /* 983 * We looked up the first record >= [agbno, len] above. The agbno is a 984 * secondary key and so the current record may lie just before or after 985 * agbno. If it is past agbno, check the previous record too so long as 986 * the length matches as it may be closer. Don't check a smaller record 987 * because that could deactivate our cursor. 988 */ 989 if (bno > args->agbno) { 990 error = xfs_btree_decrement(cur, 0, &i); 991 if (!error && i) { 992 error = xfs_alloc_get_rec(cur, &bno, &len, &i); 993 if (!error && i && len == acur->cur_len) 994 error = xfs_alloc_cur_check(args, acur, cur, 995 &i); 996 } 997 if (error) 998 return error; 999 } 1000 1001 /* 1002 * Increment the search key until we find at least one allocation 1003 * candidate or if the extent we found was larger. Otherwise, double the 1004 * search key to optimize the search. Efficiency is more important here 1005 * than absolute best locality. 1006 */ 1007 cur_len <<= 1; 1008 if (!acur->len || acur->cur_len >= cur_len) 1009 acur->cur_len++; 1010 else 1011 acur->cur_len = cur_len; 1012 1013 return error; 1014 } 1015 1016 /* 1017 * Deal with the case where only small freespaces remain. Either return the 1018 * contents of the last freespace record, or allocate space from the freelist if 1019 * there is nothing in the tree. 1020 */ 1021 STATIC int /* error */ 1022 xfs_alloc_ag_vextent_small( 1023 struct xfs_alloc_arg *args, /* allocation argument structure */ 1024 struct xfs_btree_cur *ccur, /* optional by-size cursor */ 1025 xfs_agblock_t *fbnop, /* result block number */ 1026 xfs_extlen_t *flenp, /* result length */ 1027 int *stat) /* status: 0-freelist, 1-normal/none */ 1028 { 1029 int error = 0; 1030 xfs_agblock_t fbno = NULLAGBLOCK; 1031 xfs_extlen_t flen = 0; 1032 int i = 0; 1033 1034 /* 1035 * If a cntbt cursor is provided, try to allocate the largest record in 1036 * the tree. Try the AGFL if the cntbt is empty, otherwise fail the 1037 * allocation. Make sure to respect minleft even when pulling from the 1038 * freelist. 1039 */ 1040 if (ccur) 1041 error = xfs_btree_decrement(ccur, 0, &i); 1042 if (error) 1043 goto error; 1044 if (i) { 1045 error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i); 1046 if (error) 1047 goto error; 1048 if (XFS_IS_CORRUPT(args->mp, i != 1)) { 1049 error = -EFSCORRUPTED; 1050 goto error; 1051 } 1052 goto out; 1053 } 1054 1055 if (args->minlen != 1 || args->alignment != 1 || 1056 args->resv == XFS_AG_RESV_AGFL || 1057 (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount) <= 1058 args->minleft)) 1059 goto out; 1060 1061 error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0); 1062 if (error) 1063 goto error; 1064 if (fbno == NULLAGBLOCK) 1065 goto out; 1066 1067 xfs_extent_busy_reuse(args->mp, args->agno, fbno, 1, 1068 (args->datatype & XFS_ALLOC_NOBUSY)); 1069 1070 if (args->datatype & XFS_ALLOC_USERDATA) { 1071 struct xfs_buf *bp; 1072 1073 bp = xfs_btree_get_bufs(args->mp, args->tp, args->agno, fbno); 1074 if (XFS_IS_CORRUPT(args->mp, !bp)) { 1075 error = -EFSCORRUPTED; 1076 goto error; 1077 } 1078 xfs_trans_binval(args->tp, bp); 1079 } 1080 *fbnop = args->agbno = fbno; 1081 *flenp = args->len = 1; 1082 if (XFS_IS_CORRUPT(args->mp, 1083 fbno >= be32_to_cpu( 1084 XFS_BUF_TO_AGF(args->agbp)->agf_length))) { 1085 error = -EFSCORRUPTED; 1086 goto error; 1087 } 1088 args->wasfromfl = 1; 1089 trace_xfs_alloc_small_freelist(args); 1090 1091 /* 1092 * If we're feeding an AGFL block to something that doesn't live in the 1093 * free space, we need to clear out the OWN_AG rmap. 1094 */ 1095 error = xfs_rmap_free(args->tp, args->agbp, args->agno, fbno, 1, 1096 &XFS_RMAP_OINFO_AG); 1097 if (error) 1098 goto error; 1099 1100 *stat = 0; 1101 return 0; 1102 1103 out: 1104 /* 1105 * Can't do the allocation, give up. 1106 */ 1107 if (flen < args->minlen) { 1108 args->agbno = NULLAGBLOCK; 1109 trace_xfs_alloc_small_notenough(args); 1110 flen = 0; 1111 } 1112 *fbnop = fbno; 1113 *flenp = flen; 1114 *stat = 1; 1115 trace_xfs_alloc_small_done(args); 1116 return 0; 1117 1118 error: 1119 trace_xfs_alloc_small_error(args); 1120 return error; 1121 } 1122 1123 /* 1124 * Allocate a variable extent in the allocation group agno. 1125 * Type and bno are used to determine where in the allocation group the 1126 * extent will start. 1127 * Extent's length (returned in *len) will be between minlen and maxlen, 1128 * and of the form k * prod + mod unless there's nothing that large. 1129 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it. 1130 */ 1131 STATIC int /* error */ 1132 xfs_alloc_ag_vextent( 1133 xfs_alloc_arg_t *args) /* argument structure for allocation */ 1134 { 1135 int error=0; 1136 1137 ASSERT(args->minlen > 0); 1138 ASSERT(args->maxlen > 0); 1139 ASSERT(args->minlen <= args->maxlen); 1140 ASSERT(args->mod < args->prod); 1141 ASSERT(args->alignment > 0); 1142 1143 /* 1144 * Branch to correct routine based on the type. 1145 */ 1146 args->wasfromfl = 0; 1147 switch (args->type) { 1148 case XFS_ALLOCTYPE_THIS_AG: 1149 error = xfs_alloc_ag_vextent_size(args); 1150 break; 1151 case XFS_ALLOCTYPE_NEAR_BNO: 1152 error = xfs_alloc_ag_vextent_near(args); 1153 break; 1154 case XFS_ALLOCTYPE_THIS_BNO: 1155 error = xfs_alloc_ag_vextent_exact(args); 1156 break; 1157 default: 1158 ASSERT(0); 1159 /* NOTREACHED */ 1160 } 1161 1162 if (error || args->agbno == NULLAGBLOCK) 1163 return error; 1164 1165 ASSERT(args->len >= args->minlen); 1166 ASSERT(args->len <= args->maxlen); 1167 ASSERT(!args->wasfromfl || args->resv != XFS_AG_RESV_AGFL); 1168 ASSERT(args->agbno % args->alignment == 0); 1169 1170 /* if not file data, insert new block into the reverse map btree */ 1171 if (!xfs_rmap_should_skip_owner_update(&args->oinfo)) { 1172 error = xfs_rmap_alloc(args->tp, args->agbp, args->agno, 1173 args->agbno, args->len, &args->oinfo); 1174 if (error) 1175 return error; 1176 } 1177 1178 if (!args->wasfromfl) { 1179 error = xfs_alloc_update_counters(args->tp, args->pag, 1180 args->agbp, 1181 -((long)(args->len))); 1182 if (error) 1183 return error; 1184 1185 ASSERT(!xfs_extent_busy_search(args->mp, args->agno, 1186 args->agbno, args->len)); 1187 } 1188 1189 xfs_ag_resv_alloc_extent(args->pag, args->resv, args); 1190 1191 XFS_STATS_INC(args->mp, xs_allocx); 1192 XFS_STATS_ADD(args->mp, xs_allocb, args->len); 1193 return error; 1194 } 1195 1196 /* 1197 * Allocate a variable extent at exactly agno/bno. 1198 * Extent's length (returned in *len) will be between minlen and maxlen, 1199 * and of the form k * prod + mod unless there's nothing that large. 1200 * Return the starting a.g. block (bno), or NULLAGBLOCK if we can't do it. 1201 */ 1202 STATIC int /* error */ 1203 xfs_alloc_ag_vextent_exact( 1204 xfs_alloc_arg_t *args) /* allocation argument structure */ 1205 { 1206 xfs_btree_cur_t *bno_cur;/* by block-number btree cursor */ 1207 xfs_btree_cur_t *cnt_cur;/* by count btree cursor */ 1208 int error; 1209 xfs_agblock_t fbno; /* start block of found extent */ 1210 xfs_extlen_t flen; /* length of found extent */ 1211 xfs_agblock_t tbno; /* start block of busy extent */ 1212 xfs_extlen_t tlen; /* length of busy extent */ 1213 xfs_agblock_t tend; /* end block of busy extent */ 1214 int i; /* success/failure of operation */ 1215 unsigned busy_gen; 1216 1217 ASSERT(args->alignment == 1); 1218 1219 /* 1220 * Allocate/initialize a cursor for the by-number freespace btree. 1221 */ 1222 bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, 1223 args->agno, XFS_BTNUM_BNO); 1224 1225 /* 1226 * Lookup bno and minlen in the btree (minlen is irrelevant, really). 1227 * Look for the closest free block <= bno, it must contain bno 1228 * if any free block does. 1229 */ 1230 error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i); 1231 if (error) 1232 goto error0; 1233 if (!i) 1234 goto not_found; 1235 1236 /* 1237 * Grab the freespace record. 1238 */ 1239 error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i); 1240 if (error) 1241 goto error0; 1242 if (XFS_IS_CORRUPT(args->mp, i != 1)) { 1243 error = -EFSCORRUPTED; 1244 goto error0; 1245 } 1246 ASSERT(fbno <= args->agbno); 1247 1248 /* 1249 * Check for overlapping busy extents. 1250 */ 1251 tbno = fbno; 1252 tlen = flen; 1253 xfs_extent_busy_trim(args, &tbno, &tlen, &busy_gen); 1254 1255 /* 1256 * Give up if the start of the extent is busy, or the freespace isn't 1257 * long enough for the minimum request. 1258 */ 1259 if (tbno > args->agbno) 1260 goto not_found; 1261 if (tlen < args->minlen) 1262 goto not_found; 1263 tend = tbno + tlen; 1264 if (tend < args->agbno + args->minlen) 1265 goto not_found; 1266 1267 /* 1268 * End of extent will be smaller of the freespace end and the 1269 * maximal requested end. 1270 * 1271 * Fix the length according to mod and prod if given. 1272 */ 1273 args->len = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen) 1274 - args->agbno; 1275 xfs_alloc_fix_len(args); 1276 ASSERT(args->agbno + args->len <= tend); 1277 1278 /* 1279 * We are allocating agbno for args->len 1280 * Allocate/initialize a cursor for the by-size btree. 1281 */ 1282 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, 1283 args->agno, XFS_BTNUM_CNT); 1284 ASSERT(args->agbno + args->len <= 1285 be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); 1286 error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno, 1287 args->len, XFSA_FIXUP_BNO_OK); 1288 if (error) { 1289 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR); 1290 goto error0; 1291 } 1292 1293 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); 1294 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1295 1296 args->wasfromfl = 0; 1297 trace_xfs_alloc_exact_done(args); 1298 return 0; 1299 1300 not_found: 1301 /* Didn't find it, return null. */ 1302 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); 1303 args->agbno = NULLAGBLOCK; 1304 trace_xfs_alloc_exact_notfound(args); 1305 return 0; 1306 1307 error0: 1308 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR); 1309 trace_xfs_alloc_exact_error(args); 1310 return error; 1311 } 1312 1313 /* 1314 * Search a given number of btree records in a given direction. Check each 1315 * record against the good extent we've already found. 1316 */ 1317 STATIC int 1318 xfs_alloc_walk_iter( 1319 struct xfs_alloc_arg *args, 1320 struct xfs_alloc_cur *acur, 1321 struct xfs_btree_cur *cur, 1322 bool increment, 1323 bool find_one, /* quit on first candidate */ 1324 int count, /* rec count (-1 for infinite) */ 1325 int *stat) 1326 { 1327 int error; 1328 int i; 1329 1330 *stat = 0; 1331 1332 /* 1333 * Search so long as the cursor is active or we find a better extent. 1334 * The cursor is deactivated if it extends beyond the range of the 1335 * current allocation candidate. 1336 */ 1337 while (xfs_alloc_cur_active(cur) && count) { 1338 error = xfs_alloc_cur_check(args, acur, cur, &i); 1339 if (error) 1340 return error; 1341 if (i == 1) { 1342 *stat = 1; 1343 if (find_one) 1344 break; 1345 } 1346 if (!xfs_alloc_cur_active(cur)) 1347 break; 1348 1349 if (increment) 1350 error = xfs_btree_increment(cur, 0, &i); 1351 else 1352 error = xfs_btree_decrement(cur, 0, &i); 1353 if (error) 1354 return error; 1355 if (i == 0) 1356 cur->bc_private.a.priv.abt.active = false; 1357 1358 if (count > 0) 1359 count--; 1360 } 1361 1362 return 0; 1363 } 1364 1365 /* 1366 * Search the by-bno and by-size btrees in parallel in search of an extent with 1367 * ideal locality based on the NEAR mode ->agbno locality hint. 1368 */ 1369 STATIC int 1370 xfs_alloc_ag_vextent_locality( 1371 struct xfs_alloc_arg *args, 1372 struct xfs_alloc_cur *acur, 1373 int *stat) 1374 { 1375 struct xfs_btree_cur *fbcur = NULL; 1376 int error; 1377 int i; 1378 bool fbinc; 1379 1380 ASSERT(acur->len == 0); 1381 ASSERT(args->type == XFS_ALLOCTYPE_NEAR_BNO); 1382 1383 *stat = 0; 1384 1385 error = xfs_alloc_lookup_ge(acur->cnt, args->agbno, acur->cur_len, &i); 1386 if (error) 1387 return error; 1388 error = xfs_alloc_lookup_le(acur->bnolt, args->agbno, 0, &i); 1389 if (error) 1390 return error; 1391 error = xfs_alloc_lookup_ge(acur->bnogt, args->agbno, 0, &i); 1392 if (error) 1393 return error; 1394 1395 /* 1396 * Search the bnobt and cntbt in parallel. Search the bnobt left and 1397 * right and lookup the closest extent to the locality hint for each 1398 * extent size key in the cntbt. The entire search terminates 1399 * immediately on a bnobt hit because that means we've found best case 1400 * locality. Otherwise the search continues until the cntbt cursor runs 1401 * off the end of the tree. If no allocation candidate is found at this 1402 * point, give up on locality, walk backwards from the end of the cntbt 1403 * and take the first available extent. 1404 * 1405 * The parallel tree searches balance each other out to provide fairly 1406 * consistent performance for various situations. The bnobt search can 1407 * have pathological behavior in the worst case scenario of larger 1408 * allocation requests and fragmented free space. On the other hand, the 1409 * bnobt is able to satisfy most smaller allocation requests much more 1410 * quickly than the cntbt. The cntbt search can sift through fragmented 1411 * free space and sets of free extents for larger allocation requests 1412 * more quickly than the bnobt. Since the locality hint is just a hint 1413 * and we don't want to scan the entire bnobt for perfect locality, the 1414 * cntbt search essentially bounds the bnobt search such that we can 1415 * find good enough locality at reasonable performance in most cases. 1416 */ 1417 while (xfs_alloc_cur_active(acur->bnolt) || 1418 xfs_alloc_cur_active(acur->bnogt) || 1419 xfs_alloc_cur_active(acur->cnt)) { 1420 1421 trace_xfs_alloc_cur_lookup(args); 1422 1423 /* 1424 * Search the bnobt left and right. In the case of a hit, finish 1425 * the search in the opposite direction and we're done. 1426 */ 1427 error = xfs_alloc_walk_iter(args, acur, acur->bnolt, false, 1428 true, 1, &i); 1429 if (error) 1430 return error; 1431 if (i == 1) { 1432 trace_xfs_alloc_cur_left(args); 1433 fbcur = acur->bnogt; 1434 fbinc = true; 1435 break; 1436 } 1437 error = xfs_alloc_walk_iter(args, acur, acur->bnogt, true, true, 1438 1, &i); 1439 if (error) 1440 return error; 1441 if (i == 1) { 1442 trace_xfs_alloc_cur_right(args); 1443 fbcur = acur->bnolt; 1444 fbinc = false; 1445 break; 1446 } 1447 1448 /* 1449 * Check the extent with best locality based on the current 1450 * extent size search key and keep track of the best candidate. 1451 */ 1452 error = xfs_alloc_cntbt_iter(args, acur); 1453 if (error) 1454 return error; 1455 if (!xfs_alloc_cur_active(acur->cnt)) { 1456 trace_xfs_alloc_cur_lookup_done(args); 1457 break; 1458 } 1459 } 1460 1461 /* 1462 * If we failed to find anything due to busy extents, return empty 1463 * handed so the caller can flush and retry. If no busy extents were 1464 * found, walk backwards from the end of the cntbt as a last resort. 1465 */ 1466 if (!xfs_alloc_cur_active(acur->cnt) && !acur->len && !acur->busy) { 1467 error = xfs_btree_decrement(acur->cnt, 0, &i); 1468 if (error) 1469 return error; 1470 if (i) { 1471 acur->cnt->bc_private.a.priv.abt.active = true; 1472 fbcur = acur->cnt; 1473 fbinc = false; 1474 } 1475 } 1476 1477 /* 1478 * Search in the opposite direction for a better entry in the case of 1479 * a bnobt hit or walk backwards from the end of the cntbt. 1480 */ 1481 if (fbcur) { 1482 error = xfs_alloc_walk_iter(args, acur, fbcur, fbinc, true, -1, 1483 &i); 1484 if (error) 1485 return error; 1486 } 1487 1488 if (acur->len) 1489 *stat = 1; 1490 1491 return 0; 1492 } 1493 1494 /* Check the last block of the cnt btree for allocations. */ 1495 static int 1496 xfs_alloc_ag_vextent_lastblock( 1497 struct xfs_alloc_arg *args, 1498 struct xfs_alloc_cur *acur, 1499 xfs_agblock_t *bno, 1500 xfs_extlen_t *len, 1501 bool *allocated) 1502 { 1503 int error; 1504 int i; 1505 1506 #ifdef DEBUG 1507 /* Randomly don't execute the first algorithm. */ 1508 if (prandom_u32() & 1) 1509 return 0; 1510 #endif 1511 1512 /* 1513 * Start from the entry that lookup found, sequence through all larger 1514 * free blocks. If we're actually pointing at a record smaller than 1515 * maxlen, go to the start of this block, and skip all those smaller 1516 * than minlen. 1517 */ 1518 if (len || args->alignment > 1) { 1519 acur->cnt->bc_ptrs[0] = 1; 1520 do { 1521 error = xfs_alloc_get_rec(acur->cnt, bno, len, &i); 1522 if (error) 1523 return error; 1524 if (XFS_IS_CORRUPT(args->mp, i != 1)) 1525 return -EFSCORRUPTED; 1526 if (*len >= args->minlen) 1527 break; 1528 error = xfs_btree_increment(acur->cnt, 0, &i); 1529 if (error) 1530 return error; 1531 } while (i); 1532 ASSERT(*len >= args->minlen); 1533 if (!i) 1534 return 0; 1535 } 1536 1537 error = xfs_alloc_walk_iter(args, acur, acur->cnt, true, false, -1, &i); 1538 if (error) 1539 return error; 1540 1541 /* 1542 * It didn't work. We COULD be in a case where there's a good record 1543 * somewhere, so try again. 1544 */ 1545 if (acur->len == 0) 1546 return 0; 1547 1548 trace_xfs_alloc_near_first(args); 1549 *allocated = true; 1550 return 0; 1551 } 1552 1553 /* 1554 * Allocate a variable extent near bno in the allocation group agno. 1555 * Extent's length (returned in len) will be between minlen and maxlen, 1556 * and of the form k * prod + mod unless there's nothing that large. 1557 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it. 1558 */ 1559 STATIC int 1560 xfs_alloc_ag_vextent_near( 1561 struct xfs_alloc_arg *args) 1562 { 1563 struct xfs_alloc_cur acur = {}; 1564 int error; /* error code */ 1565 int i; /* result code, temporary */ 1566 xfs_agblock_t bno; 1567 xfs_extlen_t len; 1568 1569 /* handle uninitialized agbno range so caller doesn't have to */ 1570 if (!args->min_agbno && !args->max_agbno) 1571 args->max_agbno = args->mp->m_sb.sb_agblocks - 1; 1572 ASSERT(args->min_agbno <= args->max_agbno); 1573 1574 /* clamp agbno to the range if it's outside */ 1575 if (args->agbno < args->min_agbno) 1576 args->agbno = args->min_agbno; 1577 if (args->agbno > args->max_agbno) 1578 args->agbno = args->max_agbno; 1579 1580 restart: 1581 len = 0; 1582 1583 /* 1584 * Set up cursors and see if there are any free extents as big as 1585 * maxlen. If not, pick the last entry in the tree unless the tree is 1586 * empty. 1587 */ 1588 error = xfs_alloc_cur_setup(args, &acur); 1589 if (error == -ENOSPC) { 1590 error = xfs_alloc_ag_vextent_small(args, acur.cnt, &bno, 1591 &len, &i); 1592 if (error) 1593 goto out; 1594 if (i == 0 || len == 0) { 1595 trace_xfs_alloc_near_noentry(args); 1596 goto out; 1597 } 1598 ASSERT(i == 1); 1599 } else if (error) { 1600 goto out; 1601 } 1602 1603 /* 1604 * First algorithm. 1605 * If the requested extent is large wrt the freespaces available 1606 * in this a.g., then the cursor will be pointing to a btree entry 1607 * near the right edge of the tree. If it's in the last btree leaf 1608 * block, then we just examine all the entries in that block 1609 * that are big enough, and pick the best one. 1610 */ 1611 if (xfs_btree_islastblock(acur.cnt, 0)) { 1612 bool allocated = false; 1613 1614 error = xfs_alloc_ag_vextent_lastblock(args, &acur, &bno, &len, 1615 &allocated); 1616 if (error) 1617 goto out; 1618 if (allocated) 1619 goto alloc_finish; 1620 } 1621 1622 /* 1623 * Second algorithm. Combined cntbt and bnobt search to find ideal 1624 * locality. 1625 */ 1626 error = xfs_alloc_ag_vextent_locality(args, &acur, &i); 1627 if (error) 1628 goto out; 1629 1630 /* 1631 * If we couldn't get anything, give up. 1632 */ 1633 if (!acur.len) { 1634 if (acur.busy) { 1635 trace_xfs_alloc_near_busy(args); 1636 xfs_extent_busy_flush(args->mp, args->pag, 1637 acur.busy_gen); 1638 goto restart; 1639 } 1640 trace_xfs_alloc_size_neither(args); 1641 args->agbno = NULLAGBLOCK; 1642 goto out; 1643 } 1644 1645 alloc_finish: 1646 /* fix up btrees on a successful allocation */ 1647 error = xfs_alloc_cur_finish(args, &acur); 1648 1649 out: 1650 xfs_alloc_cur_close(&acur, error); 1651 return error; 1652 } 1653 1654 /* 1655 * Allocate a variable extent anywhere in the allocation group agno. 1656 * Extent's length (returned in len) will be between minlen and maxlen, 1657 * and of the form k * prod + mod unless there's nothing that large. 1658 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it. 1659 */ 1660 STATIC int /* error */ 1661 xfs_alloc_ag_vextent_size( 1662 xfs_alloc_arg_t *args) /* allocation argument structure */ 1663 { 1664 xfs_btree_cur_t *bno_cur; /* cursor for bno btree */ 1665 xfs_btree_cur_t *cnt_cur; /* cursor for cnt btree */ 1666 int error; /* error result */ 1667 xfs_agblock_t fbno; /* start of found freespace */ 1668 xfs_extlen_t flen; /* length of found freespace */ 1669 int i; /* temp status variable */ 1670 xfs_agblock_t rbno; /* returned block number */ 1671 xfs_extlen_t rlen; /* length of returned extent */ 1672 bool busy; 1673 unsigned busy_gen; 1674 1675 restart: 1676 /* 1677 * Allocate and initialize a cursor for the by-size btree. 1678 */ 1679 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, 1680 args->agno, XFS_BTNUM_CNT); 1681 bno_cur = NULL; 1682 busy = false; 1683 1684 /* 1685 * Look for an entry >= maxlen+alignment-1 blocks. 1686 */ 1687 if ((error = xfs_alloc_lookup_ge(cnt_cur, 0, 1688 args->maxlen + args->alignment - 1, &i))) 1689 goto error0; 1690 1691 /* 1692 * If none then we have to settle for a smaller extent. In the case that 1693 * there are no large extents, this will return the last entry in the 1694 * tree unless the tree is empty. In the case that there are only busy 1695 * large extents, this will return the largest small extent unless there 1696 * are no smaller extents available. 1697 */ 1698 if (!i) { 1699 error = xfs_alloc_ag_vextent_small(args, cnt_cur, 1700 &fbno, &flen, &i); 1701 if (error) 1702 goto error0; 1703 if (i == 0 || flen == 0) { 1704 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1705 trace_xfs_alloc_size_noentry(args); 1706 return 0; 1707 } 1708 ASSERT(i == 1); 1709 busy = xfs_alloc_compute_aligned(args, fbno, flen, &rbno, 1710 &rlen, &busy_gen); 1711 } else { 1712 /* 1713 * Search for a non-busy extent that is large enough. 1714 */ 1715 for (;;) { 1716 error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i); 1717 if (error) 1718 goto error0; 1719 if (XFS_IS_CORRUPT(args->mp, i != 1)) { 1720 error = -EFSCORRUPTED; 1721 goto error0; 1722 } 1723 1724 busy = xfs_alloc_compute_aligned(args, fbno, flen, 1725 &rbno, &rlen, &busy_gen); 1726 1727 if (rlen >= args->maxlen) 1728 break; 1729 1730 error = xfs_btree_increment(cnt_cur, 0, &i); 1731 if (error) 1732 goto error0; 1733 if (i == 0) { 1734 /* 1735 * Our only valid extents must have been busy. 1736 * Make it unbusy by forcing the log out and 1737 * retrying. 1738 */ 1739 xfs_btree_del_cursor(cnt_cur, 1740 XFS_BTREE_NOERROR); 1741 trace_xfs_alloc_size_busy(args); 1742 xfs_extent_busy_flush(args->mp, 1743 args->pag, busy_gen); 1744 goto restart; 1745 } 1746 } 1747 } 1748 1749 /* 1750 * In the first case above, we got the last entry in the 1751 * by-size btree. Now we check to see if the space hits maxlen 1752 * once aligned; if not, we search left for something better. 1753 * This can't happen in the second case above. 1754 */ 1755 rlen = XFS_EXTLEN_MIN(args->maxlen, rlen); 1756 if (XFS_IS_CORRUPT(args->mp, 1757 rlen != 0 && 1758 (rlen > flen || 1759 rbno + rlen > fbno + flen))) { 1760 error = -EFSCORRUPTED; 1761 goto error0; 1762 } 1763 if (rlen < args->maxlen) { 1764 xfs_agblock_t bestfbno; 1765 xfs_extlen_t bestflen; 1766 xfs_agblock_t bestrbno; 1767 xfs_extlen_t bestrlen; 1768 1769 bestrlen = rlen; 1770 bestrbno = rbno; 1771 bestflen = flen; 1772 bestfbno = fbno; 1773 for (;;) { 1774 if ((error = xfs_btree_decrement(cnt_cur, 0, &i))) 1775 goto error0; 1776 if (i == 0) 1777 break; 1778 if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, 1779 &i))) 1780 goto error0; 1781 if (XFS_IS_CORRUPT(args->mp, i != 1)) { 1782 error = -EFSCORRUPTED; 1783 goto error0; 1784 } 1785 if (flen < bestrlen) 1786 break; 1787 busy = xfs_alloc_compute_aligned(args, fbno, flen, 1788 &rbno, &rlen, &busy_gen); 1789 rlen = XFS_EXTLEN_MIN(args->maxlen, rlen); 1790 if (XFS_IS_CORRUPT(args->mp, 1791 rlen != 0 && 1792 (rlen > flen || 1793 rbno + rlen > fbno + flen))) { 1794 error = -EFSCORRUPTED; 1795 goto error0; 1796 } 1797 if (rlen > bestrlen) { 1798 bestrlen = rlen; 1799 bestrbno = rbno; 1800 bestflen = flen; 1801 bestfbno = fbno; 1802 if (rlen == args->maxlen) 1803 break; 1804 } 1805 } 1806 if ((error = xfs_alloc_lookup_eq(cnt_cur, bestfbno, bestflen, 1807 &i))) 1808 goto error0; 1809 if (XFS_IS_CORRUPT(args->mp, i != 1)) { 1810 error = -EFSCORRUPTED; 1811 goto error0; 1812 } 1813 rlen = bestrlen; 1814 rbno = bestrbno; 1815 flen = bestflen; 1816 fbno = bestfbno; 1817 } 1818 args->wasfromfl = 0; 1819 /* 1820 * Fix up the length. 1821 */ 1822 args->len = rlen; 1823 if (rlen < args->minlen) { 1824 if (busy) { 1825 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1826 trace_xfs_alloc_size_busy(args); 1827 xfs_extent_busy_flush(args->mp, args->pag, busy_gen); 1828 goto restart; 1829 } 1830 goto out_nominleft; 1831 } 1832 xfs_alloc_fix_len(args); 1833 1834 rlen = args->len; 1835 if (XFS_IS_CORRUPT(args->mp, rlen > flen)) { 1836 error = -EFSCORRUPTED; 1837 goto error0; 1838 } 1839 /* 1840 * Allocate and initialize a cursor for the by-block tree. 1841 */ 1842 bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, 1843 args->agno, XFS_BTNUM_BNO); 1844 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, 1845 rbno, rlen, XFSA_FIXUP_CNT_OK))) 1846 goto error0; 1847 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1848 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); 1849 cnt_cur = bno_cur = NULL; 1850 args->len = rlen; 1851 args->agbno = rbno; 1852 if (XFS_IS_CORRUPT(args->mp, 1853 args->agbno + args->len > 1854 be32_to_cpu( 1855 XFS_BUF_TO_AGF(args->agbp)->agf_length))) { 1856 error = -EFSCORRUPTED; 1857 goto error0; 1858 } 1859 trace_xfs_alloc_size_done(args); 1860 return 0; 1861 1862 error0: 1863 trace_xfs_alloc_size_error(args); 1864 if (cnt_cur) 1865 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR); 1866 if (bno_cur) 1867 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR); 1868 return error; 1869 1870 out_nominleft: 1871 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1872 trace_xfs_alloc_size_nominleft(args); 1873 args->agbno = NULLAGBLOCK; 1874 return 0; 1875 } 1876 1877 /* 1878 * Free the extent starting at agno/bno for length. 1879 */ 1880 STATIC int 1881 xfs_free_ag_extent( 1882 struct xfs_trans *tp, 1883 struct xfs_buf *agbp, 1884 xfs_agnumber_t agno, 1885 xfs_agblock_t bno, 1886 xfs_extlen_t len, 1887 const struct xfs_owner_info *oinfo, 1888 enum xfs_ag_resv_type type) 1889 { 1890 struct xfs_mount *mp; 1891 struct xfs_perag *pag; 1892 struct xfs_btree_cur *bno_cur; 1893 struct xfs_btree_cur *cnt_cur; 1894 xfs_agblock_t gtbno; /* start of right neighbor */ 1895 xfs_extlen_t gtlen; /* length of right neighbor */ 1896 xfs_agblock_t ltbno; /* start of left neighbor */ 1897 xfs_extlen_t ltlen; /* length of left neighbor */ 1898 xfs_agblock_t nbno; /* new starting block of freesp */ 1899 xfs_extlen_t nlen; /* new length of freespace */ 1900 int haveleft; /* have a left neighbor */ 1901 int haveright; /* have a right neighbor */ 1902 int i; 1903 int error; 1904 1905 bno_cur = cnt_cur = NULL; 1906 mp = tp->t_mountp; 1907 1908 if (!xfs_rmap_should_skip_owner_update(oinfo)) { 1909 error = xfs_rmap_free(tp, agbp, agno, bno, len, oinfo); 1910 if (error) 1911 goto error0; 1912 } 1913 1914 /* 1915 * Allocate and initialize a cursor for the by-block btree. 1916 */ 1917 bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO); 1918 /* 1919 * Look for a neighboring block on the left (lower block numbers) 1920 * that is contiguous with this space. 1921 */ 1922 if ((error = xfs_alloc_lookup_le(bno_cur, bno, len, &haveleft))) 1923 goto error0; 1924 if (haveleft) { 1925 /* 1926 * There is a block to our left. 1927 */ 1928 if ((error = xfs_alloc_get_rec(bno_cur, <bno, <len, &i))) 1929 goto error0; 1930 if (XFS_IS_CORRUPT(mp, i != 1)) { 1931 error = -EFSCORRUPTED; 1932 goto error0; 1933 } 1934 /* 1935 * It's not contiguous, though. 1936 */ 1937 if (ltbno + ltlen < bno) 1938 haveleft = 0; 1939 else { 1940 /* 1941 * If this failure happens the request to free this 1942 * space was invalid, it's (partly) already free. 1943 * Very bad. 1944 */ 1945 if (XFS_IS_CORRUPT(mp, ltbno + ltlen > bno)) { 1946 error = -EFSCORRUPTED; 1947 goto error0; 1948 } 1949 } 1950 } 1951 /* 1952 * Look for a neighboring block on the right (higher block numbers) 1953 * that is contiguous with this space. 1954 */ 1955 if ((error = xfs_btree_increment(bno_cur, 0, &haveright))) 1956 goto error0; 1957 if (haveright) { 1958 /* 1959 * There is a block to our right. 1960 */ 1961 if ((error = xfs_alloc_get_rec(bno_cur, >bno, >len, &i))) 1962 goto error0; 1963 if (XFS_IS_CORRUPT(mp, i != 1)) { 1964 error = -EFSCORRUPTED; 1965 goto error0; 1966 } 1967 /* 1968 * It's not contiguous, though. 1969 */ 1970 if (bno + len < gtbno) 1971 haveright = 0; 1972 else { 1973 /* 1974 * If this failure happens the request to free this 1975 * space was invalid, it's (partly) already free. 1976 * Very bad. 1977 */ 1978 if (XFS_IS_CORRUPT(mp, bno + len > gtbno)) { 1979 error = -EFSCORRUPTED; 1980 goto error0; 1981 } 1982 } 1983 } 1984 /* 1985 * Now allocate and initialize a cursor for the by-size tree. 1986 */ 1987 cnt_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_CNT); 1988 /* 1989 * Have both left and right contiguous neighbors. 1990 * Merge all three into a single free block. 1991 */ 1992 if (haveleft && haveright) { 1993 /* 1994 * Delete the old by-size entry on the left. 1995 */ 1996 if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i))) 1997 goto error0; 1998 if (XFS_IS_CORRUPT(mp, i != 1)) { 1999 error = -EFSCORRUPTED; 2000 goto error0; 2001 } 2002 if ((error = xfs_btree_delete(cnt_cur, &i))) 2003 goto error0; 2004 if (XFS_IS_CORRUPT(mp, i != 1)) { 2005 error = -EFSCORRUPTED; 2006 goto error0; 2007 } 2008 /* 2009 * Delete the old by-size entry on the right. 2010 */ 2011 if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i))) 2012 goto error0; 2013 if (XFS_IS_CORRUPT(mp, i != 1)) { 2014 error = -EFSCORRUPTED; 2015 goto error0; 2016 } 2017 if ((error = xfs_btree_delete(cnt_cur, &i))) 2018 goto error0; 2019 if (XFS_IS_CORRUPT(mp, i != 1)) { 2020 error = -EFSCORRUPTED; 2021 goto error0; 2022 } 2023 /* 2024 * Delete the old by-block entry for the right block. 2025 */ 2026 if ((error = xfs_btree_delete(bno_cur, &i))) 2027 goto error0; 2028 if (XFS_IS_CORRUPT(mp, i != 1)) { 2029 error = -EFSCORRUPTED; 2030 goto error0; 2031 } 2032 /* 2033 * Move the by-block cursor back to the left neighbor. 2034 */ 2035 if ((error = xfs_btree_decrement(bno_cur, 0, &i))) 2036 goto error0; 2037 if (XFS_IS_CORRUPT(mp, i != 1)) { 2038 error = -EFSCORRUPTED; 2039 goto error0; 2040 } 2041 #ifdef DEBUG 2042 /* 2043 * Check that this is the right record: delete didn't 2044 * mangle the cursor. 2045 */ 2046 { 2047 xfs_agblock_t xxbno; 2048 xfs_extlen_t xxlen; 2049 2050 if ((error = xfs_alloc_get_rec(bno_cur, &xxbno, &xxlen, 2051 &i))) 2052 goto error0; 2053 if (XFS_IS_CORRUPT(mp, 2054 i != 1 || 2055 xxbno != ltbno || 2056 xxlen != ltlen)) { 2057 error = -EFSCORRUPTED; 2058 goto error0; 2059 } 2060 } 2061 #endif 2062 /* 2063 * Update remaining by-block entry to the new, joined block. 2064 */ 2065 nbno = ltbno; 2066 nlen = len + ltlen + gtlen; 2067 if ((error = xfs_alloc_update(bno_cur, nbno, nlen))) 2068 goto error0; 2069 } 2070 /* 2071 * Have only a left contiguous neighbor. 2072 * Merge it together with the new freespace. 2073 */ 2074 else if (haveleft) { 2075 /* 2076 * Delete the old by-size entry on the left. 2077 */ 2078 if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i))) 2079 goto error0; 2080 if (XFS_IS_CORRUPT(mp, i != 1)) { 2081 error = -EFSCORRUPTED; 2082 goto error0; 2083 } 2084 if ((error = xfs_btree_delete(cnt_cur, &i))) 2085 goto error0; 2086 if (XFS_IS_CORRUPT(mp, i != 1)) { 2087 error = -EFSCORRUPTED; 2088 goto error0; 2089 } 2090 /* 2091 * Back up the by-block cursor to the left neighbor, and 2092 * update its length. 2093 */ 2094 if ((error = xfs_btree_decrement(bno_cur, 0, &i))) 2095 goto error0; 2096 if (XFS_IS_CORRUPT(mp, i != 1)) { 2097 error = -EFSCORRUPTED; 2098 goto error0; 2099 } 2100 nbno = ltbno; 2101 nlen = len + ltlen; 2102 if ((error = xfs_alloc_update(bno_cur, nbno, nlen))) 2103 goto error0; 2104 } 2105 /* 2106 * Have only a right contiguous neighbor. 2107 * Merge it together with the new freespace. 2108 */ 2109 else if (haveright) { 2110 /* 2111 * Delete the old by-size entry on the right. 2112 */ 2113 if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i))) 2114 goto error0; 2115 if (XFS_IS_CORRUPT(mp, i != 1)) { 2116 error = -EFSCORRUPTED; 2117 goto error0; 2118 } 2119 if ((error = xfs_btree_delete(cnt_cur, &i))) 2120 goto error0; 2121 if (XFS_IS_CORRUPT(mp, i != 1)) { 2122 error = -EFSCORRUPTED; 2123 goto error0; 2124 } 2125 /* 2126 * Update the starting block and length of the right 2127 * neighbor in the by-block tree. 2128 */ 2129 nbno = bno; 2130 nlen = len + gtlen; 2131 if ((error = xfs_alloc_update(bno_cur, nbno, nlen))) 2132 goto error0; 2133 } 2134 /* 2135 * No contiguous neighbors. 2136 * Insert the new freespace into the by-block tree. 2137 */ 2138 else { 2139 nbno = bno; 2140 nlen = len; 2141 if ((error = xfs_btree_insert(bno_cur, &i))) 2142 goto error0; 2143 if (XFS_IS_CORRUPT(mp, i != 1)) { 2144 error = -EFSCORRUPTED; 2145 goto error0; 2146 } 2147 } 2148 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); 2149 bno_cur = NULL; 2150 /* 2151 * In all cases we need to insert the new freespace in the by-size tree. 2152 */ 2153 if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i))) 2154 goto error0; 2155 if (XFS_IS_CORRUPT(mp, i != 0)) { 2156 error = -EFSCORRUPTED; 2157 goto error0; 2158 } 2159 if ((error = xfs_btree_insert(cnt_cur, &i))) 2160 goto error0; 2161 if (XFS_IS_CORRUPT(mp, i != 1)) { 2162 error = -EFSCORRUPTED; 2163 goto error0; 2164 } 2165 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 2166 cnt_cur = NULL; 2167 2168 /* 2169 * Update the freespace totals in the ag and superblock. 2170 */ 2171 pag = xfs_perag_get(mp, agno); 2172 error = xfs_alloc_update_counters(tp, pag, agbp, len); 2173 xfs_ag_resv_free_extent(pag, type, tp, len); 2174 xfs_perag_put(pag); 2175 if (error) 2176 goto error0; 2177 2178 XFS_STATS_INC(mp, xs_freex); 2179 XFS_STATS_ADD(mp, xs_freeb, len); 2180 2181 trace_xfs_free_extent(mp, agno, bno, len, type, haveleft, haveright); 2182 2183 return 0; 2184 2185 error0: 2186 trace_xfs_free_extent(mp, agno, bno, len, type, -1, -1); 2187 if (bno_cur) 2188 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR); 2189 if (cnt_cur) 2190 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR); 2191 return error; 2192 } 2193 2194 /* 2195 * Visible (exported) allocation/free functions. 2196 * Some of these are used just by xfs_alloc_btree.c and this file. 2197 */ 2198 2199 /* 2200 * Compute and fill in value of m_ag_maxlevels. 2201 */ 2202 void 2203 xfs_alloc_compute_maxlevels( 2204 xfs_mount_t *mp) /* file system mount structure */ 2205 { 2206 mp->m_ag_maxlevels = xfs_btree_compute_maxlevels(mp->m_alloc_mnr, 2207 (mp->m_sb.sb_agblocks + 1) / 2); 2208 } 2209 2210 /* 2211 * Find the length of the longest extent in an AG. The 'need' parameter 2212 * specifies how much space we're going to need for the AGFL and the 2213 * 'reserved' parameter tells us how many blocks in this AG are reserved for 2214 * other callers. 2215 */ 2216 xfs_extlen_t 2217 xfs_alloc_longest_free_extent( 2218 struct xfs_perag *pag, 2219 xfs_extlen_t need, 2220 xfs_extlen_t reserved) 2221 { 2222 xfs_extlen_t delta = 0; 2223 2224 /* 2225 * If the AGFL needs a recharge, we'll have to subtract that from the 2226 * longest extent. 2227 */ 2228 if (need > pag->pagf_flcount) 2229 delta = need - pag->pagf_flcount; 2230 2231 /* 2232 * If we cannot maintain others' reservations with space from the 2233 * not-longest freesp extents, we'll have to subtract /that/ from 2234 * the longest extent too. 2235 */ 2236 if (pag->pagf_freeblks - pag->pagf_longest < reserved) 2237 delta += reserved - (pag->pagf_freeblks - pag->pagf_longest); 2238 2239 /* 2240 * If the longest extent is long enough to satisfy all the 2241 * reservations and AGFL rules in place, we can return this extent. 2242 */ 2243 if (pag->pagf_longest > delta) 2244 return min_t(xfs_extlen_t, pag->pag_mount->m_ag_max_usable, 2245 pag->pagf_longest - delta); 2246 2247 /* Otherwise, let the caller try for 1 block if there's space. */ 2248 return pag->pagf_flcount > 0 || pag->pagf_longest > 0; 2249 } 2250 2251 unsigned int 2252 xfs_alloc_min_freelist( 2253 struct xfs_mount *mp, 2254 struct xfs_perag *pag) 2255 { 2256 unsigned int min_free; 2257 2258 /* space needed by-bno freespace btree */ 2259 min_free = min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_BNOi] + 1, 2260 mp->m_ag_maxlevels); 2261 /* space needed by-size freespace btree */ 2262 min_free += min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_CNTi] + 1, 2263 mp->m_ag_maxlevels); 2264 /* space needed reverse mapping used space btree */ 2265 if (xfs_sb_version_hasrmapbt(&mp->m_sb)) 2266 min_free += min_t(unsigned int, 2267 pag->pagf_levels[XFS_BTNUM_RMAPi] + 1, 2268 mp->m_rmap_maxlevels); 2269 2270 return min_free; 2271 } 2272 2273 /* 2274 * Check if the operation we are fixing up the freelist for should go ahead or 2275 * not. If we are freeing blocks, we always allow it, otherwise the allocation 2276 * is dependent on whether the size and shape of free space available will 2277 * permit the requested allocation to take place. 2278 */ 2279 static bool 2280 xfs_alloc_space_available( 2281 struct xfs_alloc_arg *args, 2282 xfs_extlen_t min_free, 2283 int flags) 2284 { 2285 struct xfs_perag *pag = args->pag; 2286 xfs_extlen_t alloc_len, longest; 2287 xfs_extlen_t reservation; /* blocks that are still reserved */ 2288 int available; 2289 xfs_extlen_t agflcount; 2290 2291 if (flags & XFS_ALLOC_FLAG_FREEING) 2292 return true; 2293 2294 reservation = xfs_ag_resv_needed(pag, args->resv); 2295 2296 /* do we have enough contiguous free space for the allocation? */ 2297 alloc_len = args->minlen + (args->alignment - 1) + args->minalignslop; 2298 longest = xfs_alloc_longest_free_extent(pag, min_free, reservation); 2299 if (longest < alloc_len) 2300 return false; 2301 2302 /* 2303 * Do we have enough free space remaining for the allocation? Don't 2304 * account extra agfl blocks because we are about to defer free them, 2305 * making them unavailable until the current transaction commits. 2306 */ 2307 agflcount = min_t(xfs_extlen_t, pag->pagf_flcount, min_free); 2308 available = (int)(pag->pagf_freeblks + agflcount - 2309 reservation - min_free - args->minleft); 2310 if (available < (int)max(args->total, alloc_len)) 2311 return false; 2312 2313 /* 2314 * Clamp maxlen to the amount of free space available for the actual 2315 * extent allocation. 2316 */ 2317 if (available < (int)args->maxlen && !(flags & XFS_ALLOC_FLAG_CHECK)) { 2318 args->maxlen = available; 2319 ASSERT(args->maxlen > 0); 2320 ASSERT(args->maxlen >= args->minlen); 2321 } 2322 2323 return true; 2324 } 2325 2326 int 2327 xfs_free_agfl_block( 2328 struct xfs_trans *tp, 2329 xfs_agnumber_t agno, 2330 xfs_agblock_t agbno, 2331 struct xfs_buf *agbp, 2332 struct xfs_owner_info *oinfo) 2333 { 2334 int error; 2335 struct xfs_buf *bp; 2336 2337 error = xfs_free_ag_extent(tp, agbp, agno, agbno, 1, oinfo, 2338 XFS_AG_RESV_AGFL); 2339 if (error) 2340 return error; 2341 2342 bp = xfs_btree_get_bufs(tp->t_mountp, tp, agno, agbno); 2343 if (XFS_IS_CORRUPT(tp->t_mountp, !bp)) 2344 return -EFSCORRUPTED; 2345 xfs_trans_binval(tp, bp); 2346 2347 return 0; 2348 } 2349 2350 /* 2351 * Check the agfl fields of the agf for inconsistency or corruption. The purpose 2352 * is to detect an agfl header padding mismatch between current and early v5 2353 * kernels. This problem manifests as a 1-slot size difference between the 2354 * on-disk flcount and the active [first, last] range of a wrapped agfl. This 2355 * may also catch variants of agfl count corruption unrelated to padding. Either 2356 * way, we'll reset the agfl and warn the user. 2357 * 2358 * Return true if a reset is required before the agfl can be used, false 2359 * otherwise. 2360 */ 2361 static bool 2362 xfs_agfl_needs_reset( 2363 struct xfs_mount *mp, 2364 struct xfs_agf *agf) 2365 { 2366 uint32_t f = be32_to_cpu(agf->agf_flfirst); 2367 uint32_t l = be32_to_cpu(agf->agf_fllast); 2368 uint32_t c = be32_to_cpu(agf->agf_flcount); 2369 int agfl_size = xfs_agfl_size(mp); 2370 int active; 2371 2372 /* no agfl header on v4 supers */ 2373 if (!xfs_sb_version_hascrc(&mp->m_sb)) 2374 return false; 2375 2376 /* 2377 * The agf read verifier catches severe corruption of these fields. 2378 * Repeat some sanity checks to cover a packed -> unpacked mismatch if 2379 * the verifier allows it. 2380 */ 2381 if (f >= agfl_size || l >= agfl_size) 2382 return true; 2383 if (c > agfl_size) 2384 return true; 2385 2386 /* 2387 * Check consistency between the on-disk count and the active range. An 2388 * agfl padding mismatch manifests as an inconsistent flcount. 2389 */ 2390 if (c && l >= f) 2391 active = l - f + 1; 2392 else if (c) 2393 active = agfl_size - f + l + 1; 2394 else 2395 active = 0; 2396 2397 return active != c; 2398 } 2399 2400 /* 2401 * Reset the agfl to an empty state. Ignore/drop any existing blocks since the 2402 * agfl content cannot be trusted. Warn the user that a repair is required to 2403 * recover leaked blocks. 2404 * 2405 * The purpose of this mechanism is to handle filesystems affected by the agfl 2406 * header padding mismatch problem. A reset keeps the filesystem online with a 2407 * relatively minor free space accounting inconsistency rather than suffer the 2408 * inevitable crash from use of an invalid agfl block. 2409 */ 2410 static void 2411 xfs_agfl_reset( 2412 struct xfs_trans *tp, 2413 struct xfs_buf *agbp, 2414 struct xfs_perag *pag) 2415 { 2416 struct xfs_mount *mp = tp->t_mountp; 2417 struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); 2418 2419 ASSERT(pag->pagf_agflreset); 2420 trace_xfs_agfl_reset(mp, agf, 0, _RET_IP_); 2421 2422 xfs_warn(mp, 2423 "WARNING: Reset corrupted AGFL on AG %u. %d blocks leaked. " 2424 "Please unmount and run xfs_repair.", 2425 pag->pag_agno, pag->pagf_flcount); 2426 2427 agf->agf_flfirst = 0; 2428 agf->agf_fllast = cpu_to_be32(xfs_agfl_size(mp) - 1); 2429 agf->agf_flcount = 0; 2430 xfs_alloc_log_agf(tp, agbp, XFS_AGF_FLFIRST | XFS_AGF_FLLAST | 2431 XFS_AGF_FLCOUNT); 2432 2433 pag->pagf_flcount = 0; 2434 pag->pagf_agflreset = false; 2435 } 2436 2437 /* 2438 * Defer an AGFL block free. This is effectively equivalent to 2439 * xfs_bmap_add_free() with some special handling particular to AGFL blocks. 2440 * 2441 * Deferring AGFL frees helps prevent log reservation overruns due to too many 2442 * allocation operations in a transaction. AGFL frees are prone to this problem 2443 * because for one they are always freed one at a time. Further, an immediate 2444 * AGFL block free can cause a btree join and require another block free before 2445 * the real allocation can proceed. Deferring the free disconnects freeing up 2446 * the AGFL slot from freeing the block. 2447 */ 2448 STATIC void 2449 xfs_defer_agfl_block( 2450 struct xfs_trans *tp, 2451 xfs_agnumber_t agno, 2452 xfs_fsblock_t agbno, 2453 struct xfs_owner_info *oinfo) 2454 { 2455 struct xfs_mount *mp = tp->t_mountp; 2456 struct xfs_extent_free_item *new; /* new element */ 2457 2458 ASSERT(xfs_bmap_free_item_zone != NULL); 2459 ASSERT(oinfo != NULL); 2460 2461 new = kmem_zone_alloc(xfs_bmap_free_item_zone, 0); 2462 new->xefi_startblock = XFS_AGB_TO_FSB(mp, agno, agbno); 2463 new->xefi_blockcount = 1; 2464 new->xefi_oinfo = *oinfo; 2465 2466 trace_xfs_agfl_free_defer(mp, agno, 0, agbno, 1); 2467 2468 xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_AGFL_FREE, &new->xefi_list); 2469 } 2470 2471 /* 2472 * Decide whether to use this allocation group for this allocation. 2473 * If so, fix up the btree freelist's size. 2474 */ 2475 int /* error */ 2476 xfs_alloc_fix_freelist( 2477 struct xfs_alloc_arg *args, /* allocation argument structure */ 2478 int flags) /* XFS_ALLOC_FLAG_... */ 2479 { 2480 struct xfs_mount *mp = args->mp; 2481 struct xfs_perag *pag = args->pag; 2482 struct xfs_trans *tp = args->tp; 2483 struct xfs_buf *agbp = NULL; 2484 struct xfs_buf *agflbp = NULL; 2485 struct xfs_alloc_arg targs; /* local allocation arguments */ 2486 xfs_agblock_t bno; /* freelist block */ 2487 xfs_extlen_t need; /* total blocks needed in freelist */ 2488 int error = 0; 2489 2490 /* deferred ops (AGFL block frees) require permanent transactions */ 2491 ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); 2492 2493 if (!pag->pagf_init) { 2494 error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp); 2495 if (error) 2496 goto out_no_agbp; 2497 if (!pag->pagf_init) { 2498 ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK); 2499 ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING)); 2500 goto out_agbp_relse; 2501 } 2502 } 2503 2504 /* 2505 * If this is a metadata preferred pag and we are user data then try 2506 * somewhere else if we are not being asked to try harder at this 2507 * point 2508 */ 2509 if (pag->pagf_metadata && (args->datatype & XFS_ALLOC_USERDATA) && 2510 (flags & XFS_ALLOC_FLAG_TRYLOCK)) { 2511 ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING)); 2512 goto out_agbp_relse; 2513 } 2514 2515 need = xfs_alloc_min_freelist(mp, pag); 2516 if (!xfs_alloc_space_available(args, need, flags | 2517 XFS_ALLOC_FLAG_CHECK)) 2518 goto out_agbp_relse; 2519 2520 /* 2521 * Get the a.g. freespace buffer. 2522 * Can fail if we're not blocking on locks, and it's held. 2523 */ 2524 if (!agbp) { 2525 error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp); 2526 if (error) 2527 goto out_no_agbp; 2528 if (!agbp) { 2529 ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK); 2530 ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING)); 2531 goto out_no_agbp; 2532 } 2533 } 2534 2535 /* reset a padding mismatched agfl before final free space check */ 2536 if (pag->pagf_agflreset) 2537 xfs_agfl_reset(tp, agbp, pag); 2538 2539 /* If there isn't enough total space or single-extent, reject it. */ 2540 need = xfs_alloc_min_freelist(mp, pag); 2541 if (!xfs_alloc_space_available(args, need, flags)) 2542 goto out_agbp_relse; 2543 2544 /* 2545 * Make the freelist shorter if it's too long. 2546 * 2547 * Note that from this point onwards, we will always release the agf and 2548 * agfl buffers on error. This handles the case where we error out and 2549 * the buffers are clean or may not have been joined to the transaction 2550 * and hence need to be released manually. If they have been joined to 2551 * the transaction, then xfs_trans_brelse() will handle them 2552 * appropriately based on the recursion count and dirty state of the 2553 * buffer. 2554 * 2555 * XXX (dgc): When we have lots of free space, does this buy us 2556 * anything other than extra overhead when we need to put more blocks 2557 * back on the free list? Maybe we should only do this when space is 2558 * getting low or the AGFL is more than half full? 2559 * 2560 * The NOSHRINK flag prevents the AGFL from being shrunk if it's too 2561 * big; the NORMAP flag prevents AGFL expand/shrink operations from 2562 * updating the rmapbt. Both flags are used in xfs_repair while we're 2563 * rebuilding the rmapbt, and neither are used by the kernel. They're 2564 * both required to ensure that rmaps are correctly recorded for the 2565 * regenerated AGFL, bnobt, and cntbt. See repair/phase5.c and 2566 * repair/rmap.c in xfsprogs for details. 2567 */ 2568 memset(&targs, 0, sizeof(targs)); 2569 /* struct copy below */ 2570 if (flags & XFS_ALLOC_FLAG_NORMAP) 2571 targs.oinfo = XFS_RMAP_OINFO_SKIP_UPDATE; 2572 else 2573 targs.oinfo = XFS_RMAP_OINFO_AG; 2574 while (!(flags & XFS_ALLOC_FLAG_NOSHRINK) && pag->pagf_flcount > need) { 2575 error = xfs_alloc_get_freelist(tp, agbp, &bno, 0); 2576 if (error) 2577 goto out_agbp_relse; 2578 2579 /* defer agfl frees */ 2580 xfs_defer_agfl_block(tp, args->agno, bno, &targs.oinfo); 2581 } 2582 2583 targs.tp = tp; 2584 targs.mp = mp; 2585 targs.agbp = agbp; 2586 targs.agno = args->agno; 2587 targs.alignment = targs.minlen = targs.prod = 1; 2588 targs.type = XFS_ALLOCTYPE_THIS_AG; 2589 targs.pag = pag; 2590 error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp); 2591 if (error) 2592 goto out_agbp_relse; 2593 2594 /* Make the freelist longer if it's too short. */ 2595 while (pag->pagf_flcount < need) { 2596 targs.agbno = 0; 2597 targs.maxlen = need - pag->pagf_flcount; 2598 targs.resv = XFS_AG_RESV_AGFL; 2599 2600 /* Allocate as many blocks as possible at once. */ 2601 error = xfs_alloc_ag_vextent(&targs); 2602 if (error) 2603 goto out_agflbp_relse; 2604 2605 /* 2606 * Stop if we run out. Won't happen if callers are obeying 2607 * the restrictions correctly. Can happen for free calls 2608 * on a completely full ag. 2609 */ 2610 if (targs.agbno == NULLAGBLOCK) { 2611 if (flags & XFS_ALLOC_FLAG_FREEING) 2612 break; 2613 goto out_agflbp_relse; 2614 } 2615 /* 2616 * Put each allocated block on the list. 2617 */ 2618 for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) { 2619 error = xfs_alloc_put_freelist(tp, agbp, 2620 agflbp, bno, 0); 2621 if (error) 2622 goto out_agflbp_relse; 2623 } 2624 } 2625 xfs_trans_brelse(tp, agflbp); 2626 args->agbp = agbp; 2627 return 0; 2628 2629 out_agflbp_relse: 2630 xfs_trans_brelse(tp, agflbp); 2631 out_agbp_relse: 2632 if (agbp) 2633 xfs_trans_brelse(tp, agbp); 2634 out_no_agbp: 2635 args->agbp = NULL; 2636 return error; 2637 } 2638 2639 /* 2640 * Get a block from the freelist. 2641 * Returns with the buffer for the block gotten. 2642 */ 2643 int /* error */ 2644 xfs_alloc_get_freelist( 2645 xfs_trans_t *tp, /* transaction pointer */ 2646 xfs_buf_t *agbp, /* buffer containing the agf structure */ 2647 xfs_agblock_t *bnop, /* block address retrieved from freelist */ 2648 int btreeblk) /* destination is a AGF btree */ 2649 { 2650 xfs_agf_t *agf; /* a.g. freespace structure */ 2651 xfs_buf_t *agflbp;/* buffer for a.g. freelist structure */ 2652 xfs_agblock_t bno; /* block number returned */ 2653 __be32 *agfl_bno; 2654 int error; 2655 int logflags; 2656 xfs_mount_t *mp = tp->t_mountp; 2657 xfs_perag_t *pag; /* per allocation group data */ 2658 2659 /* 2660 * Freelist is empty, give up. 2661 */ 2662 agf = XFS_BUF_TO_AGF(agbp); 2663 if (!agf->agf_flcount) { 2664 *bnop = NULLAGBLOCK; 2665 return 0; 2666 } 2667 /* 2668 * Read the array of free blocks. 2669 */ 2670 error = xfs_alloc_read_agfl(mp, tp, be32_to_cpu(agf->agf_seqno), 2671 &agflbp); 2672 if (error) 2673 return error; 2674 2675 2676 /* 2677 * Get the block number and update the data structures. 2678 */ 2679 agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp); 2680 bno = be32_to_cpu(agfl_bno[be32_to_cpu(agf->agf_flfirst)]); 2681 be32_add_cpu(&agf->agf_flfirst, 1); 2682 xfs_trans_brelse(tp, agflbp); 2683 if (be32_to_cpu(agf->agf_flfirst) == xfs_agfl_size(mp)) 2684 agf->agf_flfirst = 0; 2685 2686 pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno)); 2687 ASSERT(!pag->pagf_agflreset); 2688 be32_add_cpu(&agf->agf_flcount, -1); 2689 xfs_trans_agflist_delta(tp, -1); 2690 pag->pagf_flcount--; 2691 2692 logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT; 2693 if (btreeblk) { 2694 be32_add_cpu(&agf->agf_btreeblks, 1); 2695 pag->pagf_btreeblks++; 2696 logflags |= XFS_AGF_BTREEBLKS; 2697 } 2698 xfs_perag_put(pag); 2699 2700 xfs_alloc_log_agf(tp, agbp, logflags); 2701 *bnop = bno; 2702 2703 return 0; 2704 } 2705 2706 /* 2707 * Log the given fields from the agf structure. 2708 */ 2709 void 2710 xfs_alloc_log_agf( 2711 xfs_trans_t *tp, /* transaction pointer */ 2712 xfs_buf_t *bp, /* buffer for a.g. freelist header */ 2713 int fields) /* mask of fields to be logged (XFS_AGF_...) */ 2714 { 2715 int first; /* first byte offset */ 2716 int last; /* last byte offset */ 2717 static const short offsets[] = { 2718 offsetof(xfs_agf_t, agf_magicnum), 2719 offsetof(xfs_agf_t, agf_versionnum), 2720 offsetof(xfs_agf_t, agf_seqno), 2721 offsetof(xfs_agf_t, agf_length), 2722 offsetof(xfs_agf_t, agf_roots[0]), 2723 offsetof(xfs_agf_t, agf_levels[0]), 2724 offsetof(xfs_agf_t, agf_flfirst), 2725 offsetof(xfs_agf_t, agf_fllast), 2726 offsetof(xfs_agf_t, agf_flcount), 2727 offsetof(xfs_agf_t, agf_freeblks), 2728 offsetof(xfs_agf_t, agf_longest), 2729 offsetof(xfs_agf_t, agf_btreeblks), 2730 offsetof(xfs_agf_t, agf_uuid), 2731 offsetof(xfs_agf_t, agf_rmap_blocks), 2732 offsetof(xfs_agf_t, agf_refcount_blocks), 2733 offsetof(xfs_agf_t, agf_refcount_root), 2734 offsetof(xfs_agf_t, agf_refcount_level), 2735 /* needed so that we don't log the whole rest of the structure: */ 2736 offsetof(xfs_agf_t, agf_spare64), 2737 sizeof(xfs_agf_t) 2738 }; 2739 2740 trace_xfs_agf(tp->t_mountp, XFS_BUF_TO_AGF(bp), fields, _RET_IP_); 2741 2742 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGF_BUF); 2743 2744 xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last); 2745 xfs_trans_log_buf(tp, bp, (uint)first, (uint)last); 2746 } 2747 2748 /* 2749 * Interface for inode allocation to force the pag data to be initialized. 2750 */ 2751 int /* error */ 2752 xfs_alloc_pagf_init( 2753 xfs_mount_t *mp, /* file system mount structure */ 2754 xfs_trans_t *tp, /* transaction pointer */ 2755 xfs_agnumber_t agno, /* allocation group number */ 2756 int flags) /* XFS_ALLOC_FLAGS_... */ 2757 { 2758 xfs_buf_t *bp; 2759 int error; 2760 2761 if ((error = xfs_alloc_read_agf(mp, tp, agno, flags, &bp))) 2762 return error; 2763 if (bp) 2764 xfs_trans_brelse(tp, bp); 2765 return 0; 2766 } 2767 2768 /* 2769 * Put the block on the freelist for the allocation group. 2770 */ 2771 int /* error */ 2772 xfs_alloc_put_freelist( 2773 xfs_trans_t *tp, /* transaction pointer */ 2774 xfs_buf_t *agbp, /* buffer for a.g. freelist header */ 2775 xfs_buf_t *agflbp,/* buffer for a.g. free block array */ 2776 xfs_agblock_t bno, /* block being freed */ 2777 int btreeblk) /* block came from a AGF btree */ 2778 { 2779 xfs_agf_t *agf; /* a.g. freespace structure */ 2780 __be32 *blockp;/* pointer to array entry */ 2781 int error; 2782 int logflags; 2783 xfs_mount_t *mp; /* mount structure */ 2784 xfs_perag_t *pag; /* per allocation group data */ 2785 __be32 *agfl_bno; 2786 int startoff; 2787 2788 agf = XFS_BUF_TO_AGF(agbp); 2789 mp = tp->t_mountp; 2790 2791 if (!agflbp && (error = xfs_alloc_read_agfl(mp, tp, 2792 be32_to_cpu(agf->agf_seqno), &agflbp))) 2793 return error; 2794 be32_add_cpu(&agf->agf_fllast, 1); 2795 if (be32_to_cpu(agf->agf_fllast) == xfs_agfl_size(mp)) 2796 agf->agf_fllast = 0; 2797 2798 pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno)); 2799 ASSERT(!pag->pagf_agflreset); 2800 be32_add_cpu(&agf->agf_flcount, 1); 2801 xfs_trans_agflist_delta(tp, 1); 2802 pag->pagf_flcount++; 2803 2804 logflags = XFS_AGF_FLLAST | XFS_AGF_FLCOUNT; 2805 if (btreeblk) { 2806 be32_add_cpu(&agf->agf_btreeblks, -1); 2807 pag->pagf_btreeblks--; 2808 logflags |= XFS_AGF_BTREEBLKS; 2809 } 2810 xfs_perag_put(pag); 2811 2812 xfs_alloc_log_agf(tp, agbp, logflags); 2813 2814 ASSERT(be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp)); 2815 2816 agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp); 2817 blockp = &agfl_bno[be32_to_cpu(agf->agf_fllast)]; 2818 *blockp = cpu_to_be32(bno); 2819 startoff = (char *)blockp - (char *)agflbp->b_addr; 2820 2821 xfs_alloc_log_agf(tp, agbp, logflags); 2822 2823 xfs_trans_buf_set_type(tp, agflbp, XFS_BLFT_AGFL_BUF); 2824 xfs_trans_log_buf(tp, agflbp, startoff, 2825 startoff + sizeof(xfs_agblock_t) - 1); 2826 return 0; 2827 } 2828 2829 static xfs_failaddr_t 2830 xfs_agf_verify( 2831 struct xfs_buf *bp) 2832 { 2833 struct xfs_mount *mp = bp->b_mount; 2834 struct xfs_agf *agf = XFS_BUF_TO_AGF(bp); 2835 2836 if (xfs_sb_version_hascrc(&mp->m_sb)) { 2837 if (!uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid)) 2838 return __this_address; 2839 if (!xfs_log_check_lsn(mp, 2840 be64_to_cpu(XFS_BUF_TO_AGF(bp)->agf_lsn))) 2841 return __this_address; 2842 } 2843 2844 if (!xfs_verify_magic(bp, agf->agf_magicnum)) 2845 return __this_address; 2846 2847 if (!(XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) && 2848 be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) && 2849 be32_to_cpu(agf->agf_flfirst) < xfs_agfl_size(mp) && 2850 be32_to_cpu(agf->agf_fllast) < xfs_agfl_size(mp) && 2851 be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp))) 2852 return __this_address; 2853 2854 if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 || 2855 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) < 1 || 2856 be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > XFS_BTREE_MAXLEVELS || 2857 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) > XFS_BTREE_MAXLEVELS) 2858 return __this_address; 2859 2860 if (xfs_sb_version_hasrmapbt(&mp->m_sb) && 2861 (be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) < 1 || 2862 be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > XFS_BTREE_MAXLEVELS)) 2863 return __this_address; 2864 2865 /* 2866 * during growfs operations, the perag is not fully initialised, 2867 * so we can't use it for any useful checking. growfs ensures we can't 2868 * use it by using uncached buffers that don't have the perag attached 2869 * so we can detect and avoid this problem. 2870 */ 2871 if (bp->b_pag && be32_to_cpu(agf->agf_seqno) != bp->b_pag->pag_agno) 2872 return __this_address; 2873 2874 if (xfs_sb_version_haslazysbcount(&mp->m_sb) && 2875 be32_to_cpu(agf->agf_btreeblks) > be32_to_cpu(agf->agf_length)) 2876 return __this_address; 2877 2878 if (xfs_sb_version_hasreflink(&mp->m_sb) && 2879 (be32_to_cpu(agf->agf_refcount_level) < 1 || 2880 be32_to_cpu(agf->agf_refcount_level) > XFS_BTREE_MAXLEVELS)) 2881 return __this_address; 2882 2883 return NULL; 2884 2885 } 2886 2887 static void 2888 xfs_agf_read_verify( 2889 struct xfs_buf *bp) 2890 { 2891 struct xfs_mount *mp = bp->b_mount; 2892 xfs_failaddr_t fa; 2893 2894 if (xfs_sb_version_hascrc(&mp->m_sb) && 2895 !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF)) 2896 xfs_verifier_error(bp, -EFSBADCRC, __this_address); 2897 else { 2898 fa = xfs_agf_verify(bp); 2899 if (XFS_TEST_ERROR(fa, mp, XFS_ERRTAG_ALLOC_READ_AGF)) 2900 xfs_verifier_error(bp, -EFSCORRUPTED, fa); 2901 } 2902 } 2903 2904 static void 2905 xfs_agf_write_verify( 2906 struct xfs_buf *bp) 2907 { 2908 struct xfs_mount *mp = bp->b_mount; 2909 struct xfs_buf_log_item *bip = bp->b_log_item; 2910 xfs_failaddr_t fa; 2911 2912 fa = xfs_agf_verify(bp); 2913 if (fa) { 2914 xfs_verifier_error(bp, -EFSCORRUPTED, fa); 2915 return; 2916 } 2917 2918 if (!xfs_sb_version_hascrc(&mp->m_sb)) 2919 return; 2920 2921 if (bip) 2922 XFS_BUF_TO_AGF(bp)->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn); 2923 2924 xfs_buf_update_cksum(bp, XFS_AGF_CRC_OFF); 2925 } 2926 2927 const struct xfs_buf_ops xfs_agf_buf_ops = { 2928 .name = "xfs_agf", 2929 .magic = { cpu_to_be32(XFS_AGF_MAGIC), cpu_to_be32(XFS_AGF_MAGIC) }, 2930 .verify_read = xfs_agf_read_verify, 2931 .verify_write = xfs_agf_write_verify, 2932 .verify_struct = xfs_agf_verify, 2933 }; 2934 2935 /* 2936 * Read in the allocation group header (free/alloc section). 2937 */ 2938 int /* error */ 2939 xfs_read_agf( 2940 struct xfs_mount *mp, /* mount point structure */ 2941 struct xfs_trans *tp, /* transaction pointer */ 2942 xfs_agnumber_t agno, /* allocation group number */ 2943 int flags, /* XFS_BUF_ */ 2944 struct xfs_buf **bpp) /* buffer for the ag freelist header */ 2945 { 2946 int error; 2947 2948 trace_xfs_read_agf(mp, agno); 2949 2950 ASSERT(agno != NULLAGNUMBER); 2951 error = xfs_trans_read_buf( 2952 mp, tp, mp->m_ddev_targp, 2953 XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)), 2954 XFS_FSS_TO_BB(mp, 1), flags, bpp, &xfs_agf_buf_ops); 2955 if (error) 2956 return error; 2957 if (!*bpp) 2958 return 0; 2959 2960 ASSERT(!(*bpp)->b_error); 2961 xfs_buf_set_ref(*bpp, XFS_AGF_REF); 2962 return 0; 2963 } 2964 2965 /* 2966 * Read in the allocation group header (free/alloc section). 2967 */ 2968 int /* error */ 2969 xfs_alloc_read_agf( 2970 struct xfs_mount *mp, /* mount point structure */ 2971 struct xfs_trans *tp, /* transaction pointer */ 2972 xfs_agnumber_t agno, /* allocation group number */ 2973 int flags, /* XFS_ALLOC_FLAG_... */ 2974 struct xfs_buf **bpp) /* buffer for the ag freelist header */ 2975 { 2976 struct xfs_agf *agf; /* ag freelist header */ 2977 struct xfs_perag *pag; /* per allocation group data */ 2978 int error; 2979 2980 trace_xfs_alloc_read_agf(mp, agno); 2981 2982 ASSERT(agno != NULLAGNUMBER); 2983 error = xfs_read_agf(mp, tp, agno, 2984 (flags & XFS_ALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0, 2985 bpp); 2986 if (error) 2987 return error; 2988 if (!*bpp) 2989 return 0; 2990 ASSERT(!(*bpp)->b_error); 2991 2992 agf = XFS_BUF_TO_AGF(*bpp); 2993 pag = xfs_perag_get(mp, agno); 2994 if (!pag->pagf_init) { 2995 pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks); 2996 pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks); 2997 pag->pagf_flcount = be32_to_cpu(agf->agf_flcount); 2998 pag->pagf_longest = be32_to_cpu(agf->agf_longest); 2999 pag->pagf_levels[XFS_BTNUM_BNOi] = 3000 be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]); 3001 pag->pagf_levels[XFS_BTNUM_CNTi] = 3002 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]); 3003 pag->pagf_levels[XFS_BTNUM_RMAPi] = 3004 be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAPi]); 3005 pag->pagf_refcount_level = be32_to_cpu(agf->agf_refcount_level); 3006 pag->pagf_init = 1; 3007 pag->pagf_agflreset = xfs_agfl_needs_reset(mp, agf); 3008 } 3009 #ifdef DEBUG 3010 else if (!XFS_FORCED_SHUTDOWN(mp)) { 3011 ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks)); 3012 ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks)); 3013 ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount)); 3014 ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest)); 3015 ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] == 3016 be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi])); 3017 ASSERT(pag->pagf_levels[XFS_BTNUM_CNTi] == 3018 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi])); 3019 } 3020 #endif 3021 xfs_perag_put(pag); 3022 return 0; 3023 } 3024 3025 /* 3026 * Allocate an extent (variable-size). 3027 * Depending on the allocation type, we either look in a single allocation 3028 * group or loop over the allocation groups to find the result. 3029 */ 3030 int /* error */ 3031 xfs_alloc_vextent( 3032 struct xfs_alloc_arg *args) /* allocation argument structure */ 3033 { 3034 xfs_agblock_t agsize; /* allocation group size */ 3035 int error; 3036 int flags; /* XFS_ALLOC_FLAG_... locking flags */ 3037 struct xfs_mount *mp; /* mount structure pointer */ 3038 xfs_agnumber_t sagno; /* starting allocation group number */ 3039 xfs_alloctype_t type; /* input allocation type */ 3040 int bump_rotor = 0; 3041 xfs_agnumber_t rotorstep = xfs_rotorstep; /* inode32 agf stepper */ 3042 3043 mp = args->mp; 3044 type = args->otype = args->type; 3045 args->agbno = NULLAGBLOCK; 3046 /* 3047 * Just fix this up, for the case where the last a.g. is shorter 3048 * (or there's only one a.g.) and the caller couldn't easily figure 3049 * that out (xfs_bmap_alloc). 3050 */ 3051 agsize = mp->m_sb.sb_agblocks; 3052 if (args->maxlen > agsize) 3053 args->maxlen = agsize; 3054 if (args->alignment == 0) 3055 args->alignment = 1; 3056 ASSERT(XFS_FSB_TO_AGNO(mp, args->fsbno) < mp->m_sb.sb_agcount); 3057 ASSERT(XFS_FSB_TO_AGBNO(mp, args->fsbno) < agsize); 3058 ASSERT(args->minlen <= args->maxlen); 3059 ASSERT(args->minlen <= agsize); 3060 ASSERT(args->mod < args->prod); 3061 if (XFS_FSB_TO_AGNO(mp, args->fsbno) >= mp->m_sb.sb_agcount || 3062 XFS_FSB_TO_AGBNO(mp, args->fsbno) >= agsize || 3063 args->minlen > args->maxlen || args->minlen > agsize || 3064 args->mod >= args->prod) { 3065 args->fsbno = NULLFSBLOCK; 3066 trace_xfs_alloc_vextent_badargs(args); 3067 return 0; 3068 } 3069 3070 switch (type) { 3071 case XFS_ALLOCTYPE_THIS_AG: 3072 case XFS_ALLOCTYPE_NEAR_BNO: 3073 case XFS_ALLOCTYPE_THIS_BNO: 3074 /* 3075 * These three force us into a single a.g. 3076 */ 3077 args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno); 3078 args->pag = xfs_perag_get(mp, args->agno); 3079 error = xfs_alloc_fix_freelist(args, 0); 3080 if (error) { 3081 trace_xfs_alloc_vextent_nofix(args); 3082 goto error0; 3083 } 3084 if (!args->agbp) { 3085 trace_xfs_alloc_vextent_noagbp(args); 3086 break; 3087 } 3088 args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno); 3089 if ((error = xfs_alloc_ag_vextent(args))) 3090 goto error0; 3091 break; 3092 case XFS_ALLOCTYPE_START_BNO: 3093 /* 3094 * Try near allocation first, then anywhere-in-ag after 3095 * the first a.g. fails. 3096 */ 3097 if ((args->datatype & XFS_ALLOC_INITIAL_USER_DATA) && 3098 (mp->m_flags & XFS_MOUNT_32BITINODES)) { 3099 args->fsbno = XFS_AGB_TO_FSB(mp, 3100 ((mp->m_agfrotor / rotorstep) % 3101 mp->m_sb.sb_agcount), 0); 3102 bump_rotor = 1; 3103 } 3104 args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno); 3105 args->type = XFS_ALLOCTYPE_NEAR_BNO; 3106 /* FALLTHROUGH */ 3107 case XFS_ALLOCTYPE_FIRST_AG: 3108 /* 3109 * Rotate through the allocation groups looking for a winner. 3110 */ 3111 if (type == XFS_ALLOCTYPE_FIRST_AG) { 3112 /* 3113 * Start with allocation group given by bno. 3114 */ 3115 args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno); 3116 args->type = XFS_ALLOCTYPE_THIS_AG; 3117 sagno = 0; 3118 flags = 0; 3119 } else { 3120 /* 3121 * Start with the given allocation group. 3122 */ 3123 args->agno = sagno = XFS_FSB_TO_AGNO(mp, args->fsbno); 3124 flags = XFS_ALLOC_FLAG_TRYLOCK; 3125 } 3126 /* 3127 * Loop over allocation groups twice; first time with 3128 * trylock set, second time without. 3129 */ 3130 for (;;) { 3131 args->pag = xfs_perag_get(mp, args->agno); 3132 error = xfs_alloc_fix_freelist(args, flags); 3133 if (error) { 3134 trace_xfs_alloc_vextent_nofix(args); 3135 goto error0; 3136 } 3137 /* 3138 * If we get a buffer back then the allocation will fly. 3139 */ 3140 if (args->agbp) { 3141 if ((error = xfs_alloc_ag_vextent(args))) 3142 goto error0; 3143 break; 3144 } 3145 3146 trace_xfs_alloc_vextent_loopfailed(args); 3147 3148 /* 3149 * Didn't work, figure out the next iteration. 3150 */ 3151 if (args->agno == sagno && 3152 type == XFS_ALLOCTYPE_START_BNO) 3153 args->type = XFS_ALLOCTYPE_THIS_AG; 3154 /* 3155 * For the first allocation, we can try any AG to get 3156 * space. However, if we already have allocated a 3157 * block, we don't want to try AGs whose number is below 3158 * sagno. Otherwise, we may end up with out-of-order 3159 * locking of AGF, which might cause deadlock. 3160 */ 3161 if (++(args->agno) == mp->m_sb.sb_agcount) { 3162 if (args->tp->t_firstblock != NULLFSBLOCK) 3163 args->agno = sagno; 3164 else 3165 args->agno = 0; 3166 } 3167 /* 3168 * Reached the starting a.g., must either be done 3169 * or switch to non-trylock mode. 3170 */ 3171 if (args->agno == sagno) { 3172 if (flags == 0) { 3173 args->agbno = NULLAGBLOCK; 3174 trace_xfs_alloc_vextent_allfailed(args); 3175 break; 3176 } 3177 3178 flags = 0; 3179 if (type == XFS_ALLOCTYPE_START_BNO) { 3180 args->agbno = XFS_FSB_TO_AGBNO(mp, 3181 args->fsbno); 3182 args->type = XFS_ALLOCTYPE_NEAR_BNO; 3183 } 3184 } 3185 xfs_perag_put(args->pag); 3186 } 3187 if (bump_rotor) { 3188 if (args->agno == sagno) 3189 mp->m_agfrotor = (mp->m_agfrotor + 1) % 3190 (mp->m_sb.sb_agcount * rotorstep); 3191 else 3192 mp->m_agfrotor = (args->agno * rotorstep + 1) % 3193 (mp->m_sb.sb_agcount * rotorstep); 3194 } 3195 break; 3196 default: 3197 ASSERT(0); 3198 /* NOTREACHED */ 3199 } 3200 if (args->agbno == NULLAGBLOCK) 3201 args->fsbno = NULLFSBLOCK; 3202 else { 3203 args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno); 3204 #ifdef DEBUG 3205 ASSERT(args->len >= args->minlen); 3206 ASSERT(args->len <= args->maxlen); 3207 ASSERT(args->agbno % args->alignment == 0); 3208 XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno), 3209 args->len); 3210 #endif 3211 3212 } 3213 xfs_perag_put(args->pag); 3214 return 0; 3215 error0: 3216 xfs_perag_put(args->pag); 3217 return error; 3218 } 3219 3220 /* Ensure that the freelist is at full capacity. */ 3221 int 3222 xfs_free_extent_fix_freelist( 3223 struct xfs_trans *tp, 3224 xfs_agnumber_t agno, 3225 struct xfs_buf **agbp) 3226 { 3227 struct xfs_alloc_arg args; 3228 int error; 3229 3230 memset(&args, 0, sizeof(struct xfs_alloc_arg)); 3231 args.tp = tp; 3232 args.mp = tp->t_mountp; 3233 args.agno = agno; 3234 3235 /* 3236 * validate that the block number is legal - the enables us to detect 3237 * and handle a silent filesystem corruption rather than crashing. 3238 */ 3239 if (args.agno >= args.mp->m_sb.sb_agcount) 3240 return -EFSCORRUPTED; 3241 3242 args.pag = xfs_perag_get(args.mp, args.agno); 3243 ASSERT(args.pag); 3244 3245 error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING); 3246 if (error) 3247 goto out; 3248 3249 *agbp = args.agbp; 3250 out: 3251 xfs_perag_put(args.pag); 3252 return error; 3253 } 3254 3255 /* 3256 * Free an extent. 3257 * Just break up the extent address and hand off to xfs_free_ag_extent 3258 * after fixing up the freelist. 3259 */ 3260 int 3261 __xfs_free_extent( 3262 struct xfs_trans *tp, 3263 xfs_fsblock_t bno, 3264 xfs_extlen_t len, 3265 const struct xfs_owner_info *oinfo, 3266 enum xfs_ag_resv_type type, 3267 bool skip_discard) 3268 { 3269 struct xfs_mount *mp = tp->t_mountp; 3270 struct xfs_buf *agbp; 3271 xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, bno); 3272 xfs_agblock_t agbno = XFS_FSB_TO_AGBNO(mp, bno); 3273 int error; 3274 unsigned int busy_flags = 0; 3275 3276 ASSERT(len != 0); 3277 ASSERT(type != XFS_AG_RESV_AGFL); 3278 3279 if (XFS_TEST_ERROR(false, mp, 3280 XFS_ERRTAG_FREE_EXTENT)) 3281 return -EIO; 3282 3283 error = xfs_free_extent_fix_freelist(tp, agno, &agbp); 3284 if (error) 3285 return error; 3286 3287 if (XFS_IS_CORRUPT(mp, agbno >= mp->m_sb.sb_agblocks)) { 3288 error = -EFSCORRUPTED; 3289 goto err; 3290 } 3291 3292 /* validate the extent size is legal now we have the agf locked */ 3293 if (XFS_IS_CORRUPT(mp, 3294 agbno + len > 3295 be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_length))) { 3296 error = -EFSCORRUPTED; 3297 goto err; 3298 } 3299 3300 error = xfs_free_ag_extent(tp, agbp, agno, agbno, len, oinfo, type); 3301 if (error) 3302 goto err; 3303 3304 if (skip_discard) 3305 busy_flags |= XFS_EXTENT_BUSY_SKIP_DISCARD; 3306 xfs_extent_busy_insert(tp, agno, agbno, len, busy_flags); 3307 return 0; 3308 3309 err: 3310 xfs_trans_brelse(tp, agbp); 3311 return error; 3312 } 3313 3314 struct xfs_alloc_query_range_info { 3315 xfs_alloc_query_range_fn fn; 3316 void *priv; 3317 }; 3318 3319 /* Format btree record and pass to our callback. */ 3320 STATIC int 3321 xfs_alloc_query_range_helper( 3322 struct xfs_btree_cur *cur, 3323 union xfs_btree_rec *rec, 3324 void *priv) 3325 { 3326 struct xfs_alloc_query_range_info *query = priv; 3327 struct xfs_alloc_rec_incore irec; 3328 3329 irec.ar_startblock = be32_to_cpu(rec->alloc.ar_startblock); 3330 irec.ar_blockcount = be32_to_cpu(rec->alloc.ar_blockcount); 3331 return query->fn(cur, &irec, query->priv); 3332 } 3333 3334 /* Find all free space within a given range of blocks. */ 3335 int 3336 xfs_alloc_query_range( 3337 struct xfs_btree_cur *cur, 3338 struct xfs_alloc_rec_incore *low_rec, 3339 struct xfs_alloc_rec_incore *high_rec, 3340 xfs_alloc_query_range_fn fn, 3341 void *priv) 3342 { 3343 union xfs_btree_irec low_brec; 3344 union xfs_btree_irec high_brec; 3345 struct xfs_alloc_query_range_info query; 3346 3347 ASSERT(cur->bc_btnum == XFS_BTNUM_BNO); 3348 low_brec.a = *low_rec; 3349 high_brec.a = *high_rec; 3350 query.priv = priv; 3351 query.fn = fn; 3352 return xfs_btree_query_range(cur, &low_brec, &high_brec, 3353 xfs_alloc_query_range_helper, &query); 3354 } 3355 3356 /* Find all free space records. */ 3357 int 3358 xfs_alloc_query_all( 3359 struct xfs_btree_cur *cur, 3360 xfs_alloc_query_range_fn fn, 3361 void *priv) 3362 { 3363 struct xfs_alloc_query_range_info query; 3364 3365 ASSERT(cur->bc_btnum == XFS_BTNUM_BNO); 3366 query.priv = priv; 3367 query.fn = fn; 3368 return xfs_btree_query_all(cur, xfs_alloc_query_range_helper, &query); 3369 } 3370 3371 /* Is there a record covering a given extent? */ 3372 int 3373 xfs_alloc_has_record( 3374 struct xfs_btree_cur *cur, 3375 xfs_agblock_t bno, 3376 xfs_extlen_t len, 3377 bool *exists) 3378 { 3379 union xfs_btree_irec low; 3380 union xfs_btree_irec high; 3381 3382 memset(&low, 0, sizeof(low)); 3383 low.a.ar_startblock = bno; 3384 memset(&high, 0xFF, sizeof(high)); 3385 high.a.ar_startblock = bno + len - 1; 3386 3387 return xfs_btree_has_record(cur, &low, &high, exists); 3388 } 3389 3390 /* 3391 * Walk all the blocks in the AGFL. The @walk_fn can return any negative 3392 * error code or XFS_ITER_*. 3393 */ 3394 int 3395 xfs_agfl_walk( 3396 struct xfs_mount *mp, 3397 struct xfs_agf *agf, 3398 struct xfs_buf *agflbp, 3399 xfs_agfl_walk_fn walk_fn, 3400 void *priv) 3401 { 3402 __be32 *agfl_bno; 3403 unsigned int i; 3404 int error; 3405 3406 agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp); 3407 i = be32_to_cpu(agf->agf_flfirst); 3408 3409 /* Nothing to walk in an empty AGFL. */ 3410 if (agf->agf_flcount == cpu_to_be32(0)) 3411 return 0; 3412 3413 /* Otherwise, walk from first to last, wrapping as needed. */ 3414 for (;;) { 3415 error = walk_fn(mp, be32_to_cpu(agfl_bno[i]), priv); 3416 if (error) 3417 return error; 3418 if (i == be32_to_cpu(agf->agf_fllast)) 3419 break; 3420 if (++i == xfs_agfl_size(mp)) 3421 i = 0; 3422 } 3423 3424 return 0; 3425 } 3426