1 /* 2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. 3 * All Rights Reserved. 4 * 5 * This program is free software; you can redistribute it and/or 6 * modify it under the terms of the GNU General Public License as 7 * published by the Free Software Foundation. 8 * 9 * This program is distributed in the hope that it would be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, write the Free Software Foundation, 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 17 */ 18 #include "xfs.h" 19 #include "xfs_fs.h" 20 #include "xfs_format.h" 21 #include "xfs_log_format.h" 22 #include "xfs_shared.h" 23 #include "xfs_trans_resv.h" 24 #include "xfs_bit.h" 25 #include "xfs_sb.h" 26 #include "xfs_mount.h" 27 #include "xfs_defer.h" 28 #include "xfs_inode.h" 29 #include "xfs_btree.h" 30 #include "xfs_rmap.h" 31 #include "xfs_alloc_btree.h" 32 #include "xfs_alloc.h" 33 #include "xfs_extent_busy.h" 34 #include "xfs_errortag.h" 35 #include "xfs_error.h" 36 #include "xfs_cksum.h" 37 #include "xfs_trace.h" 38 #include "xfs_trans.h" 39 #include "xfs_buf_item.h" 40 #include "xfs_log.h" 41 #include "xfs_ag_resv.h" 42 43 struct workqueue_struct *xfs_alloc_wq; 44 45 #define XFS_ABSDIFF(a,b) (((a) <= (b)) ? ((b) - (a)) : ((a) - (b))) 46 47 #define XFSA_FIXUP_BNO_OK 1 48 #define XFSA_FIXUP_CNT_OK 2 49 50 STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *); 51 STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *); 52 STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *); 53 STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *, 54 xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *); 55 56 /* 57 * Size of the AGFL. For CRC-enabled filesystes we steal a couple of slots in 58 * the beginning of the block for a proper header with the location information 59 * and CRC. 60 */ 61 unsigned int 62 xfs_agfl_size( 63 struct xfs_mount *mp) 64 { 65 unsigned int size = mp->m_sb.sb_sectsize; 66 67 if (xfs_sb_version_hascrc(&mp->m_sb)) 68 size -= sizeof(struct xfs_agfl); 69 70 return size / sizeof(xfs_agblock_t); 71 } 72 73 unsigned int 74 xfs_refc_block( 75 struct xfs_mount *mp) 76 { 77 if (xfs_sb_version_hasrmapbt(&mp->m_sb)) 78 return XFS_RMAP_BLOCK(mp) + 1; 79 if (xfs_sb_version_hasfinobt(&mp->m_sb)) 80 return XFS_FIBT_BLOCK(mp) + 1; 81 return XFS_IBT_BLOCK(mp) + 1; 82 } 83 84 xfs_extlen_t 85 xfs_prealloc_blocks( 86 struct xfs_mount *mp) 87 { 88 if (xfs_sb_version_hasreflink(&mp->m_sb)) 89 return xfs_refc_block(mp) + 1; 90 if (xfs_sb_version_hasrmapbt(&mp->m_sb)) 91 return XFS_RMAP_BLOCK(mp) + 1; 92 if (xfs_sb_version_hasfinobt(&mp->m_sb)) 93 return XFS_FIBT_BLOCK(mp) + 1; 94 return XFS_IBT_BLOCK(mp) + 1; 95 } 96 97 /* 98 * In order to avoid ENOSPC-related deadlock caused by out-of-order locking of 99 * AGF buffer (PV 947395), we place constraints on the relationship among 100 * actual allocations for data blocks, freelist blocks, and potential file data 101 * bmap btree blocks. However, these restrictions may result in no actual space 102 * allocated for a delayed extent, for example, a data block in a certain AG is 103 * allocated but there is no additional block for the additional bmap btree 104 * block due to a split of the bmap btree of the file. The result of this may 105 * lead to an infinite loop when the file gets flushed to disk and all delayed 106 * extents need to be actually allocated. To get around this, we explicitly set 107 * aside a few blocks which will not be reserved in delayed allocation. 108 * 109 * We need to reserve 4 fsbs _per AG_ for the freelist and 4 more to handle a 110 * potential split of the file's bmap btree. 111 */ 112 unsigned int 113 xfs_alloc_set_aside( 114 struct xfs_mount *mp) 115 { 116 return mp->m_sb.sb_agcount * (XFS_ALLOC_AGFL_RESERVE + 4); 117 } 118 119 /* 120 * When deciding how much space to allocate out of an AG, we limit the 121 * allocation maximum size to the size the AG. However, we cannot use all the 122 * blocks in the AG - some are permanently used by metadata. These 123 * blocks are generally: 124 * - the AG superblock, AGF, AGI and AGFL 125 * - the AGF (bno and cnt) and AGI btree root blocks, and optionally 126 * the AGI free inode and rmap btree root blocks. 127 * - blocks on the AGFL according to xfs_alloc_set_aside() limits 128 * - the rmapbt root block 129 * 130 * The AG headers are sector sized, so the amount of space they take up is 131 * dependent on filesystem geometry. The others are all single blocks. 132 */ 133 unsigned int 134 xfs_alloc_ag_max_usable( 135 struct xfs_mount *mp) 136 { 137 unsigned int blocks; 138 139 blocks = XFS_BB_TO_FSB(mp, XFS_FSS_TO_BB(mp, 4)); /* ag headers */ 140 blocks += XFS_ALLOC_AGFL_RESERVE; 141 blocks += 3; /* AGF, AGI btree root blocks */ 142 if (xfs_sb_version_hasfinobt(&mp->m_sb)) 143 blocks++; /* finobt root block */ 144 if (xfs_sb_version_hasrmapbt(&mp->m_sb)) 145 blocks++; /* rmap root block */ 146 if (xfs_sb_version_hasreflink(&mp->m_sb)) 147 blocks++; /* refcount root block */ 148 149 return mp->m_sb.sb_agblocks - blocks; 150 } 151 152 /* 153 * Lookup the record equal to [bno, len] in the btree given by cur. 154 */ 155 STATIC int /* error */ 156 xfs_alloc_lookup_eq( 157 struct xfs_btree_cur *cur, /* btree cursor */ 158 xfs_agblock_t bno, /* starting block of extent */ 159 xfs_extlen_t len, /* length of extent */ 160 int *stat) /* success/failure */ 161 { 162 cur->bc_rec.a.ar_startblock = bno; 163 cur->bc_rec.a.ar_blockcount = len; 164 return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat); 165 } 166 167 /* 168 * Lookup the first record greater than or equal to [bno, len] 169 * in the btree given by cur. 170 */ 171 int /* error */ 172 xfs_alloc_lookup_ge( 173 struct xfs_btree_cur *cur, /* btree cursor */ 174 xfs_agblock_t bno, /* starting block of extent */ 175 xfs_extlen_t len, /* length of extent */ 176 int *stat) /* success/failure */ 177 { 178 cur->bc_rec.a.ar_startblock = bno; 179 cur->bc_rec.a.ar_blockcount = len; 180 return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat); 181 } 182 183 /* 184 * Lookup the first record less than or equal to [bno, len] 185 * in the btree given by cur. 186 */ 187 int /* error */ 188 xfs_alloc_lookup_le( 189 struct xfs_btree_cur *cur, /* btree cursor */ 190 xfs_agblock_t bno, /* starting block of extent */ 191 xfs_extlen_t len, /* length of extent */ 192 int *stat) /* success/failure */ 193 { 194 cur->bc_rec.a.ar_startblock = bno; 195 cur->bc_rec.a.ar_blockcount = len; 196 return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat); 197 } 198 199 /* 200 * Update the record referred to by cur to the value given 201 * by [bno, len]. 202 * This either works (return 0) or gets an EFSCORRUPTED error. 203 */ 204 STATIC int /* error */ 205 xfs_alloc_update( 206 struct xfs_btree_cur *cur, /* btree cursor */ 207 xfs_agblock_t bno, /* starting block of extent */ 208 xfs_extlen_t len) /* length of extent */ 209 { 210 union xfs_btree_rec rec; 211 212 rec.alloc.ar_startblock = cpu_to_be32(bno); 213 rec.alloc.ar_blockcount = cpu_to_be32(len); 214 return xfs_btree_update(cur, &rec); 215 } 216 217 /* 218 * Get the data from the pointed-to record. 219 */ 220 int /* error */ 221 xfs_alloc_get_rec( 222 struct xfs_btree_cur *cur, /* btree cursor */ 223 xfs_agblock_t *bno, /* output: starting block of extent */ 224 xfs_extlen_t *len, /* output: length of extent */ 225 int *stat) /* output: success/failure */ 226 { 227 union xfs_btree_rec *rec; 228 int error; 229 230 error = xfs_btree_get_rec(cur, &rec, stat); 231 if (!error && *stat == 1) { 232 *bno = be32_to_cpu(rec->alloc.ar_startblock); 233 *len = be32_to_cpu(rec->alloc.ar_blockcount); 234 } 235 return error; 236 } 237 238 /* 239 * Compute aligned version of the found extent. 240 * Takes alignment and min length into account. 241 */ 242 STATIC bool 243 xfs_alloc_compute_aligned( 244 xfs_alloc_arg_t *args, /* allocation argument structure */ 245 xfs_agblock_t foundbno, /* starting block in found extent */ 246 xfs_extlen_t foundlen, /* length in found extent */ 247 xfs_agblock_t *resbno, /* result block number */ 248 xfs_extlen_t *reslen, /* result length */ 249 unsigned *busy_gen) 250 { 251 xfs_agblock_t bno = foundbno; 252 xfs_extlen_t len = foundlen; 253 xfs_extlen_t diff; 254 bool busy; 255 256 /* Trim busy sections out of found extent */ 257 busy = xfs_extent_busy_trim(args, &bno, &len, busy_gen); 258 259 /* 260 * If we have a largish extent that happens to start before min_agbno, 261 * see if we can shift it into range... 262 */ 263 if (bno < args->min_agbno && bno + len > args->min_agbno) { 264 diff = args->min_agbno - bno; 265 if (len > diff) { 266 bno += diff; 267 len -= diff; 268 } 269 } 270 271 if (args->alignment > 1 && len >= args->minlen) { 272 xfs_agblock_t aligned_bno = roundup(bno, args->alignment); 273 274 diff = aligned_bno - bno; 275 276 *resbno = aligned_bno; 277 *reslen = diff >= len ? 0 : len - diff; 278 } else { 279 *resbno = bno; 280 *reslen = len; 281 } 282 283 return busy; 284 } 285 286 /* 287 * Compute best start block and diff for "near" allocations. 288 * freelen >= wantlen already checked by caller. 289 */ 290 STATIC xfs_extlen_t /* difference value (absolute) */ 291 xfs_alloc_compute_diff( 292 xfs_agblock_t wantbno, /* target starting block */ 293 xfs_extlen_t wantlen, /* target length */ 294 xfs_extlen_t alignment, /* target alignment */ 295 int datatype, /* are we allocating data? */ 296 xfs_agblock_t freebno, /* freespace's starting block */ 297 xfs_extlen_t freelen, /* freespace's length */ 298 xfs_agblock_t *newbnop) /* result: best start block from free */ 299 { 300 xfs_agblock_t freeend; /* end of freespace extent */ 301 xfs_agblock_t newbno1; /* return block number */ 302 xfs_agblock_t newbno2; /* other new block number */ 303 xfs_extlen_t newlen1=0; /* length with newbno1 */ 304 xfs_extlen_t newlen2=0; /* length with newbno2 */ 305 xfs_agblock_t wantend; /* end of target extent */ 306 bool userdata = xfs_alloc_is_userdata(datatype); 307 308 ASSERT(freelen >= wantlen); 309 freeend = freebno + freelen; 310 wantend = wantbno + wantlen; 311 /* 312 * We want to allocate from the start of a free extent if it is past 313 * the desired block or if we are allocating user data and the free 314 * extent is before desired block. The second case is there to allow 315 * for contiguous allocation from the remaining free space if the file 316 * grows in the short term. 317 */ 318 if (freebno >= wantbno || (userdata && freeend < wantend)) { 319 if ((newbno1 = roundup(freebno, alignment)) >= freeend) 320 newbno1 = NULLAGBLOCK; 321 } else if (freeend >= wantend && alignment > 1) { 322 newbno1 = roundup(wantbno, alignment); 323 newbno2 = newbno1 - alignment; 324 if (newbno1 >= freeend) 325 newbno1 = NULLAGBLOCK; 326 else 327 newlen1 = XFS_EXTLEN_MIN(wantlen, freeend - newbno1); 328 if (newbno2 < freebno) 329 newbno2 = NULLAGBLOCK; 330 else 331 newlen2 = XFS_EXTLEN_MIN(wantlen, freeend - newbno2); 332 if (newbno1 != NULLAGBLOCK && newbno2 != NULLAGBLOCK) { 333 if (newlen1 < newlen2 || 334 (newlen1 == newlen2 && 335 XFS_ABSDIFF(newbno1, wantbno) > 336 XFS_ABSDIFF(newbno2, wantbno))) 337 newbno1 = newbno2; 338 } else if (newbno2 != NULLAGBLOCK) 339 newbno1 = newbno2; 340 } else if (freeend >= wantend) { 341 newbno1 = wantbno; 342 } else if (alignment > 1) { 343 newbno1 = roundup(freeend - wantlen, alignment); 344 if (newbno1 > freeend - wantlen && 345 newbno1 - alignment >= freebno) 346 newbno1 -= alignment; 347 else if (newbno1 >= freeend) 348 newbno1 = NULLAGBLOCK; 349 } else 350 newbno1 = freeend - wantlen; 351 *newbnop = newbno1; 352 return newbno1 == NULLAGBLOCK ? 0 : XFS_ABSDIFF(newbno1, wantbno); 353 } 354 355 /* 356 * Fix up the length, based on mod and prod. 357 * len should be k * prod + mod for some k. 358 * If len is too small it is returned unchanged. 359 * If len hits maxlen it is left alone. 360 */ 361 STATIC void 362 xfs_alloc_fix_len( 363 xfs_alloc_arg_t *args) /* allocation argument structure */ 364 { 365 xfs_extlen_t k; 366 xfs_extlen_t rlen; 367 368 ASSERT(args->mod < args->prod); 369 rlen = args->len; 370 ASSERT(rlen >= args->minlen); 371 ASSERT(rlen <= args->maxlen); 372 if (args->prod <= 1 || rlen < args->mod || rlen == args->maxlen || 373 (args->mod == 0 && rlen < args->prod)) 374 return; 375 k = rlen % args->prod; 376 if (k == args->mod) 377 return; 378 if (k > args->mod) 379 rlen = rlen - (k - args->mod); 380 else 381 rlen = rlen - args->prod + (args->mod - k); 382 /* casts to (int) catch length underflows */ 383 if ((int)rlen < (int)args->minlen) 384 return; 385 ASSERT(rlen >= args->minlen && rlen <= args->maxlen); 386 ASSERT(rlen % args->prod == args->mod); 387 ASSERT(args->pag->pagf_freeblks + args->pag->pagf_flcount >= 388 rlen + args->minleft); 389 args->len = rlen; 390 } 391 392 /* 393 * Update the two btrees, logically removing from freespace the extent 394 * starting at rbno, rlen blocks. The extent is contained within the 395 * actual (current) free extent fbno for flen blocks. 396 * Flags are passed in indicating whether the cursors are set to the 397 * relevant records. 398 */ 399 STATIC int /* error code */ 400 xfs_alloc_fixup_trees( 401 xfs_btree_cur_t *cnt_cur, /* cursor for by-size btree */ 402 xfs_btree_cur_t *bno_cur, /* cursor for by-block btree */ 403 xfs_agblock_t fbno, /* starting block of free extent */ 404 xfs_extlen_t flen, /* length of free extent */ 405 xfs_agblock_t rbno, /* starting block of returned extent */ 406 xfs_extlen_t rlen, /* length of returned extent */ 407 int flags) /* flags, XFSA_FIXUP_... */ 408 { 409 int error; /* error code */ 410 int i; /* operation results */ 411 xfs_agblock_t nfbno1; /* first new free startblock */ 412 xfs_agblock_t nfbno2; /* second new free startblock */ 413 xfs_extlen_t nflen1=0; /* first new free length */ 414 xfs_extlen_t nflen2=0; /* second new free length */ 415 struct xfs_mount *mp; 416 417 mp = cnt_cur->bc_mp; 418 419 /* 420 * Look up the record in the by-size tree if necessary. 421 */ 422 if (flags & XFSA_FIXUP_CNT_OK) { 423 #ifdef DEBUG 424 if ((error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i))) 425 return error; 426 XFS_WANT_CORRUPTED_RETURN(mp, 427 i == 1 && nfbno1 == fbno && nflen1 == flen); 428 #endif 429 } else { 430 if ((error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i))) 431 return error; 432 XFS_WANT_CORRUPTED_RETURN(mp, i == 1); 433 } 434 /* 435 * Look up the record in the by-block tree if necessary. 436 */ 437 if (flags & XFSA_FIXUP_BNO_OK) { 438 #ifdef DEBUG 439 if ((error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i))) 440 return error; 441 XFS_WANT_CORRUPTED_RETURN(mp, 442 i == 1 && nfbno1 == fbno && nflen1 == flen); 443 #endif 444 } else { 445 if ((error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i))) 446 return error; 447 XFS_WANT_CORRUPTED_RETURN(mp, i == 1); 448 } 449 450 #ifdef DEBUG 451 if (bno_cur->bc_nlevels == 1 && cnt_cur->bc_nlevels == 1) { 452 struct xfs_btree_block *bnoblock; 453 struct xfs_btree_block *cntblock; 454 455 bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_bufs[0]); 456 cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_bufs[0]); 457 458 XFS_WANT_CORRUPTED_RETURN(mp, 459 bnoblock->bb_numrecs == cntblock->bb_numrecs); 460 } 461 #endif 462 463 /* 464 * Deal with all four cases: the allocated record is contained 465 * within the freespace record, so we can have new freespace 466 * at either (or both) end, or no freespace remaining. 467 */ 468 if (rbno == fbno && rlen == flen) 469 nfbno1 = nfbno2 = NULLAGBLOCK; 470 else if (rbno == fbno) { 471 nfbno1 = rbno + rlen; 472 nflen1 = flen - rlen; 473 nfbno2 = NULLAGBLOCK; 474 } else if (rbno + rlen == fbno + flen) { 475 nfbno1 = fbno; 476 nflen1 = flen - rlen; 477 nfbno2 = NULLAGBLOCK; 478 } else { 479 nfbno1 = fbno; 480 nflen1 = rbno - fbno; 481 nfbno2 = rbno + rlen; 482 nflen2 = (fbno + flen) - nfbno2; 483 } 484 /* 485 * Delete the entry from the by-size btree. 486 */ 487 if ((error = xfs_btree_delete(cnt_cur, &i))) 488 return error; 489 XFS_WANT_CORRUPTED_RETURN(mp, i == 1); 490 /* 491 * Add new by-size btree entry(s). 492 */ 493 if (nfbno1 != NULLAGBLOCK) { 494 if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i))) 495 return error; 496 XFS_WANT_CORRUPTED_RETURN(mp, i == 0); 497 if ((error = xfs_btree_insert(cnt_cur, &i))) 498 return error; 499 XFS_WANT_CORRUPTED_RETURN(mp, i == 1); 500 } 501 if (nfbno2 != NULLAGBLOCK) { 502 if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i))) 503 return error; 504 XFS_WANT_CORRUPTED_RETURN(mp, i == 0); 505 if ((error = xfs_btree_insert(cnt_cur, &i))) 506 return error; 507 XFS_WANT_CORRUPTED_RETURN(mp, i == 1); 508 } 509 /* 510 * Fix up the by-block btree entry(s). 511 */ 512 if (nfbno1 == NULLAGBLOCK) { 513 /* 514 * No remaining freespace, just delete the by-block tree entry. 515 */ 516 if ((error = xfs_btree_delete(bno_cur, &i))) 517 return error; 518 XFS_WANT_CORRUPTED_RETURN(mp, i == 1); 519 } else { 520 /* 521 * Update the by-block entry to start later|be shorter. 522 */ 523 if ((error = xfs_alloc_update(bno_cur, nfbno1, nflen1))) 524 return error; 525 } 526 if (nfbno2 != NULLAGBLOCK) { 527 /* 528 * 2 resulting free entries, need to add one. 529 */ 530 if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i))) 531 return error; 532 XFS_WANT_CORRUPTED_RETURN(mp, i == 0); 533 if ((error = xfs_btree_insert(bno_cur, &i))) 534 return error; 535 XFS_WANT_CORRUPTED_RETURN(mp, i == 1); 536 } 537 return 0; 538 } 539 540 static xfs_failaddr_t 541 xfs_agfl_verify( 542 struct xfs_buf *bp) 543 { 544 struct xfs_mount *mp = bp->b_target->bt_mount; 545 struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp); 546 int i; 547 548 /* 549 * There is no verification of non-crc AGFLs because mkfs does not 550 * initialise the AGFL to zero or NULL. Hence the only valid part of the 551 * AGFL is what the AGF says is active. We can't get to the AGF, so we 552 * can't verify just those entries are valid. 553 */ 554 if (!xfs_sb_version_hascrc(&mp->m_sb)) 555 return NULL; 556 557 if (!uuid_equal(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid)) 558 return __this_address; 559 if (be32_to_cpu(agfl->agfl_magicnum) != XFS_AGFL_MAGIC) 560 return __this_address; 561 /* 562 * during growfs operations, the perag is not fully initialised, 563 * so we can't use it for any useful checking. growfs ensures we can't 564 * use it by using uncached buffers that don't have the perag attached 565 * so we can detect and avoid this problem. 566 */ 567 if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno) 568 return __this_address; 569 570 for (i = 0; i < xfs_agfl_size(mp); i++) { 571 if (be32_to_cpu(agfl->agfl_bno[i]) != NULLAGBLOCK && 572 be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks) 573 return __this_address; 574 } 575 576 if (!xfs_log_check_lsn(mp, be64_to_cpu(XFS_BUF_TO_AGFL(bp)->agfl_lsn))) 577 return __this_address; 578 return NULL; 579 } 580 581 static void 582 xfs_agfl_read_verify( 583 struct xfs_buf *bp) 584 { 585 struct xfs_mount *mp = bp->b_target->bt_mount; 586 xfs_failaddr_t fa; 587 588 /* 589 * There is no verification of non-crc AGFLs because mkfs does not 590 * initialise the AGFL to zero or NULL. Hence the only valid part of the 591 * AGFL is what the AGF says is active. We can't get to the AGF, so we 592 * can't verify just those entries are valid. 593 */ 594 if (!xfs_sb_version_hascrc(&mp->m_sb)) 595 return; 596 597 if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF)) 598 xfs_verifier_error(bp, -EFSBADCRC, __this_address); 599 else { 600 fa = xfs_agfl_verify(bp); 601 if (fa) 602 xfs_verifier_error(bp, -EFSCORRUPTED, fa); 603 } 604 } 605 606 static void 607 xfs_agfl_write_verify( 608 struct xfs_buf *bp) 609 { 610 struct xfs_mount *mp = bp->b_target->bt_mount; 611 struct xfs_buf_log_item *bip = bp->b_log_item; 612 xfs_failaddr_t fa; 613 614 /* no verification of non-crc AGFLs */ 615 if (!xfs_sb_version_hascrc(&mp->m_sb)) 616 return; 617 618 fa = xfs_agfl_verify(bp); 619 if (fa) { 620 xfs_verifier_error(bp, -EFSCORRUPTED, fa); 621 return; 622 } 623 624 if (bip) 625 XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn); 626 627 xfs_buf_update_cksum(bp, XFS_AGFL_CRC_OFF); 628 } 629 630 const struct xfs_buf_ops xfs_agfl_buf_ops = { 631 .name = "xfs_agfl", 632 .verify_read = xfs_agfl_read_verify, 633 .verify_write = xfs_agfl_write_verify, 634 .verify_struct = xfs_agfl_verify, 635 }; 636 637 /* 638 * Read in the allocation group free block array. 639 */ 640 int /* error */ 641 xfs_alloc_read_agfl( 642 xfs_mount_t *mp, /* mount point structure */ 643 xfs_trans_t *tp, /* transaction pointer */ 644 xfs_agnumber_t agno, /* allocation group number */ 645 xfs_buf_t **bpp) /* buffer for the ag free block array */ 646 { 647 xfs_buf_t *bp; /* return value */ 648 int error; 649 650 ASSERT(agno != NULLAGNUMBER); 651 error = xfs_trans_read_buf( 652 mp, tp, mp->m_ddev_targp, 653 XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)), 654 XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_agfl_buf_ops); 655 if (error) 656 return error; 657 xfs_buf_set_ref(bp, XFS_AGFL_REF); 658 *bpp = bp; 659 return 0; 660 } 661 662 STATIC int 663 xfs_alloc_update_counters( 664 struct xfs_trans *tp, 665 struct xfs_perag *pag, 666 struct xfs_buf *agbp, 667 long len) 668 { 669 struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); 670 671 pag->pagf_freeblks += len; 672 be32_add_cpu(&agf->agf_freeblks, len); 673 674 xfs_trans_agblocks_delta(tp, len); 675 if (unlikely(be32_to_cpu(agf->agf_freeblks) > 676 be32_to_cpu(agf->agf_length))) 677 return -EFSCORRUPTED; 678 679 xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS); 680 return 0; 681 } 682 683 /* 684 * Allocation group level functions. 685 */ 686 687 /* 688 * Allocate a variable extent in the allocation group agno. 689 * Type and bno are used to determine where in the allocation group the 690 * extent will start. 691 * Extent's length (returned in *len) will be between minlen and maxlen, 692 * and of the form k * prod + mod unless there's nothing that large. 693 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it. 694 */ 695 STATIC int /* error */ 696 xfs_alloc_ag_vextent( 697 xfs_alloc_arg_t *args) /* argument structure for allocation */ 698 { 699 int error=0; 700 701 ASSERT(args->minlen > 0); 702 ASSERT(args->maxlen > 0); 703 ASSERT(args->minlen <= args->maxlen); 704 ASSERT(args->mod < args->prod); 705 ASSERT(args->alignment > 0); 706 707 /* 708 * Branch to correct routine based on the type. 709 */ 710 args->wasfromfl = 0; 711 switch (args->type) { 712 case XFS_ALLOCTYPE_THIS_AG: 713 error = xfs_alloc_ag_vextent_size(args); 714 break; 715 case XFS_ALLOCTYPE_NEAR_BNO: 716 error = xfs_alloc_ag_vextent_near(args); 717 break; 718 case XFS_ALLOCTYPE_THIS_BNO: 719 error = xfs_alloc_ag_vextent_exact(args); 720 break; 721 default: 722 ASSERT(0); 723 /* NOTREACHED */ 724 } 725 726 if (error || args->agbno == NULLAGBLOCK) 727 return error; 728 729 ASSERT(args->len >= args->minlen); 730 ASSERT(args->len <= args->maxlen); 731 ASSERT(!args->wasfromfl || args->resv != XFS_AG_RESV_AGFL); 732 ASSERT(args->agbno % args->alignment == 0); 733 734 /* if not file data, insert new block into the reverse map btree */ 735 if (!xfs_rmap_should_skip_owner_update(&args->oinfo)) { 736 error = xfs_rmap_alloc(args->tp, args->agbp, args->agno, 737 args->agbno, args->len, &args->oinfo); 738 if (error) 739 return error; 740 } 741 742 if (!args->wasfromfl) { 743 error = xfs_alloc_update_counters(args->tp, args->pag, 744 args->agbp, 745 -((long)(args->len))); 746 if (error) 747 return error; 748 749 ASSERT(!xfs_extent_busy_search(args->mp, args->agno, 750 args->agbno, args->len)); 751 } 752 753 xfs_ag_resv_alloc_extent(args->pag, args->resv, args); 754 755 XFS_STATS_INC(args->mp, xs_allocx); 756 XFS_STATS_ADD(args->mp, xs_allocb, args->len); 757 return error; 758 } 759 760 /* 761 * Allocate a variable extent at exactly agno/bno. 762 * Extent's length (returned in *len) will be between minlen and maxlen, 763 * and of the form k * prod + mod unless there's nothing that large. 764 * Return the starting a.g. block (bno), or NULLAGBLOCK if we can't do it. 765 */ 766 STATIC int /* error */ 767 xfs_alloc_ag_vextent_exact( 768 xfs_alloc_arg_t *args) /* allocation argument structure */ 769 { 770 xfs_btree_cur_t *bno_cur;/* by block-number btree cursor */ 771 xfs_btree_cur_t *cnt_cur;/* by count btree cursor */ 772 int error; 773 xfs_agblock_t fbno; /* start block of found extent */ 774 xfs_extlen_t flen; /* length of found extent */ 775 xfs_agblock_t tbno; /* start block of busy extent */ 776 xfs_extlen_t tlen; /* length of busy extent */ 777 xfs_agblock_t tend; /* end block of busy extent */ 778 int i; /* success/failure of operation */ 779 unsigned busy_gen; 780 781 ASSERT(args->alignment == 1); 782 783 /* 784 * Allocate/initialize a cursor for the by-number freespace btree. 785 */ 786 bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, 787 args->agno, XFS_BTNUM_BNO); 788 789 /* 790 * Lookup bno and minlen in the btree (minlen is irrelevant, really). 791 * Look for the closest free block <= bno, it must contain bno 792 * if any free block does. 793 */ 794 error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i); 795 if (error) 796 goto error0; 797 if (!i) 798 goto not_found; 799 800 /* 801 * Grab the freespace record. 802 */ 803 error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i); 804 if (error) 805 goto error0; 806 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); 807 ASSERT(fbno <= args->agbno); 808 809 /* 810 * Check for overlapping busy extents. 811 */ 812 tbno = fbno; 813 tlen = flen; 814 xfs_extent_busy_trim(args, &tbno, &tlen, &busy_gen); 815 816 /* 817 * Give up if the start of the extent is busy, or the freespace isn't 818 * long enough for the minimum request. 819 */ 820 if (tbno > args->agbno) 821 goto not_found; 822 if (tlen < args->minlen) 823 goto not_found; 824 tend = tbno + tlen; 825 if (tend < args->agbno + args->minlen) 826 goto not_found; 827 828 /* 829 * End of extent will be smaller of the freespace end and the 830 * maximal requested end. 831 * 832 * Fix the length according to mod and prod if given. 833 */ 834 args->len = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen) 835 - args->agbno; 836 xfs_alloc_fix_len(args); 837 ASSERT(args->agbno + args->len <= tend); 838 839 /* 840 * We are allocating agbno for args->len 841 * Allocate/initialize a cursor for the by-size btree. 842 */ 843 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, 844 args->agno, XFS_BTNUM_CNT); 845 ASSERT(args->agbno + args->len <= 846 be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); 847 error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno, 848 args->len, XFSA_FIXUP_BNO_OK); 849 if (error) { 850 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR); 851 goto error0; 852 } 853 854 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); 855 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 856 857 args->wasfromfl = 0; 858 trace_xfs_alloc_exact_done(args); 859 return 0; 860 861 not_found: 862 /* Didn't find it, return null. */ 863 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); 864 args->agbno = NULLAGBLOCK; 865 trace_xfs_alloc_exact_notfound(args); 866 return 0; 867 868 error0: 869 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR); 870 trace_xfs_alloc_exact_error(args); 871 return error; 872 } 873 874 /* 875 * Search the btree in a given direction via the search cursor and compare 876 * the records found against the good extent we've already found. 877 */ 878 STATIC int 879 xfs_alloc_find_best_extent( 880 struct xfs_alloc_arg *args, /* allocation argument structure */ 881 struct xfs_btree_cur **gcur, /* good cursor */ 882 struct xfs_btree_cur **scur, /* searching cursor */ 883 xfs_agblock_t gdiff, /* difference for search comparison */ 884 xfs_agblock_t *sbno, /* extent found by search */ 885 xfs_extlen_t *slen, /* extent length */ 886 xfs_agblock_t *sbnoa, /* aligned extent found by search */ 887 xfs_extlen_t *slena, /* aligned extent length */ 888 int dir) /* 0 = search right, 1 = search left */ 889 { 890 xfs_agblock_t new; 891 xfs_agblock_t sdiff; 892 int error; 893 int i; 894 unsigned busy_gen; 895 896 /* The good extent is perfect, no need to search. */ 897 if (!gdiff) 898 goto out_use_good; 899 900 /* 901 * Look until we find a better one, run out of space or run off the end. 902 */ 903 do { 904 error = xfs_alloc_get_rec(*scur, sbno, slen, &i); 905 if (error) 906 goto error0; 907 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); 908 xfs_alloc_compute_aligned(args, *sbno, *slen, 909 sbnoa, slena, &busy_gen); 910 911 /* 912 * The good extent is closer than this one. 913 */ 914 if (!dir) { 915 if (*sbnoa > args->max_agbno) 916 goto out_use_good; 917 if (*sbnoa >= args->agbno + gdiff) 918 goto out_use_good; 919 } else { 920 if (*sbnoa < args->min_agbno) 921 goto out_use_good; 922 if (*sbnoa <= args->agbno - gdiff) 923 goto out_use_good; 924 } 925 926 /* 927 * Same distance, compare length and pick the best. 928 */ 929 if (*slena >= args->minlen) { 930 args->len = XFS_EXTLEN_MIN(*slena, args->maxlen); 931 xfs_alloc_fix_len(args); 932 933 sdiff = xfs_alloc_compute_diff(args->agbno, args->len, 934 args->alignment, 935 args->datatype, *sbnoa, 936 *slena, &new); 937 938 /* 939 * Choose closer size and invalidate other cursor. 940 */ 941 if (sdiff < gdiff) 942 goto out_use_search; 943 goto out_use_good; 944 } 945 946 if (!dir) 947 error = xfs_btree_increment(*scur, 0, &i); 948 else 949 error = xfs_btree_decrement(*scur, 0, &i); 950 if (error) 951 goto error0; 952 } while (i); 953 954 out_use_good: 955 xfs_btree_del_cursor(*scur, XFS_BTREE_NOERROR); 956 *scur = NULL; 957 return 0; 958 959 out_use_search: 960 xfs_btree_del_cursor(*gcur, XFS_BTREE_NOERROR); 961 *gcur = NULL; 962 return 0; 963 964 error0: 965 /* caller invalidates cursors */ 966 return error; 967 } 968 969 /* 970 * Allocate a variable extent near bno in the allocation group agno. 971 * Extent's length (returned in len) will be between minlen and maxlen, 972 * and of the form k * prod + mod unless there's nothing that large. 973 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it. 974 */ 975 STATIC int /* error */ 976 xfs_alloc_ag_vextent_near( 977 xfs_alloc_arg_t *args) /* allocation argument structure */ 978 { 979 xfs_btree_cur_t *bno_cur_gt; /* cursor for bno btree, right side */ 980 xfs_btree_cur_t *bno_cur_lt; /* cursor for bno btree, left side */ 981 xfs_btree_cur_t *cnt_cur; /* cursor for count btree */ 982 xfs_agblock_t gtbno; /* start bno of right side entry */ 983 xfs_agblock_t gtbnoa; /* aligned ... */ 984 xfs_extlen_t gtdiff; /* difference to right side entry */ 985 xfs_extlen_t gtlen; /* length of right side entry */ 986 xfs_extlen_t gtlena; /* aligned ... */ 987 xfs_agblock_t gtnew; /* useful start bno of right side */ 988 int error; /* error code */ 989 int i; /* result code, temporary */ 990 int j; /* result code, temporary */ 991 xfs_agblock_t ltbno; /* start bno of left side entry */ 992 xfs_agblock_t ltbnoa; /* aligned ... */ 993 xfs_extlen_t ltdiff; /* difference to left side entry */ 994 xfs_extlen_t ltlen; /* length of left side entry */ 995 xfs_extlen_t ltlena; /* aligned ... */ 996 xfs_agblock_t ltnew; /* useful start bno of left side */ 997 xfs_extlen_t rlen; /* length of returned extent */ 998 bool busy; 999 unsigned busy_gen; 1000 #ifdef DEBUG 1001 /* 1002 * Randomly don't execute the first algorithm. 1003 */ 1004 int dofirst; /* set to do first algorithm */ 1005 1006 dofirst = prandom_u32() & 1; 1007 #endif 1008 1009 /* handle unitialized agbno range so caller doesn't have to */ 1010 if (!args->min_agbno && !args->max_agbno) 1011 args->max_agbno = args->mp->m_sb.sb_agblocks - 1; 1012 ASSERT(args->min_agbno <= args->max_agbno); 1013 1014 /* clamp agbno to the range if it's outside */ 1015 if (args->agbno < args->min_agbno) 1016 args->agbno = args->min_agbno; 1017 if (args->agbno > args->max_agbno) 1018 args->agbno = args->max_agbno; 1019 1020 restart: 1021 bno_cur_lt = NULL; 1022 bno_cur_gt = NULL; 1023 ltlen = 0; 1024 gtlena = 0; 1025 ltlena = 0; 1026 busy = false; 1027 1028 /* 1029 * Get a cursor for the by-size btree. 1030 */ 1031 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, 1032 args->agno, XFS_BTNUM_CNT); 1033 1034 /* 1035 * See if there are any free extents as big as maxlen. 1036 */ 1037 if ((error = xfs_alloc_lookup_ge(cnt_cur, 0, args->maxlen, &i))) 1038 goto error0; 1039 /* 1040 * If none, then pick up the last entry in the tree unless the 1041 * tree is empty. 1042 */ 1043 if (!i) { 1044 if ((error = xfs_alloc_ag_vextent_small(args, cnt_cur, <bno, 1045 <len, &i))) 1046 goto error0; 1047 if (i == 0 || ltlen == 0) { 1048 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1049 trace_xfs_alloc_near_noentry(args); 1050 return 0; 1051 } 1052 ASSERT(i == 1); 1053 } 1054 args->wasfromfl = 0; 1055 1056 /* 1057 * First algorithm. 1058 * If the requested extent is large wrt the freespaces available 1059 * in this a.g., then the cursor will be pointing to a btree entry 1060 * near the right edge of the tree. If it's in the last btree leaf 1061 * block, then we just examine all the entries in that block 1062 * that are big enough, and pick the best one. 1063 * This is written as a while loop so we can break out of it, 1064 * but we never loop back to the top. 1065 */ 1066 while (xfs_btree_islastblock(cnt_cur, 0)) { 1067 xfs_extlen_t bdiff; 1068 int besti=0; 1069 xfs_extlen_t blen=0; 1070 xfs_agblock_t bnew=0; 1071 1072 #ifdef DEBUG 1073 if (dofirst) 1074 break; 1075 #endif 1076 /* 1077 * Start from the entry that lookup found, sequence through 1078 * all larger free blocks. If we're actually pointing at a 1079 * record smaller than maxlen, go to the start of this block, 1080 * and skip all those smaller than minlen. 1081 */ 1082 if (ltlen || args->alignment > 1) { 1083 cnt_cur->bc_ptrs[0] = 1; 1084 do { 1085 if ((error = xfs_alloc_get_rec(cnt_cur, <bno, 1086 <len, &i))) 1087 goto error0; 1088 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); 1089 if (ltlen >= args->minlen) 1090 break; 1091 if ((error = xfs_btree_increment(cnt_cur, 0, &i))) 1092 goto error0; 1093 } while (i); 1094 ASSERT(ltlen >= args->minlen); 1095 if (!i) 1096 break; 1097 } 1098 i = cnt_cur->bc_ptrs[0]; 1099 for (j = 1, blen = 0, bdiff = 0; 1100 !error && j && (blen < args->maxlen || bdiff > 0); 1101 error = xfs_btree_increment(cnt_cur, 0, &j)) { 1102 /* 1103 * For each entry, decide if it's better than 1104 * the previous best entry. 1105 */ 1106 if ((error = xfs_alloc_get_rec(cnt_cur, <bno, <len, &i))) 1107 goto error0; 1108 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); 1109 busy = xfs_alloc_compute_aligned(args, ltbno, ltlen, 1110 <bnoa, <lena, &busy_gen); 1111 if (ltlena < args->minlen) 1112 continue; 1113 if (ltbnoa < args->min_agbno || ltbnoa > args->max_agbno) 1114 continue; 1115 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); 1116 xfs_alloc_fix_len(args); 1117 ASSERT(args->len >= args->minlen); 1118 if (args->len < blen) 1119 continue; 1120 ltdiff = xfs_alloc_compute_diff(args->agbno, args->len, 1121 args->alignment, args->datatype, ltbnoa, 1122 ltlena, <new); 1123 if (ltnew != NULLAGBLOCK && 1124 (args->len > blen || ltdiff < bdiff)) { 1125 bdiff = ltdiff; 1126 bnew = ltnew; 1127 blen = args->len; 1128 besti = cnt_cur->bc_ptrs[0]; 1129 } 1130 } 1131 /* 1132 * It didn't work. We COULD be in a case where 1133 * there's a good record somewhere, so try again. 1134 */ 1135 if (blen == 0) 1136 break; 1137 /* 1138 * Point at the best entry, and retrieve it again. 1139 */ 1140 cnt_cur->bc_ptrs[0] = besti; 1141 if ((error = xfs_alloc_get_rec(cnt_cur, <bno, <len, &i))) 1142 goto error0; 1143 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); 1144 ASSERT(ltbno + ltlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); 1145 args->len = blen; 1146 1147 /* 1148 * We are allocating starting at bnew for blen blocks. 1149 */ 1150 args->agbno = bnew; 1151 ASSERT(bnew >= ltbno); 1152 ASSERT(bnew + blen <= ltbno + ltlen); 1153 /* 1154 * Set up a cursor for the by-bno tree. 1155 */ 1156 bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp, 1157 args->agbp, args->agno, XFS_BTNUM_BNO); 1158 /* 1159 * Fix up the btree entries. 1160 */ 1161 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, 1162 ltlen, bnew, blen, XFSA_FIXUP_CNT_OK))) 1163 goto error0; 1164 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1165 xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR); 1166 1167 trace_xfs_alloc_near_first(args); 1168 return 0; 1169 } 1170 /* 1171 * Second algorithm. 1172 * Search in the by-bno tree to the left and to the right 1173 * simultaneously, until in each case we find a space big enough, 1174 * or run into the edge of the tree. When we run into the edge, 1175 * we deallocate that cursor. 1176 * If both searches succeed, we compare the two spaces and pick 1177 * the better one. 1178 * With alignment, it's possible for both to fail; the upper 1179 * level algorithm that picks allocation groups for allocations 1180 * is not supposed to do this. 1181 */ 1182 /* 1183 * Allocate and initialize the cursor for the leftward search. 1184 */ 1185 bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, 1186 args->agno, XFS_BTNUM_BNO); 1187 /* 1188 * Lookup <= bno to find the leftward search's starting point. 1189 */ 1190 if ((error = xfs_alloc_lookup_le(bno_cur_lt, args->agbno, args->maxlen, &i))) 1191 goto error0; 1192 if (!i) { 1193 /* 1194 * Didn't find anything; use this cursor for the rightward 1195 * search. 1196 */ 1197 bno_cur_gt = bno_cur_lt; 1198 bno_cur_lt = NULL; 1199 } 1200 /* 1201 * Found something. Duplicate the cursor for the rightward search. 1202 */ 1203 else if ((error = xfs_btree_dup_cursor(bno_cur_lt, &bno_cur_gt))) 1204 goto error0; 1205 /* 1206 * Increment the cursor, so we will point at the entry just right 1207 * of the leftward entry if any, or to the leftmost entry. 1208 */ 1209 if ((error = xfs_btree_increment(bno_cur_gt, 0, &i))) 1210 goto error0; 1211 if (!i) { 1212 /* 1213 * It failed, there are no rightward entries. 1214 */ 1215 xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_NOERROR); 1216 bno_cur_gt = NULL; 1217 } 1218 /* 1219 * Loop going left with the leftward cursor, right with the 1220 * rightward cursor, until either both directions give up or 1221 * we find an entry at least as big as minlen. 1222 */ 1223 do { 1224 if (bno_cur_lt) { 1225 if ((error = xfs_alloc_get_rec(bno_cur_lt, <bno, <len, &i))) 1226 goto error0; 1227 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); 1228 busy |= xfs_alloc_compute_aligned(args, ltbno, ltlen, 1229 <bnoa, <lena, &busy_gen); 1230 if (ltlena >= args->minlen && ltbnoa >= args->min_agbno) 1231 break; 1232 if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i))) 1233 goto error0; 1234 if (!i || ltbnoa < args->min_agbno) { 1235 xfs_btree_del_cursor(bno_cur_lt, 1236 XFS_BTREE_NOERROR); 1237 bno_cur_lt = NULL; 1238 } 1239 } 1240 if (bno_cur_gt) { 1241 if ((error = xfs_alloc_get_rec(bno_cur_gt, >bno, >len, &i))) 1242 goto error0; 1243 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); 1244 busy |= xfs_alloc_compute_aligned(args, gtbno, gtlen, 1245 >bnoa, >lena, &busy_gen); 1246 if (gtlena >= args->minlen && gtbnoa <= args->max_agbno) 1247 break; 1248 if ((error = xfs_btree_increment(bno_cur_gt, 0, &i))) 1249 goto error0; 1250 if (!i || gtbnoa > args->max_agbno) { 1251 xfs_btree_del_cursor(bno_cur_gt, 1252 XFS_BTREE_NOERROR); 1253 bno_cur_gt = NULL; 1254 } 1255 } 1256 } while (bno_cur_lt || bno_cur_gt); 1257 1258 /* 1259 * Got both cursors still active, need to find better entry. 1260 */ 1261 if (bno_cur_lt && bno_cur_gt) { 1262 if (ltlena >= args->minlen) { 1263 /* 1264 * Left side is good, look for a right side entry. 1265 */ 1266 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); 1267 xfs_alloc_fix_len(args); 1268 ltdiff = xfs_alloc_compute_diff(args->agbno, args->len, 1269 args->alignment, args->datatype, ltbnoa, 1270 ltlena, <new); 1271 1272 error = xfs_alloc_find_best_extent(args, 1273 &bno_cur_lt, &bno_cur_gt, 1274 ltdiff, >bno, >len, 1275 >bnoa, >lena, 1276 0 /* search right */); 1277 } else { 1278 ASSERT(gtlena >= args->minlen); 1279 1280 /* 1281 * Right side is good, look for a left side entry. 1282 */ 1283 args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen); 1284 xfs_alloc_fix_len(args); 1285 gtdiff = xfs_alloc_compute_diff(args->agbno, args->len, 1286 args->alignment, args->datatype, gtbnoa, 1287 gtlena, >new); 1288 1289 error = xfs_alloc_find_best_extent(args, 1290 &bno_cur_gt, &bno_cur_lt, 1291 gtdiff, <bno, <len, 1292 <bnoa, <lena, 1293 1 /* search left */); 1294 } 1295 1296 if (error) 1297 goto error0; 1298 } 1299 1300 /* 1301 * If we couldn't get anything, give up. 1302 */ 1303 if (bno_cur_lt == NULL && bno_cur_gt == NULL) { 1304 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1305 1306 if (busy) { 1307 trace_xfs_alloc_near_busy(args); 1308 xfs_extent_busy_flush(args->mp, args->pag, busy_gen); 1309 goto restart; 1310 } 1311 trace_xfs_alloc_size_neither(args); 1312 args->agbno = NULLAGBLOCK; 1313 return 0; 1314 } 1315 1316 /* 1317 * At this point we have selected a freespace entry, either to the 1318 * left or to the right. If it's on the right, copy all the 1319 * useful variables to the "left" set so we only have one 1320 * copy of this code. 1321 */ 1322 if (bno_cur_gt) { 1323 bno_cur_lt = bno_cur_gt; 1324 bno_cur_gt = NULL; 1325 ltbno = gtbno; 1326 ltbnoa = gtbnoa; 1327 ltlen = gtlen; 1328 ltlena = gtlena; 1329 j = 1; 1330 } else 1331 j = 0; 1332 1333 /* 1334 * Fix up the length and compute the useful address. 1335 */ 1336 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); 1337 xfs_alloc_fix_len(args); 1338 rlen = args->len; 1339 (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment, 1340 args->datatype, ltbnoa, ltlena, <new); 1341 ASSERT(ltnew >= ltbno); 1342 ASSERT(ltnew + rlen <= ltbnoa + ltlena); 1343 ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); 1344 ASSERT(ltnew >= args->min_agbno && ltnew <= args->max_agbno); 1345 args->agbno = ltnew; 1346 1347 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen, 1348 ltnew, rlen, XFSA_FIXUP_BNO_OK))) 1349 goto error0; 1350 1351 if (j) 1352 trace_xfs_alloc_near_greater(args); 1353 else 1354 trace_xfs_alloc_near_lesser(args); 1355 1356 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1357 xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR); 1358 return 0; 1359 1360 error0: 1361 trace_xfs_alloc_near_error(args); 1362 if (cnt_cur != NULL) 1363 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR); 1364 if (bno_cur_lt != NULL) 1365 xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_ERROR); 1366 if (bno_cur_gt != NULL) 1367 xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_ERROR); 1368 return error; 1369 } 1370 1371 /* 1372 * Allocate a variable extent anywhere in the allocation group agno. 1373 * Extent's length (returned in len) will be between minlen and maxlen, 1374 * and of the form k * prod + mod unless there's nothing that large. 1375 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it. 1376 */ 1377 STATIC int /* error */ 1378 xfs_alloc_ag_vextent_size( 1379 xfs_alloc_arg_t *args) /* allocation argument structure */ 1380 { 1381 xfs_btree_cur_t *bno_cur; /* cursor for bno btree */ 1382 xfs_btree_cur_t *cnt_cur; /* cursor for cnt btree */ 1383 int error; /* error result */ 1384 xfs_agblock_t fbno; /* start of found freespace */ 1385 xfs_extlen_t flen; /* length of found freespace */ 1386 int i; /* temp status variable */ 1387 xfs_agblock_t rbno; /* returned block number */ 1388 xfs_extlen_t rlen; /* length of returned extent */ 1389 bool busy; 1390 unsigned busy_gen; 1391 1392 restart: 1393 /* 1394 * Allocate and initialize a cursor for the by-size btree. 1395 */ 1396 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, 1397 args->agno, XFS_BTNUM_CNT); 1398 bno_cur = NULL; 1399 busy = false; 1400 1401 /* 1402 * Look for an entry >= maxlen+alignment-1 blocks. 1403 */ 1404 if ((error = xfs_alloc_lookup_ge(cnt_cur, 0, 1405 args->maxlen + args->alignment - 1, &i))) 1406 goto error0; 1407 1408 /* 1409 * If none then we have to settle for a smaller extent. In the case that 1410 * there are no large extents, this will return the last entry in the 1411 * tree unless the tree is empty. In the case that there are only busy 1412 * large extents, this will return the largest small extent unless there 1413 * are no smaller extents available. 1414 */ 1415 if (!i) { 1416 error = xfs_alloc_ag_vextent_small(args, cnt_cur, 1417 &fbno, &flen, &i); 1418 if (error) 1419 goto error0; 1420 if (i == 0 || flen == 0) { 1421 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1422 trace_xfs_alloc_size_noentry(args); 1423 return 0; 1424 } 1425 ASSERT(i == 1); 1426 busy = xfs_alloc_compute_aligned(args, fbno, flen, &rbno, 1427 &rlen, &busy_gen); 1428 } else { 1429 /* 1430 * Search for a non-busy extent that is large enough. 1431 */ 1432 for (;;) { 1433 error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i); 1434 if (error) 1435 goto error0; 1436 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); 1437 1438 busy = xfs_alloc_compute_aligned(args, fbno, flen, 1439 &rbno, &rlen, &busy_gen); 1440 1441 if (rlen >= args->maxlen) 1442 break; 1443 1444 error = xfs_btree_increment(cnt_cur, 0, &i); 1445 if (error) 1446 goto error0; 1447 if (i == 0) { 1448 /* 1449 * Our only valid extents must have been busy. 1450 * Make it unbusy by forcing the log out and 1451 * retrying. 1452 */ 1453 xfs_btree_del_cursor(cnt_cur, 1454 XFS_BTREE_NOERROR); 1455 trace_xfs_alloc_size_busy(args); 1456 xfs_extent_busy_flush(args->mp, 1457 args->pag, busy_gen); 1458 goto restart; 1459 } 1460 } 1461 } 1462 1463 /* 1464 * In the first case above, we got the last entry in the 1465 * by-size btree. Now we check to see if the space hits maxlen 1466 * once aligned; if not, we search left for something better. 1467 * This can't happen in the second case above. 1468 */ 1469 rlen = XFS_EXTLEN_MIN(args->maxlen, rlen); 1470 XFS_WANT_CORRUPTED_GOTO(args->mp, rlen == 0 || 1471 (rlen <= flen && rbno + rlen <= fbno + flen), error0); 1472 if (rlen < args->maxlen) { 1473 xfs_agblock_t bestfbno; 1474 xfs_extlen_t bestflen; 1475 xfs_agblock_t bestrbno; 1476 xfs_extlen_t bestrlen; 1477 1478 bestrlen = rlen; 1479 bestrbno = rbno; 1480 bestflen = flen; 1481 bestfbno = fbno; 1482 for (;;) { 1483 if ((error = xfs_btree_decrement(cnt_cur, 0, &i))) 1484 goto error0; 1485 if (i == 0) 1486 break; 1487 if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, 1488 &i))) 1489 goto error0; 1490 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); 1491 if (flen < bestrlen) 1492 break; 1493 busy = xfs_alloc_compute_aligned(args, fbno, flen, 1494 &rbno, &rlen, &busy_gen); 1495 rlen = XFS_EXTLEN_MIN(args->maxlen, rlen); 1496 XFS_WANT_CORRUPTED_GOTO(args->mp, rlen == 0 || 1497 (rlen <= flen && rbno + rlen <= fbno + flen), 1498 error0); 1499 if (rlen > bestrlen) { 1500 bestrlen = rlen; 1501 bestrbno = rbno; 1502 bestflen = flen; 1503 bestfbno = fbno; 1504 if (rlen == args->maxlen) 1505 break; 1506 } 1507 } 1508 if ((error = xfs_alloc_lookup_eq(cnt_cur, bestfbno, bestflen, 1509 &i))) 1510 goto error0; 1511 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); 1512 rlen = bestrlen; 1513 rbno = bestrbno; 1514 flen = bestflen; 1515 fbno = bestfbno; 1516 } 1517 args->wasfromfl = 0; 1518 /* 1519 * Fix up the length. 1520 */ 1521 args->len = rlen; 1522 if (rlen < args->minlen) { 1523 if (busy) { 1524 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1525 trace_xfs_alloc_size_busy(args); 1526 xfs_extent_busy_flush(args->mp, args->pag, busy_gen); 1527 goto restart; 1528 } 1529 goto out_nominleft; 1530 } 1531 xfs_alloc_fix_len(args); 1532 1533 rlen = args->len; 1534 XFS_WANT_CORRUPTED_GOTO(args->mp, rlen <= flen, error0); 1535 /* 1536 * Allocate and initialize a cursor for the by-block tree. 1537 */ 1538 bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, 1539 args->agno, XFS_BTNUM_BNO); 1540 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, 1541 rbno, rlen, XFSA_FIXUP_CNT_OK))) 1542 goto error0; 1543 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1544 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); 1545 cnt_cur = bno_cur = NULL; 1546 args->len = rlen; 1547 args->agbno = rbno; 1548 XFS_WANT_CORRUPTED_GOTO(args->mp, 1549 args->agbno + args->len <= 1550 be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length), 1551 error0); 1552 trace_xfs_alloc_size_done(args); 1553 return 0; 1554 1555 error0: 1556 trace_xfs_alloc_size_error(args); 1557 if (cnt_cur) 1558 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR); 1559 if (bno_cur) 1560 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR); 1561 return error; 1562 1563 out_nominleft: 1564 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1565 trace_xfs_alloc_size_nominleft(args); 1566 args->agbno = NULLAGBLOCK; 1567 return 0; 1568 } 1569 1570 /* 1571 * Deal with the case where only small freespaces remain. 1572 * Either return the contents of the last freespace record, 1573 * or allocate space from the freelist if there is nothing in the tree. 1574 */ 1575 STATIC int /* error */ 1576 xfs_alloc_ag_vextent_small( 1577 xfs_alloc_arg_t *args, /* allocation argument structure */ 1578 xfs_btree_cur_t *ccur, /* by-size cursor */ 1579 xfs_agblock_t *fbnop, /* result block number */ 1580 xfs_extlen_t *flenp, /* result length */ 1581 int *stat) /* status: 0-freelist, 1-normal/none */ 1582 { 1583 struct xfs_owner_info oinfo; 1584 int error; 1585 xfs_agblock_t fbno; 1586 xfs_extlen_t flen; 1587 int i; 1588 1589 if ((error = xfs_btree_decrement(ccur, 0, &i))) 1590 goto error0; 1591 if (i) { 1592 if ((error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i))) 1593 goto error0; 1594 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); 1595 } 1596 /* 1597 * Nothing in the btree, try the freelist. Make sure 1598 * to respect minleft even when pulling from the 1599 * freelist. 1600 */ 1601 else if (args->minlen == 1 && args->alignment == 1 && 1602 args->resv != XFS_AG_RESV_AGFL && 1603 (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount) 1604 > args->minleft)) { 1605 error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0); 1606 if (error) 1607 goto error0; 1608 if (fbno != NULLAGBLOCK) { 1609 xfs_extent_busy_reuse(args->mp, args->agno, fbno, 1, 1610 xfs_alloc_allow_busy_reuse(args->datatype)); 1611 1612 if (xfs_alloc_is_userdata(args->datatype)) { 1613 xfs_buf_t *bp; 1614 1615 bp = xfs_btree_get_bufs(args->mp, args->tp, 1616 args->agno, fbno, 0); 1617 if (!bp) { 1618 error = -EFSCORRUPTED; 1619 goto error0; 1620 } 1621 xfs_trans_binval(args->tp, bp); 1622 } 1623 args->len = 1; 1624 args->agbno = fbno; 1625 XFS_WANT_CORRUPTED_GOTO(args->mp, 1626 args->agbno + args->len <= 1627 be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length), 1628 error0); 1629 args->wasfromfl = 1; 1630 trace_xfs_alloc_small_freelist(args); 1631 1632 /* 1633 * If we're feeding an AGFL block to something that 1634 * doesn't live in the free space, we need to clear 1635 * out the OWN_AG rmap. 1636 */ 1637 xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_AG); 1638 error = xfs_rmap_free(args->tp, args->agbp, args->agno, 1639 fbno, 1, &oinfo); 1640 if (error) 1641 goto error0; 1642 1643 *stat = 0; 1644 return 0; 1645 } 1646 /* 1647 * Nothing in the freelist. 1648 */ 1649 else 1650 flen = 0; 1651 } 1652 /* 1653 * Can't allocate from the freelist for some reason. 1654 */ 1655 else { 1656 fbno = NULLAGBLOCK; 1657 flen = 0; 1658 } 1659 /* 1660 * Can't do the allocation, give up. 1661 */ 1662 if (flen < args->minlen) { 1663 args->agbno = NULLAGBLOCK; 1664 trace_xfs_alloc_small_notenough(args); 1665 flen = 0; 1666 } 1667 *fbnop = fbno; 1668 *flenp = flen; 1669 *stat = 1; 1670 trace_xfs_alloc_small_done(args); 1671 return 0; 1672 1673 error0: 1674 trace_xfs_alloc_small_error(args); 1675 return error; 1676 } 1677 1678 /* 1679 * Free the extent starting at agno/bno for length. 1680 */ 1681 STATIC int 1682 xfs_free_ag_extent( 1683 xfs_trans_t *tp, 1684 xfs_buf_t *agbp, 1685 xfs_agnumber_t agno, 1686 xfs_agblock_t bno, 1687 xfs_extlen_t len, 1688 struct xfs_owner_info *oinfo, 1689 enum xfs_ag_resv_type type) 1690 { 1691 xfs_btree_cur_t *bno_cur; /* cursor for by-block btree */ 1692 xfs_btree_cur_t *cnt_cur; /* cursor for by-size btree */ 1693 int error; /* error return value */ 1694 xfs_agblock_t gtbno; /* start of right neighbor block */ 1695 xfs_extlen_t gtlen; /* length of right neighbor block */ 1696 int haveleft; /* have a left neighbor block */ 1697 int haveright; /* have a right neighbor block */ 1698 int i; /* temp, result code */ 1699 xfs_agblock_t ltbno; /* start of left neighbor block */ 1700 xfs_extlen_t ltlen; /* length of left neighbor block */ 1701 xfs_mount_t *mp; /* mount point struct for filesystem */ 1702 xfs_agblock_t nbno; /* new starting block of freespace */ 1703 xfs_extlen_t nlen; /* new length of freespace */ 1704 xfs_perag_t *pag; /* per allocation group data */ 1705 1706 bno_cur = cnt_cur = NULL; 1707 mp = tp->t_mountp; 1708 1709 if (!xfs_rmap_should_skip_owner_update(oinfo)) { 1710 error = xfs_rmap_free(tp, agbp, agno, bno, len, oinfo); 1711 if (error) 1712 goto error0; 1713 } 1714 1715 /* 1716 * Allocate and initialize a cursor for the by-block btree. 1717 */ 1718 bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO); 1719 /* 1720 * Look for a neighboring block on the left (lower block numbers) 1721 * that is contiguous with this space. 1722 */ 1723 if ((error = xfs_alloc_lookup_le(bno_cur, bno, len, &haveleft))) 1724 goto error0; 1725 if (haveleft) { 1726 /* 1727 * There is a block to our left. 1728 */ 1729 if ((error = xfs_alloc_get_rec(bno_cur, <bno, <len, &i))) 1730 goto error0; 1731 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); 1732 /* 1733 * It's not contiguous, though. 1734 */ 1735 if (ltbno + ltlen < bno) 1736 haveleft = 0; 1737 else { 1738 /* 1739 * If this failure happens the request to free this 1740 * space was invalid, it's (partly) already free. 1741 * Very bad. 1742 */ 1743 XFS_WANT_CORRUPTED_GOTO(mp, 1744 ltbno + ltlen <= bno, error0); 1745 } 1746 } 1747 /* 1748 * Look for a neighboring block on the right (higher block numbers) 1749 * that is contiguous with this space. 1750 */ 1751 if ((error = xfs_btree_increment(bno_cur, 0, &haveright))) 1752 goto error0; 1753 if (haveright) { 1754 /* 1755 * There is a block to our right. 1756 */ 1757 if ((error = xfs_alloc_get_rec(bno_cur, >bno, >len, &i))) 1758 goto error0; 1759 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); 1760 /* 1761 * It's not contiguous, though. 1762 */ 1763 if (bno + len < gtbno) 1764 haveright = 0; 1765 else { 1766 /* 1767 * If this failure happens the request to free this 1768 * space was invalid, it's (partly) already free. 1769 * Very bad. 1770 */ 1771 XFS_WANT_CORRUPTED_GOTO(mp, gtbno >= bno + len, error0); 1772 } 1773 } 1774 /* 1775 * Now allocate and initialize a cursor for the by-size tree. 1776 */ 1777 cnt_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_CNT); 1778 /* 1779 * Have both left and right contiguous neighbors. 1780 * Merge all three into a single free block. 1781 */ 1782 if (haveleft && haveright) { 1783 /* 1784 * Delete the old by-size entry on the left. 1785 */ 1786 if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i))) 1787 goto error0; 1788 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); 1789 if ((error = xfs_btree_delete(cnt_cur, &i))) 1790 goto error0; 1791 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); 1792 /* 1793 * Delete the old by-size entry on the right. 1794 */ 1795 if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i))) 1796 goto error0; 1797 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); 1798 if ((error = xfs_btree_delete(cnt_cur, &i))) 1799 goto error0; 1800 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); 1801 /* 1802 * Delete the old by-block entry for the right block. 1803 */ 1804 if ((error = xfs_btree_delete(bno_cur, &i))) 1805 goto error0; 1806 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); 1807 /* 1808 * Move the by-block cursor back to the left neighbor. 1809 */ 1810 if ((error = xfs_btree_decrement(bno_cur, 0, &i))) 1811 goto error0; 1812 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); 1813 #ifdef DEBUG 1814 /* 1815 * Check that this is the right record: delete didn't 1816 * mangle the cursor. 1817 */ 1818 { 1819 xfs_agblock_t xxbno; 1820 xfs_extlen_t xxlen; 1821 1822 if ((error = xfs_alloc_get_rec(bno_cur, &xxbno, &xxlen, 1823 &i))) 1824 goto error0; 1825 XFS_WANT_CORRUPTED_GOTO(mp, 1826 i == 1 && xxbno == ltbno && xxlen == ltlen, 1827 error0); 1828 } 1829 #endif 1830 /* 1831 * Update remaining by-block entry to the new, joined block. 1832 */ 1833 nbno = ltbno; 1834 nlen = len + ltlen + gtlen; 1835 if ((error = xfs_alloc_update(bno_cur, nbno, nlen))) 1836 goto error0; 1837 } 1838 /* 1839 * Have only a left contiguous neighbor. 1840 * Merge it together with the new freespace. 1841 */ 1842 else if (haveleft) { 1843 /* 1844 * Delete the old by-size entry on the left. 1845 */ 1846 if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i))) 1847 goto error0; 1848 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); 1849 if ((error = xfs_btree_delete(cnt_cur, &i))) 1850 goto error0; 1851 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); 1852 /* 1853 * Back up the by-block cursor to the left neighbor, and 1854 * update its length. 1855 */ 1856 if ((error = xfs_btree_decrement(bno_cur, 0, &i))) 1857 goto error0; 1858 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); 1859 nbno = ltbno; 1860 nlen = len + ltlen; 1861 if ((error = xfs_alloc_update(bno_cur, nbno, nlen))) 1862 goto error0; 1863 } 1864 /* 1865 * Have only a right contiguous neighbor. 1866 * Merge it together with the new freespace. 1867 */ 1868 else if (haveright) { 1869 /* 1870 * Delete the old by-size entry on the right. 1871 */ 1872 if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i))) 1873 goto error0; 1874 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); 1875 if ((error = xfs_btree_delete(cnt_cur, &i))) 1876 goto error0; 1877 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); 1878 /* 1879 * Update the starting block and length of the right 1880 * neighbor in the by-block tree. 1881 */ 1882 nbno = bno; 1883 nlen = len + gtlen; 1884 if ((error = xfs_alloc_update(bno_cur, nbno, nlen))) 1885 goto error0; 1886 } 1887 /* 1888 * No contiguous neighbors. 1889 * Insert the new freespace into the by-block tree. 1890 */ 1891 else { 1892 nbno = bno; 1893 nlen = len; 1894 if ((error = xfs_btree_insert(bno_cur, &i))) 1895 goto error0; 1896 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); 1897 } 1898 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); 1899 bno_cur = NULL; 1900 /* 1901 * In all cases we need to insert the new freespace in the by-size tree. 1902 */ 1903 if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i))) 1904 goto error0; 1905 XFS_WANT_CORRUPTED_GOTO(mp, i == 0, error0); 1906 if ((error = xfs_btree_insert(cnt_cur, &i))) 1907 goto error0; 1908 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); 1909 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1910 cnt_cur = NULL; 1911 1912 /* 1913 * Update the freespace totals in the ag and superblock. 1914 */ 1915 pag = xfs_perag_get(mp, agno); 1916 error = xfs_alloc_update_counters(tp, pag, agbp, len); 1917 xfs_ag_resv_free_extent(pag, type, tp, len); 1918 xfs_perag_put(pag); 1919 if (error) 1920 goto error0; 1921 1922 XFS_STATS_INC(mp, xs_freex); 1923 XFS_STATS_ADD(mp, xs_freeb, len); 1924 1925 trace_xfs_free_extent(mp, agno, bno, len, type, haveleft, haveright); 1926 1927 return 0; 1928 1929 error0: 1930 trace_xfs_free_extent(mp, agno, bno, len, type, -1, -1); 1931 if (bno_cur) 1932 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR); 1933 if (cnt_cur) 1934 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR); 1935 return error; 1936 } 1937 1938 /* 1939 * Visible (exported) allocation/free functions. 1940 * Some of these are used just by xfs_alloc_btree.c and this file. 1941 */ 1942 1943 /* 1944 * Compute and fill in value of m_ag_maxlevels. 1945 */ 1946 void 1947 xfs_alloc_compute_maxlevels( 1948 xfs_mount_t *mp) /* file system mount structure */ 1949 { 1950 mp->m_ag_maxlevels = xfs_btree_compute_maxlevels(mp, mp->m_alloc_mnr, 1951 (mp->m_sb.sb_agblocks + 1) / 2); 1952 } 1953 1954 /* 1955 * Find the length of the longest extent in an AG. The 'need' parameter 1956 * specifies how much space we're going to need for the AGFL and the 1957 * 'reserved' parameter tells us how many blocks in this AG are reserved for 1958 * other callers. 1959 */ 1960 xfs_extlen_t 1961 xfs_alloc_longest_free_extent( 1962 struct xfs_mount *mp, 1963 struct xfs_perag *pag, 1964 xfs_extlen_t need, 1965 xfs_extlen_t reserved) 1966 { 1967 xfs_extlen_t delta = 0; 1968 1969 /* 1970 * If the AGFL needs a recharge, we'll have to subtract that from the 1971 * longest extent. 1972 */ 1973 if (need > pag->pagf_flcount) 1974 delta = need - pag->pagf_flcount; 1975 1976 /* 1977 * If we cannot maintain others' reservations with space from the 1978 * not-longest freesp extents, we'll have to subtract /that/ from 1979 * the longest extent too. 1980 */ 1981 if (pag->pagf_freeblks - pag->pagf_longest < reserved) 1982 delta += reserved - (pag->pagf_freeblks - pag->pagf_longest); 1983 1984 /* 1985 * If the longest extent is long enough to satisfy all the 1986 * reservations and AGFL rules in place, we can return this extent. 1987 */ 1988 if (pag->pagf_longest > delta) 1989 return pag->pagf_longest - delta; 1990 1991 /* Otherwise, let the caller try for 1 block if there's space. */ 1992 return pag->pagf_flcount > 0 || pag->pagf_longest > 0; 1993 } 1994 1995 unsigned int 1996 xfs_alloc_min_freelist( 1997 struct xfs_mount *mp, 1998 struct xfs_perag *pag) 1999 { 2000 unsigned int min_free; 2001 2002 /* space needed by-bno freespace btree */ 2003 min_free = min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_BNOi] + 1, 2004 mp->m_ag_maxlevels); 2005 /* space needed by-size freespace btree */ 2006 min_free += min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_CNTi] + 1, 2007 mp->m_ag_maxlevels); 2008 /* space needed reverse mapping used space btree */ 2009 if (xfs_sb_version_hasrmapbt(&mp->m_sb)) 2010 min_free += min_t(unsigned int, 2011 pag->pagf_levels[XFS_BTNUM_RMAPi] + 1, 2012 mp->m_rmap_maxlevels); 2013 2014 return min_free; 2015 } 2016 2017 /* 2018 * Check if the operation we are fixing up the freelist for should go ahead or 2019 * not. If we are freeing blocks, we always allow it, otherwise the allocation 2020 * is dependent on whether the size and shape of free space available will 2021 * permit the requested allocation to take place. 2022 */ 2023 static bool 2024 xfs_alloc_space_available( 2025 struct xfs_alloc_arg *args, 2026 xfs_extlen_t min_free, 2027 int flags) 2028 { 2029 struct xfs_perag *pag = args->pag; 2030 xfs_extlen_t alloc_len, longest; 2031 xfs_extlen_t reservation; /* blocks that are still reserved */ 2032 int available; 2033 2034 if (flags & XFS_ALLOC_FLAG_FREEING) 2035 return true; 2036 2037 reservation = xfs_ag_resv_needed(pag, args->resv); 2038 2039 /* do we have enough contiguous free space for the allocation? */ 2040 alloc_len = args->minlen + (args->alignment - 1) + args->minalignslop; 2041 longest = xfs_alloc_longest_free_extent(args->mp, pag, min_free, 2042 reservation); 2043 if (longest < alloc_len) 2044 return false; 2045 2046 /* do we have enough free space remaining for the allocation? */ 2047 available = (int)(pag->pagf_freeblks + pag->pagf_flcount - 2048 reservation - min_free - args->minleft); 2049 if (available < (int)max(args->total, alloc_len)) 2050 return false; 2051 2052 /* 2053 * Clamp maxlen to the amount of free space available for the actual 2054 * extent allocation. 2055 */ 2056 if (available < (int)args->maxlen && !(flags & XFS_ALLOC_FLAG_CHECK)) { 2057 args->maxlen = available; 2058 ASSERT(args->maxlen > 0); 2059 ASSERT(args->maxlen >= args->minlen); 2060 } 2061 2062 return true; 2063 } 2064 2065 /* 2066 * Check the agfl fields of the agf for inconsistency or corruption. The purpose 2067 * is to detect an agfl header padding mismatch between current and early v5 2068 * kernels. This problem manifests as a 1-slot size difference between the 2069 * on-disk flcount and the active [first, last] range of a wrapped agfl. This 2070 * may also catch variants of agfl count corruption unrelated to padding. Either 2071 * way, we'll reset the agfl and warn the user. 2072 * 2073 * Return true if a reset is required before the agfl can be used, false 2074 * otherwise. 2075 */ 2076 static bool 2077 xfs_agfl_needs_reset( 2078 struct xfs_mount *mp, 2079 struct xfs_agf *agf) 2080 { 2081 uint32_t f = be32_to_cpu(agf->agf_flfirst); 2082 uint32_t l = be32_to_cpu(agf->agf_fllast); 2083 uint32_t c = be32_to_cpu(agf->agf_flcount); 2084 int agfl_size = xfs_agfl_size(mp); 2085 int active; 2086 2087 /* no agfl header on v4 supers */ 2088 if (!xfs_sb_version_hascrc(&mp->m_sb)) 2089 return false; 2090 2091 /* 2092 * The agf read verifier catches severe corruption of these fields. 2093 * Repeat some sanity checks to cover a packed -> unpacked mismatch if 2094 * the verifier allows it. 2095 */ 2096 if (f >= agfl_size || l >= agfl_size) 2097 return true; 2098 if (c > agfl_size) 2099 return true; 2100 2101 /* 2102 * Check consistency between the on-disk count and the active range. An 2103 * agfl padding mismatch manifests as an inconsistent flcount. 2104 */ 2105 if (c && l >= f) 2106 active = l - f + 1; 2107 else if (c) 2108 active = agfl_size - f + l + 1; 2109 else 2110 active = 0; 2111 2112 return active != c; 2113 } 2114 2115 /* 2116 * Reset the agfl to an empty state. Ignore/drop any existing blocks since the 2117 * agfl content cannot be trusted. Warn the user that a repair is required to 2118 * recover leaked blocks. 2119 * 2120 * The purpose of this mechanism is to handle filesystems affected by the agfl 2121 * header padding mismatch problem. A reset keeps the filesystem online with a 2122 * relatively minor free space accounting inconsistency rather than suffer the 2123 * inevitable crash from use of an invalid agfl block. 2124 */ 2125 static void 2126 xfs_agfl_reset( 2127 struct xfs_trans *tp, 2128 struct xfs_buf *agbp, 2129 struct xfs_perag *pag) 2130 { 2131 struct xfs_mount *mp = tp->t_mountp; 2132 struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); 2133 2134 ASSERT(pag->pagf_agflreset); 2135 trace_xfs_agfl_reset(mp, agf, 0, _RET_IP_); 2136 2137 xfs_warn(mp, 2138 "WARNING: Reset corrupted AGFL on AG %u. %d blocks leaked. " 2139 "Please unmount and run xfs_repair.", 2140 pag->pag_agno, pag->pagf_flcount); 2141 2142 agf->agf_flfirst = 0; 2143 agf->agf_fllast = cpu_to_be32(xfs_agfl_size(mp) - 1); 2144 agf->agf_flcount = 0; 2145 xfs_alloc_log_agf(tp, agbp, XFS_AGF_FLFIRST | XFS_AGF_FLLAST | 2146 XFS_AGF_FLCOUNT); 2147 2148 pag->pagf_flcount = 0; 2149 pag->pagf_agflreset = false; 2150 } 2151 2152 /* 2153 * Decide whether to use this allocation group for this allocation. 2154 * If so, fix up the btree freelist's size. 2155 */ 2156 int /* error */ 2157 xfs_alloc_fix_freelist( 2158 struct xfs_alloc_arg *args, /* allocation argument structure */ 2159 int flags) /* XFS_ALLOC_FLAG_... */ 2160 { 2161 struct xfs_mount *mp = args->mp; 2162 struct xfs_perag *pag = args->pag; 2163 struct xfs_trans *tp = args->tp; 2164 struct xfs_buf *agbp = NULL; 2165 struct xfs_buf *agflbp = NULL; 2166 struct xfs_alloc_arg targs; /* local allocation arguments */ 2167 xfs_agblock_t bno; /* freelist block */ 2168 xfs_extlen_t need; /* total blocks needed in freelist */ 2169 int error = 0; 2170 2171 if (!pag->pagf_init) { 2172 error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp); 2173 if (error) 2174 goto out_no_agbp; 2175 if (!pag->pagf_init) { 2176 ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK); 2177 ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING)); 2178 goto out_agbp_relse; 2179 } 2180 } 2181 2182 /* 2183 * If this is a metadata preferred pag and we are user data then try 2184 * somewhere else if we are not being asked to try harder at this 2185 * point 2186 */ 2187 if (pag->pagf_metadata && xfs_alloc_is_userdata(args->datatype) && 2188 (flags & XFS_ALLOC_FLAG_TRYLOCK)) { 2189 ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING)); 2190 goto out_agbp_relse; 2191 } 2192 2193 need = xfs_alloc_min_freelist(mp, pag); 2194 if (!xfs_alloc_space_available(args, need, flags | 2195 XFS_ALLOC_FLAG_CHECK)) 2196 goto out_agbp_relse; 2197 2198 /* 2199 * Get the a.g. freespace buffer. 2200 * Can fail if we're not blocking on locks, and it's held. 2201 */ 2202 if (!agbp) { 2203 error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp); 2204 if (error) 2205 goto out_no_agbp; 2206 if (!agbp) { 2207 ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK); 2208 ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING)); 2209 goto out_no_agbp; 2210 } 2211 } 2212 2213 /* reset a padding mismatched agfl before final free space check */ 2214 if (pag->pagf_agflreset) 2215 xfs_agfl_reset(tp, agbp, pag); 2216 2217 /* If there isn't enough total space or single-extent, reject it. */ 2218 need = xfs_alloc_min_freelist(mp, pag); 2219 if (!xfs_alloc_space_available(args, need, flags)) 2220 goto out_agbp_relse; 2221 2222 /* 2223 * Make the freelist shorter if it's too long. 2224 * 2225 * Note that from this point onwards, we will always release the agf and 2226 * agfl buffers on error. This handles the case where we error out and 2227 * the buffers are clean or may not have been joined to the transaction 2228 * and hence need to be released manually. If they have been joined to 2229 * the transaction, then xfs_trans_brelse() will handle them 2230 * appropriately based on the recursion count and dirty state of the 2231 * buffer. 2232 * 2233 * XXX (dgc): When we have lots of free space, does this buy us 2234 * anything other than extra overhead when we need to put more blocks 2235 * back on the free list? Maybe we should only do this when space is 2236 * getting low or the AGFL is more than half full? 2237 * 2238 * The NOSHRINK flag prevents the AGFL from being shrunk if it's too 2239 * big; the NORMAP flag prevents AGFL expand/shrink operations from 2240 * updating the rmapbt. Both flags are used in xfs_repair while we're 2241 * rebuilding the rmapbt, and neither are used by the kernel. They're 2242 * both required to ensure that rmaps are correctly recorded for the 2243 * regenerated AGFL, bnobt, and cntbt. See repair/phase5.c and 2244 * repair/rmap.c in xfsprogs for details. 2245 */ 2246 memset(&targs, 0, sizeof(targs)); 2247 if (flags & XFS_ALLOC_FLAG_NORMAP) 2248 xfs_rmap_skip_owner_update(&targs.oinfo); 2249 else 2250 xfs_rmap_ag_owner(&targs.oinfo, XFS_RMAP_OWN_AG); 2251 while (!(flags & XFS_ALLOC_FLAG_NOSHRINK) && pag->pagf_flcount > need) { 2252 struct xfs_buf *bp; 2253 2254 error = xfs_alloc_get_freelist(tp, agbp, &bno, 0); 2255 if (error) 2256 goto out_agbp_relse; 2257 error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1, 2258 &targs.oinfo, XFS_AG_RESV_AGFL); 2259 if (error) 2260 goto out_agbp_relse; 2261 bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0); 2262 if (!bp) { 2263 error = -EFSCORRUPTED; 2264 goto out_agbp_relse; 2265 } 2266 xfs_trans_binval(tp, bp); 2267 } 2268 2269 targs.tp = tp; 2270 targs.mp = mp; 2271 targs.agbp = agbp; 2272 targs.agno = args->agno; 2273 targs.alignment = targs.minlen = targs.prod = 1; 2274 targs.type = XFS_ALLOCTYPE_THIS_AG; 2275 targs.pag = pag; 2276 error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp); 2277 if (error) 2278 goto out_agbp_relse; 2279 2280 /* Make the freelist longer if it's too short. */ 2281 while (pag->pagf_flcount < need) { 2282 targs.agbno = 0; 2283 targs.maxlen = need - pag->pagf_flcount; 2284 targs.resv = XFS_AG_RESV_AGFL; 2285 2286 /* Allocate as many blocks as possible at once. */ 2287 error = xfs_alloc_ag_vextent(&targs); 2288 if (error) 2289 goto out_agflbp_relse; 2290 2291 /* 2292 * Stop if we run out. Won't happen if callers are obeying 2293 * the restrictions correctly. Can happen for free calls 2294 * on a completely full ag. 2295 */ 2296 if (targs.agbno == NULLAGBLOCK) { 2297 if (flags & XFS_ALLOC_FLAG_FREEING) 2298 break; 2299 goto out_agflbp_relse; 2300 } 2301 /* 2302 * Put each allocated block on the list. 2303 */ 2304 for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) { 2305 error = xfs_alloc_put_freelist(tp, agbp, 2306 agflbp, bno, 0); 2307 if (error) 2308 goto out_agflbp_relse; 2309 } 2310 } 2311 xfs_trans_brelse(tp, agflbp); 2312 args->agbp = agbp; 2313 return 0; 2314 2315 out_agflbp_relse: 2316 xfs_trans_brelse(tp, agflbp); 2317 out_agbp_relse: 2318 if (agbp) 2319 xfs_trans_brelse(tp, agbp); 2320 out_no_agbp: 2321 args->agbp = NULL; 2322 return error; 2323 } 2324 2325 /* 2326 * Get a block from the freelist. 2327 * Returns with the buffer for the block gotten. 2328 */ 2329 int /* error */ 2330 xfs_alloc_get_freelist( 2331 xfs_trans_t *tp, /* transaction pointer */ 2332 xfs_buf_t *agbp, /* buffer containing the agf structure */ 2333 xfs_agblock_t *bnop, /* block address retrieved from freelist */ 2334 int btreeblk) /* destination is a AGF btree */ 2335 { 2336 xfs_agf_t *agf; /* a.g. freespace structure */ 2337 xfs_buf_t *agflbp;/* buffer for a.g. freelist structure */ 2338 xfs_agblock_t bno; /* block number returned */ 2339 __be32 *agfl_bno; 2340 int error; 2341 int logflags; 2342 xfs_mount_t *mp = tp->t_mountp; 2343 xfs_perag_t *pag; /* per allocation group data */ 2344 2345 /* 2346 * Freelist is empty, give up. 2347 */ 2348 agf = XFS_BUF_TO_AGF(agbp); 2349 if (!agf->agf_flcount) { 2350 *bnop = NULLAGBLOCK; 2351 return 0; 2352 } 2353 /* 2354 * Read the array of free blocks. 2355 */ 2356 error = xfs_alloc_read_agfl(mp, tp, be32_to_cpu(agf->agf_seqno), 2357 &agflbp); 2358 if (error) 2359 return error; 2360 2361 2362 /* 2363 * Get the block number and update the data structures. 2364 */ 2365 agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp); 2366 bno = be32_to_cpu(agfl_bno[be32_to_cpu(agf->agf_flfirst)]); 2367 be32_add_cpu(&agf->agf_flfirst, 1); 2368 xfs_trans_brelse(tp, agflbp); 2369 if (be32_to_cpu(agf->agf_flfirst) == xfs_agfl_size(mp)) 2370 agf->agf_flfirst = 0; 2371 2372 pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno)); 2373 ASSERT(!pag->pagf_agflreset); 2374 be32_add_cpu(&agf->agf_flcount, -1); 2375 xfs_trans_agflist_delta(tp, -1); 2376 pag->pagf_flcount--; 2377 xfs_perag_put(pag); 2378 2379 logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT; 2380 if (btreeblk) { 2381 be32_add_cpu(&agf->agf_btreeblks, 1); 2382 pag->pagf_btreeblks++; 2383 logflags |= XFS_AGF_BTREEBLKS; 2384 } 2385 2386 xfs_alloc_log_agf(tp, agbp, logflags); 2387 *bnop = bno; 2388 2389 return 0; 2390 } 2391 2392 /* 2393 * Log the given fields from the agf structure. 2394 */ 2395 void 2396 xfs_alloc_log_agf( 2397 xfs_trans_t *tp, /* transaction pointer */ 2398 xfs_buf_t *bp, /* buffer for a.g. freelist header */ 2399 int fields) /* mask of fields to be logged (XFS_AGF_...) */ 2400 { 2401 int first; /* first byte offset */ 2402 int last; /* last byte offset */ 2403 static const short offsets[] = { 2404 offsetof(xfs_agf_t, agf_magicnum), 2405 offsetof(xfs_agf_t, agf_versionnum), 2406 offsetof(xfs_agf_t, agf_seqno), 2407 offsetof(xfs_agf_t, agf_length), 2408 offsetof(xfs_agf_t, agf_roots[0]), 2409 offsetof(xfs_agf_t, agf_levels[0]), 2410 offsetof(xfs_agf_t, agf_flfirst), 2411 offsetof(xfs_agf_t, agf_fllast), 2412 offsetof(xfs_agf_t, agf_flcount), 2413 offsetof(xfs_agf_t, agf_freeblks), 2414 offsetof(xfs_agf_t, agf_longest), 2415 offsetof(xfs_agf_t, agf_btreeblks), 2416 offsetof(xfs_agf_t, agf_uuid), 2417 offsetof(xfs_agf_t, agf_rmap_blocks), 2418 offsetof(xfs_agf_t, agf_refcount_blocks), 2419 offsetof(xfs_agf_t, agf_refcount_root), 2420 offsetof(xfs_agf_t, agf_refcount_level), 2421 /* needed so that we don't log the whole rest of the structure: */ 2422 offsetof(xfs_agf_t, agf_spare64), 2423 sizeof(xfs_agf_t) 2424 }; 2425 2426 trace_xfs_agf(tp->t_mountp, XFS_BUF_TO_AGF(bp), fields, _RET_IP_); 2427 2428 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGF_BUF); 2429 2430 xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last); 2431 xfs_trans_log_buf(tp, bp, (uint)first, (uint)last); 2432 } 2433 2434 /* 2435 * Interface for inode allocation to force the pag data to be initialized. 2436 */ 2437 int /* error */ 2438 xfs_alloc_pagf_init( 2439 xfs_mount_t *mp, /* file system mount structure */ 2440 xfs_trans_t *tp, /* transaction pointer */ 2441 xfs_agnumber_t agno, /* allocation group number */ 2442 int flags) /* XFS_ALLOC_FLAGS_... */ 2443 { 2444 xfs_buf_t *bp; 2445 int error; 2446 2447 if ((error = xfs_alloc_read_agf(mp, tp, agno, flags, &bp))) 2448 return error; 2449 if (bp) 2450 xfs_trans_brelse(tp, bp); 2451 return 0; 2452 } 2453 2454 /* 2455 * Put the block on the freelist for the allocation group. 2456 */ 2457 int /* error */ 2458 xfs_alloc_put_freelist( 2459 xfs_trans_t *tp, /* transaction pointer */ 2460 xfs_buf_t *agbp, /* buffer for a.g. freelist header */ 2461 xfs_buf_t *agflbp,/* buffer for a.g. free block array */ 2462 xfs_agblock_t bno, /* block being freed */ 2463 int btreeblk) /* block came from a AGF btree */ 2464 { 2465 xfs_agf_t *agf; /* a.g. freespace structure */ 2466 __be32 *blockp;/* pointer to array entry */ 2467 int error; 2468 int logflags; 2469 xfs_mount_t *mp; /* mount structure */ 2470 xfs_perag_t *pag; /* per allocation group data */ 2471 __be32 *agfl_bno; 2472 int startoff; 2473 2474 agf = XFS_BUF_TO_AGF(agbp); 2475 mp = tp->t_mountp; 2476 2477 if (!agflbp && (error = xfs_alloc_read_agfl(mp, tp, 2478 be32_to_cpu(agf->agf_seqno), &agflbp))) 2479 return error; 2480 be32_add_cpu(&agf->agf_fllast, 1); 2481 if (be32_to_cpu(agf->agf_fllast) == xfs_agfl_size(mp)) 2482 agf->agf_fllast = 0; 2483 2484 pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno)); 2485 ASSERT(!pag->pagf_agflreset); 2486 be32_add_cpu(&agf->agf_flcount, 1); 2487 xfs_trans_agflist_delta(tp, 1); 2488 pag->pagf_flcount++; 2489 2490 logflags = XFS_AGF_FLLAST | XFS_AGF_FLCOUNT; 2491 if (btreeblk) { 2492 be32_add_cpu(&agf->agf_btreeblks, -1); 2493 pag->pagf_btreeblks--; 2494 logflags |= XFS_AGF_BTREEBLKS; 2495 } 2496 xfs_perag_put(pag); 2497 2498 xfs_alloc_log_agf(tp, agbp, logflags); 2499 2500 ASSERT(be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp)); 2501 2502 agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp); 2503 blockp = &agfl_bno[be32_to_cpu(agf->agf_fllast)]; 2504 *blockp = cpu_to_be32(bno); 2505 startoff = (char *)blockp - (char *)agflbp->b_addr; 2506 2507 xfs_alloc_log_agf(tp, agbp, logflags); 2508 2509 xfs_trans_buf_set_type(tp, agflbp, XFS_BLFT_AGFL_BUF); 2510 xfs_trans_log_buf(tp, agflbp, startoff, 2511 startoff + sizeof(xfs_agblock_t) - 1); 2512 return 0; 2513 } 2514 2515 static xfs_failaddr_t 2516 xfs_agf_verify( 2517 struct xfs_buf *bp) 2518 { 2519 struct xfs_mount *mp = bp->b_target->bt_mount; 2520 struct xfs_agf *agf = XFS_BUF_TO_AGF(bp); 2521 2522 if (xfs_sb_version_hascrc(&mp->m_sb)) { 2523 if (!uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid)) 2524 return __this_address; 2525 if (!xfs_log_check_lsn(mp, 2526 be64_to_cpu(XFS_BUF_TO_AGF(bp)->agf_lsn))) 2527 return __this_address; 2528 } 2529 2530 if (!(agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) && 2531 XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) && 2532 be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) && 2533 be32_to_cpu(agf->agf_flfirst) < xfs_agfl_size(mp) && 2534 be32_to_cpu(agf->agf_fllast) < xfs_agfl_size(mp) && 2535 be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp))) 2536 return __this_address; 2537 2538 if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 || 2539 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) < 1 || 2540 be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > XFS_BTREE_MAXLEVELS || 2541 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) > XFS_BTREE_MAXLEVELS) 2542 return __this_address; 2543 2544 if (xfs_sb_version_hasrmapbt(&mp->m_sb) && 2545 (be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) < 1 || 2546 be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > XFS_BTREE_MAXLEVELS)) 2547 return __this_address; 2548 2549 /* 2550 * during growfs operations, the perag is not fully initialised, 2551 * so we can't use it for any useful checking. growfs ensures we can't 2552 * use it by using uncached buffers that don't have the perag attached 2553 * so we can detect and avoid this problem. 2554 */ 2555 if (bp->b_pag && be32_to_cpu(agf->agf_seqno) != bp->b_pag->pag_agno) 2556 return __this_address; 2557 2558 if (xfs_sb_version_haslazysbcount(&mp->m_sb) && 2559 be32_to_cpu(agf->agf_btreeblks) > be32_to_cpu(agf->agf_length)) 2560 return __this_address; 2561 2562 if (xfs_sb_version_hasreflink(&mp->m_sb) && 2563 (be32_to_cpu(agf->agf_refcount_level) < 1 || 2564 be32_to_cpu(agf->agf_refcount_level) > XFS_BTREE_MAXLEVELS)) 2565 return __this_address; 2566 2567 return NULL; 2568 2569 } 2570 2571 static void 2572 xfs_agf_read_verify( 2573 struct xfs_buf *bp) 2574 { 2575 struct xfs_mount *mp = bp->b_target->bt_mount; 2576 xfs_failaddr_t fa; 2577 2578 if (xfs_sb_version_hascrc(&mp->m_sb) && 2579 !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF)) 2580 xfs_verifier_error(bp, -EFSBADCRC, __this_address); 2581 else { 2582 fa = xfs_agf_verify(bp); 2583 if (XFS_TEST_ERROR(fa, mp, XFS_ERRTAG_ALLOC_READ_AGF)) 2584 xfs_verifier_error(bp, -EFSCORRUPTED, fa); 2585 } 2586 } 2587 2588 static void 2589 xfs_agf_write_verify( 2590 struct xfs_buf *bp) 2591 { 2592 struct xfs_mount *mp = bp->b_target->bt_mount; 2593 struct xfs_buf_log_item *bip = bp->b_log_item; 2594 xfs_failaddr_t fa; 2595 2596 fa = xfs_agf_verify(bp); 2597 if (fa) { 2598 xfs_verifier_error(bp, -EFSCORRUPTED, fa); 2599 return; 2600 } 2601 2602 if (!xfs_sb_version_hascrc(&mp->m_sb)) 2603 return; 2604 2605 if (bip) 2606 XFS_BUF_TO_AGF(bp)->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn); 2607 2608 xfs_buf_update_cksum(bp, XFS_AGF_CRC_OFF); 2609 } 2610 2611 const struct xfs_buf_ops xfs_agf_buf_ops = { 2612 .name = "xfs_agf", 2613 .verify_read = xfs_agf_read_verify, 2614 .verify_write = xfs_agf_write_verify, 2615 .verify_struct = xfs_agf_verify, 2616 }; 2617 2618 /* 2619 * Read in the allocation group header (free/alloc section). 2620 */ 2621 int /* error */ 2622 xfs_read_agf( 2623 struct xfs_mount *mp, /* mount point structure */ 2624 struct xfs_trans *tp, /* transaction pointer */ 2625 xfs_agnumber_t agno, /* allocation group number */ 2626 int flags, /* XFS_BUF_ */ 2627 struct xfs_buf **bpp) /* buffer for the ag freelist header */ 2628 { 2629 int error; 2630 2631 trace_xfs_read_agf(mp, agno); 2632 2633 ASSERT(agno != NULLAGNUMBER); 2634 error = xfs_trans_read_buf( 2635 mp, tp, mp->m_ddev_targp, 2636 XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)), 2637 XFS_FSS_TO_BB(mp, 1), flags, bpp, &xfs_agf_buf_ops); 2638 if (error) 2639 return error; 2640 if (!*bpp) 2641 return 0; 2642 2643 ASSERT(!(*bpp)->b_error); 2644 xfs_buf_set_ref(*bpp, XFS_AGF_REF); 2645 return 0; 2646 } 2647 2648 /* 2649 * Read in the allocation group header (free/alloc section). 2650 */ 2651 int /* error */ 2652 xfs_alloc_read_agf( 2653 struct xfs_mount *mp, /* mount point structure */ 2654 struct xfs_trans *tp, /* transaction pointer */ 2655 xfs_agnumber_t agno, /* allocation group number */ 2656 int flags, /* XFS_ALLOC_FLAG_... */ 2657 struct xfs_buf **bpp) /* buffer for the ag freelist header */ 2658 { 2659 struct xfs_agf *agf; /* ag freelist header */ 2660 struct xfs_perag *pag; /* per allocation group data */ 2661 int error; 2662 2663 trace_xfs_alloc_read_agf(mp, agno); 2664 2665 ASSERT(agno != NULLAGNUMBER); 2666 error = xfs_read_agf(mp, tp, agno, 2667 (flags & XFS_ALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0, 2668 bpp); 2669 if (error) 2670 return error; 2671 if (!*bpp) 2672 return 0; 2673 ASSERT(!(*bpp)->b_error); 2674 2675 agf = XFS_BUF_TO_AGF(*bpp); 2676 pag = xfs_perag_get(mp, agno); 2677 if (!pag->pagf_init) { 2678 pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks); 2679 pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks); 2680 pag->pagf_flcount = be32_to_cpu(agf->agf_flcount); 2681 pag->pagf_longest = be32_to_cpu(agf->agf_longest); 2682 pag->pagf_levels[XFS_BTNUM_BNOi] = 2683 be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]); 2684 pag->pagf_levels[XFS_BTNUM_CNTi] = 2685 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]); 2686 pag->pagf_levels[XFS_BTNUM_RMAPi] = 2687 be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAPi]); 2688 pag->pagf_refcount_level = be32_to_cpu(agf->agf_refcount_level); 2689 spin_lock_init(&pag->pagb_lock); 2690 pag->pagb_count = 0; 2691 pag->pagb_tree = RB_ROOT; 2692 pag->pagf_init = 1; 2693 pag->pagf_agflreset = xfs_agfl_needs_reset(mp, agf); 2694 } 2695 #ifdef DEBUG 2696 else if (!XFS_FORCED_SHUTDOWN(mp)) { 2697 ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks)); 2698 ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks)); 2699 ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount)); 2700 ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest)); 2701 ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] == 2702 be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi])); 2703 ASSERT(pag->pagf_levels[XFS_BTNUM_CNTi] == 2704 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi])); 2705 } 2706 #endif 2707 xfs_perag_put(pag); 2708 return 0; 2709 } 2710 2711 /* 2712 * Allocate an extent (variable-size). 2713 * Depending on the allocation type, we either look in a single allocation 2714 * group or loop over the allocation groups to find the result. 2715 */ 2716 int /* error */ 2717 xfs_alloc_vextent( 2718 xfs_alloc_arg_t *args) /* allocation argument structure */ 2719 { 2720 xfs_agblock_t agsize; /* allocation group size */ 2721 int error; 2722 int flags; /* XFS_ALLOC_FLAG_... locking flags */ 2723 xfs_mount_t *mp; /* mount structure pointer */ 2724 xfs_agnumber_t sagno; /* starting allocation group number */ 2725 xfs_alloctype_t type; /* input allocation type */ 2726 int bump_rotor = 0; 2727 xfs_agnumber_t rotorstep = xfs_rotorstep; /* inode32 agf stepper */ 2728 2729 mp = args->mp; 2730 type = args->otype = args->type; 2731 args->agbno = NULLAGBLOCK; 2732 /* 2733 * Just fix this up, for the case where the last a.g. is shorter 2734 * (or there's only one a.g.) and the caller couldn't easily figure 2735 * that out (xfs_bmap_alloc). 2736 */ 2737 agsize = mp->m_sb.sb_agblocks; 2738 if (args->maxlen > agsize) 2739 args->maxlen = agsize; 2740 if (args->alignment == 0) 2741 args->alignment = 1; 2742 ASSERT(XFS_FSB_TO_AGNO(mp, args->fsbno) < mp->m_sb.sb_agcount); 2743 ASSERT(XFS_FSB_TO_AGBNO(mp, args->fsbno) < agsize); 2744 ASSERT(args->minlen <= args->maxlen); 2745 ASSERT(args->minlen <= agsize); 2746 ASSERT(args->mod < args->prod); 2747 if (XFS_FSB_TO_AGNO(mp, args->fsbno) >= mp->m_sb.sb_agcount || 2748 XFS_FSB_TO_AGBNO(mp, args->fsbno) >= agsize || 2749 args->minlen > args->maxlen || args->minlen > agsize || 2750 args->mod >= args->prod) { 2751 args->fsbno = NULLFSBLOCK; 2752 trace_xfs_alloc_vextent_badargs(args); 2753 return 0; 2754 } 2755 2756 switch (type) { 2757 case XFS_ALLOCTYPE_THIS_AG: 2758 case XFS_ALLOCTYPE_NEAR_BNO: 2759 case XFS_ALLOCTYPE_THIS_BNO: 2760 /* 2761 * These three force us into a single a.g. 2762 */ 2763 args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno); 2764 args->pag = xfs_perag_get(mp, args->agno); 2765 error = xfs_alloc_fix_freelist(args, 0); 2766 if (error) { 2767 trace_xfs_alloc_vextent_nofix(args); 2768 goto error0; 2769 } 2770 if (!args->agbp) { 2771 trace_xfs_alloc_vextent_noagbp(args); 2772 break; 2773 } 2774 args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno); 2775 if ((error = xfs_alloc_ag_vextent(args))) 2776 goto error0; 2777 break; 2778 case XFS_ALLOCTYPE_START_BNO: 2779 /* 2780 * Try near allocation first, then anywhere-in-ag after 2781 * the first a.g. fails. 2782 */ 2783 if ((args->datatype & XFS_ALLOC_INITIAL_USER_DATA) && 2784 (mp->m_flags & XFS_MOUNT_32BITINODES)) { 2785 args->fsbno = XFS_AGB_TO_FSB(mp, 2786 ((mp->m_agfrotor / rotorstep) % 2787 mp->m_sb.sb_agcount), 0); 2788 bump_rotor = 1; 2789 } 2790 args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno); 2791 args->type = XFS_ALLOCTYPE_NEAR_BNO; 2792 /* FALLTHROUGH */ 2793 case XFS_ALLOCTYPE_FIRST_AG: 2794 /* 2795 * Rotate through the allocation groups looking for a winner. 2796 */ 2797 if (type == XFS_ALLOCTYPE_FIRST_AG) { 2798 /* 2799 * Start with allocation group given by bno. 2800 */ 2801 args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno); 2802 args->type = XFS_ALLOCTYPE_THIS_AG; 2803 sagno = 0; 2804 flags = 0; 2805 } else { 2806 /* 2807 * Start with the given allocation group. 2808 */ 2809 args->agno = sagno = XFS_FSB_TO_AGNO(mp, args->fsbno); 2810 flags = XFS_ALLOC_FLAG_TRYLOCK; 2811 } 2812 /* 2813 * Loop over allocation groups twice; first time with 2814 * trylock set, second time without. 2815 */ 2816 for (;;) { 2817 args->pag = xfs_perag_get(mp, args->agno); 2818 error = xfs_alloc_fix_freelist(args, flags); 2819 if (error) { 2820 trace_xfs_alloc_vextent_nofix(args); 2821 goto error0; 2822 } 2823 /* 2824 * If we get a buffer back then the allocation will fly. 2825 */ 2826 if (args->agbp) { 2827 if ((error = xfs_alloc_ag_vextent(args))) 2828 goto error0; 2829 break; 2830 } 2831 2832 trace_xfs_alloc_vextent_loopfailed(args); 2833 2834 /* 2835 * Didn't work, figure out the next iteration. 2836 */ 2837 if (args->agno == sagno && 2838 type == XFS_ALLOCTYPE_START_BNO) 2839 args->type = XFS_ALLOCTYPE_THIS_AG; 2840 /* 2841 * For the first allocation, we can try any AG to get 2842 * space. However, if we already have allocated a 2843 * block, we don't want to try AGs whose number is below 2844 * sagno. Otherwise, we may end up with out-of-order 2845 * locking of AGF, which might cause deadlock. 2846 */ 2847 if (++(args->agno) == mp->m_sb.sb_agcount) { 2848 if (args->firstblock != NULLFSBLOCK) 2849 args->agno = sagno; 2850 else 2851 args->agno = 0; 2852 } 2853 /* 2854 * Reached the starting a.g., must either be done 2855 * or switch to non-trylock mode. 2856 */ 2857 if (args->agno == sagno) { 2858 if (flags == 0) { 2859 args->agbno = NULLAGBLOCK; 2860 trace_xfs_alloc_vextent_allfailed(args); 2861 break; 2862 } 2863 2864 flags = 0; 2865 if (type == XFS_ALLOCTYPE_START_BNO) { 2866 args->agbno = XFS_FSB_TO_AGBNO(mp, 2867 args->fsbno); 2868 args->type = XFS_ALLOCTYPE_NEAR_BNO; 2869 } 2870 } 2871 xfs_perag_put(args->pag); 2872 } 2873 if (bump_rotor) { 2874 if (args->agno == sagno) 2875 mp->m_agfrotor = (mp->m_agfrotor + 1) % 2876 (mp->m_sb.sb_agcount * rotorstep); 2877 else 2878 mp->m_agfrotor = (args->agno * rotorstep + 1) % 2879 (mp->m_sb.sb_agcount * rotorstep); 2880 } 2881 break; 2882 default: 2883 ASSERT(0); 2884 /* NOTREACHED */ 2885 } 2886 if (args->agbno == NULLAGBLOCK) 2887 args->fsbno = NULLFSBLOCK; 2888 else { 2889 args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno); 2890 #ifdef DEBUG 2891 ASSERT(args->len >= args->minlen); 2892 ASSERT(args->len <= args->maxlen); 2893 ASSERT(args->agbno % args->alignment == 0); 2894 XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno), 2895 args->len); 2896 #endif 2897 2898 /* Zero the extent if we were asked to do so */ 2899 if (args->datatype & XFS_ALLOC_USERDATA_ZERO) { 2900 error = xfs_zero_extent(args->ip, args->fsbno, args->len); 2901 if (error) 2902 goto error0; 2903 } 2904 2905 } 2906 xfs_perag_put(args->pag); 2907 return 0; 2908 error0: 2909 xfs_perag_put(args->pag); 2910 return error; 2911 } 2912 2913 /* Ensure that the freelist is at full capacity. */ 2914 int 2915 xfs_free_extent_fix_freelist( 2916 struct xfs_trans *tp, 2917 xfs_agnumber_t agno, 2918 struct xfs_buf **agbp) 2919 { 2920 struct xfs_alloc_arg args; 2921 int error; 2922 2923 memset(&args, 0, sizeof(struct xfs_alloc_arg)); 2924 args.tp = tp; 2925 args.mp = tp->t_mountp; 2926 args.agno = agno; 2927 2928 /* 2929 * validate that the block number is legal - the enables us to detect 2930 * and handle a silent filesystem corruption rather than crashing. 2931 */ 2932 if (args.agno >= args.mp->m_sb.sb_agcount) 2933 return -EFSCORRUPTED; 2934 2935 args.pag = xfs_perag_get(args.mp, args.agno); 2936 ASSERT(args.pag); 2937 2938 error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING); 2939 if (error) 2940 goto out; 2941 2942 *agbp = args.agbp; 2943 out: 2944 xfs_perag_put(args.pag); 2945 return error; 2946 } 2947 2948 /* 2949 * Free an extent. 2950 * Just break up the extent address and hand off to xfs_free_ag_extent 2951 * after fixing up the freelist. 2952 */ 2953 int /* error */ 2954 xfs_free_extent( 2955 struct xfs_trans *tp, /* transaction pointer */ 2956 xfs_fsblock_t bno, /* starting block number of extent */ 2957 xfs_extlen_t len, /* length of extent */ 2958 struct xfs_owner_info *oinfo, /* extent owner */ 2959 enum xfs_ag_resv_type type) /* block reservation type */ 2960 { 2961 struct xfs_mount *mp = tp->t_mountp; 2962 struct xfs_buf *agbp; 2963 xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, bno); 2964 xfs_agblock_t agbno = XFS_FSB_TO_AGBNO(mp, bno); 2965 int error; 2966 2967 ASSERT(len != 0); 2968 ASSERT(type != XFS_AG_RESV_AGFL); 2969 2970 if (XFS_TEST_ERROR(false, mp, 2971 XFS_ERRTAG_FREE_EXTENT)) 2972 return -EIO; 2973 2974 error = xfs_free_extent_fix_freelist(tp, agno, &agbp); 2975 if (error) 2976 return error; 2977 2978 XFS_WANT_CORRUPTED_GOTO(mp, agbno < mp->m_sb.sb_agblocks, err); 2979 2980 /* validate the extent size is legal now we have the agf locked */ 2981 XFS_WANT_CORRUPTED_GOTO(mp, 2982 agbno + len <= be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_length), 2983 err); 2984 2985 error = xfs_free_ag_extent(tp, agbp, agno, agbno, len, oinfo, type); 2986 if (error) 2987 goto err; 2988 2989 xfs_extent_busy_insert(tp, agno, agbno, len, 0); 2990 return 0; 2991 2992 err: 2993 xfs_trans_brelse(tp, agbp); 2994 return error; 2995 } 2996 2997 struct xfs_alloc_query_range_info { 2998 xfs_alloc_query_range_fn fn; 2999 void *priv; 3000 }; 3001 3002 /* Format btree record and pass to our callback. */ 3003 STATIC int 3004 xfs_alloc_query_range_helper( 3005 struct xfs_btree_cur *cur, 3006 union xfs_btree_rec *rec, 3007 void *priv) 3008 { 3009 struct xfs_alloc_query_range_info *query = priv; 3010 struct xfs_alloc_rec_incore irec; 3011 3012 irec.ar_startblock = be32_to_cpu(rec->alloc.ar_startblock); 3013 irec.ar_blockcount = be32_to_cpu(rec->alloc.ar_blockcount); 3014 return query->fn(cur, &irec, query->priv); 3015 } 3016 3017 /* Find all free space within a given range of blocks. */ 3018 int 3019 xfs_alloc_query_range( 3020 struct xfs_btree_cur *cur, 3021 struct xfs_alloc_rec_incore *low_rec, 3022 struct xfs_alloc_rec_incore *high_rec, 3023 xfs_alloc_query_range_fn fn, 3024 void *priv) 3025 { 3026 union xfs_btree_irec low_brec; 3027 union xfs_btree_irec high_brec; 3028 struct xfs_alloc_query_range_info query; 3029 3030 ASSERT(cur->bc_btnum == XFS_BTNUM_BNO); 3031 low_brec.a = *low_rec; 3032 high_brec.a = *high_rec; 3033 query.priv = priv; 3034 query.fn = fn; 3035 return xfs_btree_query_range(cur, &low_brec, &high_brec, 3036 xfs_alloc_query_range_helper, &query); 3037 } 3038 3039 /* Find all free space records. */ 3040 int 3041 xfs_alloc_query_all( 3042 struct xfs_btree_cur *cur, 3043 xfs_alloc_query_range_fn fn, 3044 void *priv) 3045 { 3046 struct xfs_alloc_query_range_info query; 3047 3048 ASSERT(cur->bc_btnum == XFS_BTNUM_BNO); 3049 query.priv = priv; 3050 query.fn = fn; 3051 return xfs_btree_query_all(cur, xfs_alloc_query_range_helper, &query); 3052 } 3053 3054 /* Find the size of the AG, in blocks. */ 3055 xfs_agblock_t 3056 xfs_ag_block_count( 3057 struct xfs_mount *mp, 3058 xfs_agnumber_t agno) 3059 { 3060 ASSERT(agno < mp->m_sb.sb_agcount); 3061 3062 if (agno < mp->m_sb.sb_agcount - 1) 3063 return mp->m_sb.sb_agblocks; 3064 return mp->m_sb.sb_dblocks - (agno * mp->m_sb.sb_agblocks); 3065 } 3066 3067 /* 3068 * Verify that an AG block number pointer neither points outside the AG 3069 * nor points at static metadata. 3070 */ 3071 bool 3072 xfs_verify_agbno( 3073 struct xfs_mount *mp, 3074 xfs_agnumber_t agno, 3075 xfs_agblock_t agbno) 3076 { 3077 xfs_agblock_t eoag; 3078 3079 eoag = xfs_ag_block_count(mp, agno); 3080 if (agbno >= eoag) 3081 return false; 3082 if (agbno <= XFS_AGFL_BLOCK(mp)) 3083 return false; 3084 return true; 3085 } 3086 3087 /* 3088 * Verify that an FS block number pointer neither points outside the 3089 * filesystem nor points at static AG metadata. 3090 */ 3091 bool 3092 xfs_verify_fsbno( 3093 struct xfs_mount *mp, 3094 xfs_fsblock_t fsbno) 3095 { 3096 xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, fsbno); 3097 3098 if (agno >= mp->m_sb.sb_agcount) 3099 return false; 3100 return xfs_verify_agbno(mp, agno, XFS_FSB_TO_AGBNO(mp, fsbno)); 3101 } 3102 3103 /* Is there a record covering a given extent? */ 3104 int 3105 xfs_alloc_has_record( 3106 struct xfs_btree_cur *cur, 3107 xfs_agblock_t bno, 3108 xfs_extlen_t len, 3109 bool *exists) 3110 { 3111 union xfs_btree_irec low; 3112 union xfs_btree_irec high; 3113 3114 memset(&low, 0, sizeof(low)); 3115 low.a.ar_startblock = bno; 3116 memset(&high, 0xFF, sizeof(high)); 3117 high.a.ar_startblock = bno + len - 1; 3118 3119 return xfs_btree_has_record(cur, &low, &high, exists); 3120 } 3121