/*
 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_shared.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_rmap.h"
#include "xfs_alloc_btree.h"
#include "xfs_alloc.h"
#include "xfs_extent_busy.h"
#include "xfs_errortag.h"
#include "xfs_error.h"
#include "xfs_cksum.h"
#include "xfs_trace.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_log.h"
#include "xfs_ag_resv.h"

struct workqueue_struct *xfs_alloc_wq;

#define XFS_ABSDIFF(a,b)        (((a) <= (b)) ? ((b) - (a)) : ((a) - (b)))

#define XFSA_FIXUP_BNO_OK       1
#define XFSA_FIXUP_CNT_OK       2

STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *);
STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *);
STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);
STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *,
                xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *);

unsigned int
xfs_refc_block(
        struct xfs_mount        *mp)
{
        if (xfs_sb_version_hasrmapbt(&mp->m_sb))
                return XFS_RMAP_BLOCK(mp) + 1;
        if (xfs_sb_version_hasfinobt(&mp->m_sb))
                return XFS_FIBT_BLOCK(mp) + 1;
        return XFS_IBT_BLOCK(mp) + 1;
}

xfs_extlen_t
xfs_prealloc_blocks(
        struct xfs_mount        *mp)
{
        if (xfs_sb_version_hasreflink(&mp->m_sb))
                return xfs_refc_block(mp) + 1;
        if (xfs_sb_version_hasrmapbt(&mp->m_sb))
                return XFS_RMAP_BLOCK(mp) + 1;
        if (xfs_sb_version_hasfinobt(&mp->m_sb))
                return XFS_FIBT_BLOCK(mp) + 1;
        return XFS_IBT_BLOCK(mp) + 1;
}

/*
 * In order to avoid ENOSPC-related deadlock caused by out-of-order locking of
 * AGF buffer (PV 947395), we place constraints on the relationship among
 * actual allocations for data blocks, freelist blocks, and potential file data
 * bmap btree blocks. However, these restrictions can leave a delayed extent
 * with no space actually allocated: for example, a data block is allocated in
 * some AG, but no block is left over for the additional bmap btree block
 * needed when the file's bmap btree splits. This can lead to an infinite loop
 * when the file gets flushed to disk and all delayed extents need to be
 * actually allocated. To get around this, we explicitly set aside a few
 * blocks which will not be reserved in delayed allocation.
 *
 * We need to reserve 4 fsbs _per AG_ for the freelist and 4 more to handle a
 * potential split of the file's bmap btree.
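 *
 * For example (illustrative numbers only): assuming XFS_ALLOC_AGFL_RESERVE
 * is 4, a filesystem with 4 AGs would set aside 4 * (4 + 4) = 32 blocks in
 * xfs_alloc_set_aside() below.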
 */
unsigned int
xfs_alloc_set_aside(
        struct xfs_mount        *mp)
{
        return mp->m_sb.sb_agcount * (XFS_ALLOC_AGFL_RESERVE + 4);
}

/*
 * When deciding how much space to allocate out of an AG, we limit the
 * allocation maximum size to the size of the AG. However, we cannot use all
 * the blocks in the AG - some are permanently used by metadata. These
 * blocks are generally:
 *      - the AG superblock, AGF, AGI and AGFL
 *      - the AGF (bno and cnt) and AGI btree root blocks, and optionally
 *        the AGI free inode and rmap btree root blocks.
 *      - blocks on the AGFL according to xfs_alloc_set_aside() limits
 *      - the rmapbt root block
 *
 * The AG headers are sector sized, so the amount of space they take up is
 * dependent on filesystem geometry. The others are all single blocks.
 */
unsigned int
xfs_alloc_ag_max_usable(
        struct xfs_mount        *mp)
{
        unsigned int            blocks;

        blocks = XFS_BB_TO_FSB(mp, XFS_FSS_TO_BB(mp, 4)); /* ag headers */
        blocks += XFS_ALLOC_AGFL_RESERVE;
        blocks += 3;                    /* AGF, AGI btree root blocks */
        if (xfs_sb_version_hasfinobt(&mp->m_sb))
                blocks++;               /* finobt root block */
        if (xfs_sb_version_hasrmapbt(&mp->m_sb))
                blocks++;               /* rmap root block */
        if (xfs_sb_version_hasreflink(&mp->m_sb))
                blocks++;               /* refcount root block */

        return mp->m_sb.sb_agblocks - blocks;
}

/*
 * Lookup the record equal to [bno, len] in the btree given by cur.
 */
STATIC int                              /* error */
xfs_alloc_lookup_eq(
        struct xfs_btree_cur    *cur,   /* btree cursor */
        xfs_agblock_t           bno,    /* starting block of extent */
        xfs_extlen_t            len,    /* length of extent */
        int                     *stat)  /* success/failure */
{
        cur->bc_rec.a.ar_startblock = bno;
        cur->bc_rec.a.ar_blockcount = len;
        return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
}

/*
 * Lookup the first record greater than or equal to [bno, len]
 * in the btree given by cur.
 */
int                                     /* error */
xfs_alloc_lookup_ge(
        struct xfs_btree_cur    *cur,   /* btree cursor */
        xfs_agblock_t           bno,    /* starting block of extent */
        xfs_extlen_t            len,    /* length of extent */
        int                     *stat)  /* success/failure */
{
        cur->bc_rec.a.ar_startblock = bno;
        cur->bc_rec.a.ar_blockcount = len;
        return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
}

/*
 * Lookup the first record less than or equal to [bno, len]
 * in the btree given by cur.
 */
static int                              /* error */
xfs_alloc_lookup_le(
        struct xfs_btree_cur    *cur,   /* btree cursor */
        xfs_agblock_t           bno,    /* starting block of extent */
        xfs_extlen_t            len,    /* length of extent */
        int                     *stat)  /* success/failure */
{
        cur->bc_rec.a.ar_startblock = bno;
        cur->bc_rec.a.ar_blockcount = len;
        return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
}

/*
 * Update the record referred to by cur to the value given
 * by [bno, len].
 * This either works (return 0) or gets an EFSCORRUPTED error.
 */
STATIC int                              /* error */
xfs_alloc_update(
        struct xfs_btree_cur    *cur,   /* btree cursor */
        xfs_agblock_t           bno,    /* starting block of extent */
        xfs_extlen_t            len)    /* length of extent */
{
        union xfs_btree_rec     rec;

        rec.alloc.ar_startblock = cpu_to_be32(bno);
        rec.alloc.ar_blockcount = cpu_to_be32(len);
        return xfs_btree_update(cur, &rec);
}

/*
 * Get the data from the pointed-to record.
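 * The on-disk record fields are big-endian, so they are converted to CPU
 * byte order before being returned.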
 */
int                                     /* error */
xfs_alloc_get_rec(
        struct xfs_btree_cur    *cur,   /* btree cursor */
        xfs_agblock_t           *bno,   /* output: starting block of extent */
        xfs_extlen_t            *len,   /* output: length of extent */
        int                     *stat)  /* output: success/failure */
{
        union xfs_btree_rec     *rec;
        int                     error;

        error = xfs_btree_get_rec(cur, &rec, stat);
        if (!error && *stat == 1) {
                *bno = be32_to_cpu(rec->alloc.ar_startblock);
                *len = be32_to_cpu(rec->alloc.ar_blockcount);
        }
        return error;
}

/*
 * Compute aligned version of the found extent.
 * Takes alignment and min length into account.
 */
STATIC bool
xfs_alloc_compute_aligned(
        xfs_alloc_arg_t *args,          /* allocation argument structure */
        xfs_agblock_t   foundbno,       /* starting block in found extent */
        xfs_extlen_t    foundlen,       /* length in found extent */
        xfs_agblock_t   *resbno,        /* result block number */
        xfs_extlen_t    *reslen,        /* result length */
        unsigned        *busy_gen)
{
        xfs_agblock_t   bno = foundbno;
        xfs_extlen_t    len = foundlen;
        xfs_extlen_t    diff;
        bool            busy;

        /* Trim busy sections out of found extent */
        busy = xfs_extent_busy_trim(args, &bno, &len, busy_gen);

        /*
         * If we have a largish extent that happens to start before min_agbno,
         * see if we can shift it into range...
         */
        if (bno < args->min_agbno && bno + len > args->min_agbno) {
                diff = args->min_agbno - bno;
                if (len > diff) {
                        bno += diff;
                        len -= diff;
                }
        }

        if (args->alignment > 1 && len >= args->minlen) {
                xfs_agblock_t   aligned_bno = roundup(bno, args->alignment);

                diff = aligned_bno - bno;

                *resbno = aligned_bno;
                *reslen = diff >= len ? 0 : len - diff;
        } else {
                *resbno = bno;
                *reslen = len;
        }

        return busy;
}

/*
 * Compute best start block and diff for "near" allocations.
 * freelen >= wantlen already checked by caller.
 */
STATIC xfs_extlen_t                     /* difference value (absolute) */
xfs_alloc_compute_diff(
        xfs_agblock_t   wantbno,        /* target starting block */
        xfs_extlen_t    wantlen,        /* target length */
        xfs_extlen_t    alignment,      /* target alignment */
        int             datatype,       /* are we allocating data? */
        xfs_agblock_t   freebno,        /* freespace's starting block */
        xfs_extlen_t    freelen,        /* freespace's length */
        xfs_agblock_t   *newbnop)       /* result: best start block from free */
{
        xfs_agblock_t   freeend;        /* end of freespace extent */
        xfs_agblock_t   newbno1;        /* return block number */
        xfs_agblock_t   newbno2;        /* other new block number */
        xfs_extlen_t    newlen1=0;      /* length with newbno1 */
        xfs_extlen_t    newlen2=0;      /* length with newbno2 */
        xfs_agblock_t   wantend;        /* end of target extent */
        bool            userdata = xfs_alloc_is_userdata(datatype);

        ASSERT(freelen >= wantlen);
        freeend = freebno + freelen;
        wantend = wantbno + wantlen;
        /*
         * We want to allocate from the start of a free extent if it is past
         * the desired block or if we are allocating user data and the free
         * extent is before the desired block. The second case is there to
         * allow for contiguous allocation from the remaining free space if
         * the file grows in the short term.
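         *
         * For example (illustrative numbers): wantbno = 100, wantlen = 10,
         * userdata, and a free extent covering [40, 90). freeend (90) is
         * below wantend (110), so we allocate from freebno (40); the rest of
         * the free extent then sits directly after the new allocation, ready
         * for a contiguous allocation if the file soon grows.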
         */
        if (freebno >= wantbno || (userdata && freeend < wantend)) {
                if ((newbno1 = roundup(freebno, alignment)) >= freeend)
                        newbno1 = NULLAGBLOCK;
        } else if (freeend >= wantend && alignment > 1) {
                newbno1 = roundup(wantbno, alignment);
                newbno2 = newbno1 - alignment;
                if (newbno1 >= freeend)
                        newbno1 = NULLAGBLOCK;
                else
                        newlen1 = XFS_EXTLEN_MIN(wantlen, freeend - newbno1);
                if (newbno2 < freebno)
                        newbno2 = NULLAGBLOCK;
                else
                        newlen2 = XFS_EXTLEN_MIN(wantlen, freeend - newbno2);
                if (newbno1 != NULLAGBLOCK && newbno2 != NULLAGBLOCK) {
                        if (newlen1 < newlen2 ||
                            (newlen1 == newlen2 &&
                             XFS_ABSDIFF(newbno1, wantbno) >
                             XFS_ABSDIFF(newbno2, wantbno)))
                                newbno1 = newbno2;
                } else if (newbno2 != NULLAGBLOCK)
                        newbno1 = newbno2;
        } else if (freeend >= wantend) {
                newbno1 = wantbno;
        } else if (alignment > 1) {
                newbno1 = roundup(freeend - wantlen, alignment);
                if (newbno1 > freeend - wantlen &&
                    newbno1 - alignment >= freebno)
                        newbno1 -= alignment;
                else if (newbno1 >= freeend)
                        newbno1 = NULLAGBLOCK;
        } else
                newbno1 = freeend - wantlen;
        *newbnop = newbno1;
        return newbno1 == NULLAGBLOCK ? 0 : XFS_ABSDIFF(newbno1, wantbno);
}

/*
 * Fix up the length, based on mod and prod.
 * len should be k * prod + mod for some k.
 * If len is too small it is returned unchanged.
 * If len hits maxlen it is left alone.
 */
STATIC void
xfs_alloc_fix_len(
        xfs_alloc_arg_t *args)          /* allocation argument structure */
{
        xfs_extlen_t    k;
        xfs_extlen_t    rlen;

        ASSERT(args->mod < args->prod);
        rlen = args->len;
        ASSERT(rlen >= args->minlen);
        ASSERT(rlen <= args->maxlen);
        if (args->prod <= 1 || rlen < args->mod || rlen == args->maxlen ||
            (args->mod == 0 && rlen < args->prod))
                return;
        k = rlen % args->prod;
        if (k == args->mod)
                return;
        if (k > args->mod)
                rlen = rlen - (k - args->mod);
        else
                rlen = rlen - args->prod + (args->mod - k);
        /* casts to (int) catch length underflows */
        if ((int)rlen < (int)args->minlen)
                return;
        ASSERT(rlen >= args->minlen && rlen <= args->maxlen);
        ASSERT(rlen % args->prod == args->mod);
        ASSERT(args->pag->pagf_freeblks + args->pag->pagf_flcount >=
                rlen + args->minleft);
        args->len = rlen;
}

/*
 * Update the two btrees, logically removing from freespace the extent
 * starting at rbno, rlen blocks.  The extent is contained within the
 * actual (current) free extent fbno for flen blocks.
 * Flags are passed in indicating whether the cursors are set to the
 * relevant records.
 */
STATIC int                              /* error code */
xfs_alloc_fixup_trees(
        xfs_btree_cur_t *cnt_cur,       /* cursor for by-size btree */
        xfs_btree_cur_t *bno_cur,       /* cursor for by-block btree */
        xfs_agblock_t   fbno,           /* starting block of free extent */
        xfs_extlen_t    flen,           /* length of free extent */
        xfs_agblock_t   rbno,           /* starting block of returned extent */
        xfs_extlen_t    rlen,           /* length of returned extent */
        int             flags)          /* flags, XFSA_FIXUP_... */
{
        int             error;          /* error code */
        int             i;              /* operation results */
        xfs_agblock_t   nfbno1;         /* first new free startblock */
        xfs_agblock_t   nfbno2;         /* second new free startblock */
        xfs_extlen_t    nflen1=0;       /* first new free length */
        xfs_extlen_t    nflen2=0;       /* second new free length */
        struct xfs_mount *mp;

        mp = cnt_cur->bc_mp;

        /*
         * Look up the record in the by-size tree if necessary.
         */
        if (flags & XFSA_FIXUP_CNT_OK) {
#ifdef DEBUG
                if ((error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i)))
                        return error;
                XFS_WANT_CORRUPTED_RETURN(mp,
                        i == 1 && nfbno1 == fbno && nflen1 == flen);
#endif
        } else {
                if ((error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i)))
                        return error;
                XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
        }
        /*
         * Look up the record in the by-block tree if necessary.
         */
        if (flags & XFSA_FIXUP_BNO_OK) {
#ifdef DEBUG
                if ((error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i)))
                        return error;
                XFS_WANT_CORRUPTED_RETURN(mp,
                        i == 1 && nfbno1 == fbno && nflen1 == flen);
#endif
        } else {
                if ((error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i)))
                        return error;
                XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
        }

#ifdef DEBUG
        if (bno_cur->bc_nlevels == 1 && cnt_cur->bc_nlevels == 1) {
                struct xfs_btree_block  *bnoblock;
                struct xfs_btree_block  *cntblock;

                bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_bufs[0]);
                cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_bufs[0]);

                XFS_WANT_CORRUPTED_RETURN(mp,
                        bnoblock->bb_numrecs == cntblock->bb_numrecs);
        }
#endif

        /*
         * Deal with all four cases: the allocated record is contained
         * within the freespace record, so we can have new freespace
         * at either (or both) end, or no freespace remaining.
         */
        if (rbno == fbno && rlen == flen)
                nfbno1 = nfbno2 = NULLAGBLOCK;
        else if (rbno == fbno) {
                nfbno1 = rbno + rlen;
                nflen1 = flen - rlen;
                nfbno2 = NULLAGBLOCK;
        } else if (rbno + rlen == fbno + flen) {
                nfbno1 = fbno;
                nflen1 = flen - rlen;
                nfbno2 = NULLAGBLOCK;
        } else {
                nfbno1 = fbno;
                nflen1 = rbno - fbno;
                nfbno2 = rbno + rlen;
                nflen2 = (fbno + flen) - nfbno2;
        }
        /*
         * Delete the entry from the by-size btree.
         */
        if ((error = xfs_btree_delete(cnt_cur, &i)))
                return error;
        XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
        /*
         * Add new by-size btree entry(s).
         */
        if (nfbno1 != NULLAGBLOCK) {
                if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i)))
                        return error;
                XFS_WANT_CORRUPTED_RETURN(mp, i == 0);
                if ((error = xfs_btree_insert(cnt_cur, &i)))
                        return error;
                XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
        }
        if (nfbno2 != NULLAGBLOCK) {
                if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i)))
                        return error;
                XFS_WANT_CORRUPTED_RETURN(mp, i == 0);
                if ((error = xfs_btree_insert(cnt_cur, &i)))
                        return error;
                XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
        }
        /*
         * Fix up the by-block btree entry(s).
         */
        if (nfbno1 == NULLAGBLOCK) {
                /*
                 * No remaining freespace, just delete the by-block tree entry.
                 */
                if ((error = xfs_btree_delete(bno_cur, &i)))
                        return error;
                XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
        } else {
                /*
                 * Update the by-block entry to start later|be shorter.
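                 * (i.e. the remaining freespace either begins after the
                 * allocated range, or keeps its start and loses its tail)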
505 */ 506 if ((error = xfs_alloc_update(bno_cur, nfbno1, nflen1))) 507 return error; 508 } 509 if (nfbno2 != NULLAGBLOCK) { 510 /* 511 * 2 resulting free entries, need to add one. 512 */ 513 if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i))) 514 return error; 515 XFS_WANT_CORRUPTED_RETURN(mp, i == 0); 516 if ((error = xfs_btree_insert(bno_cur, &i))) 517 return error; 518 XFS_WANT_CORRUPTED_RETURN(mp, i == 1); 519 } 520 return 0; 521 } 522 523 static bool 524 xfs_agfl_verify( 525 struct xfs_buf *bp) 526 { 527 struct xfs_mount *mp = bp->b_target->bt_mount; 528 struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp); 529 int i; 530 531 if (!uuid_equal(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid)) 532 return false; 533 if (be32_to_cpu(agfl->agfl_magicnum) != XFS_AGFL_MAGIC) 534 return false; 535 /* 536 * during growfs operations, the perag is not fully initialised, 537 * so we can't use it for any useful checking. growfs ensures we can't 538 * use it by using uncached buffers that don't have the perag attached 539 * so we can detect and avoid this problem. 540 */ 541 if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno) 542 return false; 543 544 for (i = 0; i < XFS_AGFL_SIZE(mp); i++) { 545 if (be32_to_cpu(agfl->agfl_bno[i]) != NULLAGBLOCK && 546 be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks) 547 return false; 548 } 549 550 return xfs_log_check_lsn(mp, 551 be64_to_cpu(XFS_BUF_TO_AGFL(bp)->agfl_lsn)); 552 } 553 554 static void 555 xfs_agfl_read_verify( 556 struct xfs_buf *bp) 557 { 558 struct xfs_mount *mp = bp->b_target->bt_mount; 559 560 /* 561 * There is no verification of non-crc AGFLs because mkfs does not 562 * initialise the AGFL to zero or NULL. Hence the only valid part of the 563 * AGFL is what the AGF says is active. We can't get to the AGF, so we 564 * can't verify just those entries are valid. 565 */ 566 if (!xfs_sb_version_hascrc(&mp->m_sb)) 567 return; 568 569 if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF)) 570 xfs_buf_ioerror(bp, -EFSBADCRC); 571 else if (!xfs_agfl_verify(bp)) 572 xfs_buf_ioerror(bp, -EFSCORRUPTED); 573 574 if (bp->b_error) 575 xfs_verifier_error(bp); 576 } 577 578 static void 579 xfs_agfl_write_verify( 580 struct xfs_buf *bp) 581 { 582 struct xfs_mount *mp = bp->b_target->bt_mount; 583 struct xfs_buf_log_item *bip = bp->b_fspriv; 584 585 /* no verification of non-crc AGFLs */ 586 if (!xfs_sb_version_hascrc(&mp->m_sb)) 587 return; 588 589 if (!xfs_agfl_verify(bp)) { 590 xfs_buf_ioerror(bp, -EFSCORRUPTED); 591 xfs_verifier_error(bp); 592 return; 593 } 594 595 if (bip) 596 XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn); 597 598 xfs_buf_update_cksum(bp, XFS_AGFL_CRC_OFF); 599 } 600 601 const struct xfs_buf_ops xfs_agfl_buf_ops = { 602 .name = "xfs_agfl", 603 .verify_read = xfs_agfl_read_verify, 604 .verify_write = xfs_agfl_write_verify, 605 }; 606 607 /* 608 * Read in the allocation group free block array. 
609 */ 610 int /* error */ 611 xfs_alloc_read_agfl( 612 xfs_mount_t *mp, /* mount point structure */ 613 xfs_trans_t *tp, /* transaction pointer */ 614 xfs_agnumber_t agno, /* allocation group number */ 615 xfs_buf_t **bpp) /* buffer for the ag free block array */ 616 { 617 xfs_buf_t *bp; /* return value */ 618 int error; 619 620 ASSERT(agno != NULLAGNUMBER); 621 error = xfs_trans_read_buf( 622 mp, tp, mp->m_ddev_targp, 623 XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)), 624 XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_agfl_buf_ops); 625 if (error) 626 return error; 627 xfs_buf_set_ref(bp, XFS_AGFL_REF); 628 *bpp = bp; 629 return 0; 630 } 631 632 STATIC int 633 xfs_alloc_update_counters( 634 struct xfs_trans *tp, 635 struct xfs_perag *pag, 636 struct xfs_buf *agbp, 637 long len) 638 { 639 struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); 640 641 pag->pagf_freeblks += len; 642 be32_add_cpu(&agf->agf_freeblks, len); 643 644 xfs_trans_agblocks_delta(tp, len); 645 if (unlikely(be32_to_cpu(agf->agf_freeblks) > 646 be32_to_cpu(agf->agf_length))) 647 return -EFSCORRUPTED; 648 649 xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS); 650 return 0; 651 } 652 653 /* 654 * Allocation group level functions. 655 */ 656 657 /* 658 * Allocate a variable extent in the allocation group agno. 659 * Type and bno are used to determine where in the allocation group the 660 * extent will start. 661 * Extent's length (returned in *len) will be between minlen and maxlen, 662 * and of the form k * prod + mod unless there's nothing that large. 663 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it. 664 */ 665 STATIC int /* error */ 666 xfs_alloc_ag_vextent( 667 xfs_alloc_arg_t *args) /* argument structure for allocation */ 668 { 669 int error=0; 670 671 ASSERT(args->minlen > 0); 672 ASSERT(args->maxlen > 0); 673 ASSERT(args->minlen <= args->maxlen); 674 ASSERT(args->mod < args->prod); 675 ASSERT(args->alignment > 0); 676 677 /* 678 * Branch to correct routine based on the type. 679 */ 680 args->wasfromfl = 0; 681 switch (args->type) { 682 case XFS_ALLOCTYPE_THIS_AG: 683 error = xfs_alloc_ag_vextent_size(args); 684 break; 685 case XFS_ALLOCTYPE_NEAR_BNO: 686 error = xfs_alloc_ag_vextent_near(args); 687 break; 688 case XFS_ALLOCTYPE_THIS_BNO: 689 error = xfs_alloc_ag_vextent_exact(args); 690 break; 691 default: 692 ASSERT(0); 693 /* NOTREACHED */ 694 } 695 696 if (error || args->agbno == NULLAGBLOCK) 697 return error; 698 699 ASSERT(args->len >= args->minlen); 700 ASSERT(args->len <= args->maxlen); 701 ASSERT(!args->wasfromfl || args->resv != XFS_AG_RESV_AGFL); 702 ASSERT(args->agbno % args->alignment == 0); 703 704 /* if not file data, insert new block into the reverse map btree */ 705 if (args->oinfo.oi_owner != XFS_RMAP_OWN_UNKNOWN) { 706 error = xfs_rmap_alloc(args->tp, args->agbp, args->agno, 707 args->agbno, args->len, &args->oinfo); 708 if (error) 709 return error; 710 } 711 712 if (!args->wasfromfl) { 713 error = xfs_alloc_update_counters(args->tp, args->pag, 714 args->agbp, 715 -((long)(args->len))); 716 if (error) 717 return error; 718 719 ASSERT(!xfs_extent_busy_search(args->mp, args->agno, 720 args->agbno, args->len)); 721 } 722 723 xfs_ag_resv_alloc_extent(args->pag, args->resv, args); 724 725 XFS_STATS_INC(args->mp, xs_allocx); 726 XFS_STATS_ADD(args->mp, xs_allocb, args->len); 727 return error; 728 } 729 730 /* 731 * Allocate a variable extent at exactly agno/bno. 732 * Extent's length (returned in *len) will be between minlen and maxlen, 733 * and of the form k * prod + mod unless there's nothing that large. 
 * Return the starting a.g. block (bno), or NULLAGBLOCK if we can't do it.
 */
STATIC int                      /* error */
xfs_alloc_ag_vextent_exact(
        xfs_alloc_arg_t *args)  /* allocation argument structure */
{
        xfs_btree_cur_t *bno_cur;/* by block-number btree cursor */
        xfs_btree_cur_t *cnt_cur;/* by count btree cursor */
        int             error;
        xfs_agblock_t   fbno;   /* start block of found extent */
        xfs_extlen_t    flen;   /* length of found extent */
        xfs_agblock_t   tbno;   /* start block of busy extent */
        xfs_extlen_t    tlen;   /* length of busy extent */
        xfs_agblock_t   tend;   /* end block of busy extent */
        int             i;      /* success/failure of operation */
        unsigned        busy_gen;

        ASSERT(args->alignment == 1);

        /*
         * Allocate/initialize a cursor for the by-number freespace btree.
         */
        bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
                                          args->agno, XFS_BTNUM_BNO);

        /*
         * Lookup bno and minlen in the btree (minlen is irrelevant, really).
         * Look for the closest free block <= bno; it must contain bno
         * if any free block does.
         */
        error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i);
        if (error)
                goto error0;
        if (!i)
                goto not_found;

        /*
         * Grab the freespace record.
         */
        error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i);
        if (error)
                goto error0;
        XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
        ASSERT(fbno <= args->agbno);

        /*
         * Check for overlapping busy extents.
         */
        tbno = fbno;
        tlen = flen;
        xfs_extent_busy_trim(args, &tbno, &tlen, &busy_gen);

        /*
         * Give up if the start of the extent is busy, or the freespace isn't
         * long enough for the minimum request.
         */
        if (tbno > args->agbno)
                goto not_found;
        if (tlen < args->minlen)
                goto not_found;
        tend = tbno + tlen;
        if (tend < args->agbno + args->minlen)
                goto not_found;

        /*
         * End of extent will be the smaller of the freespace end and the
         * maximal requested end.
         *
         * Fix the length according to mod and prod if given.
         */
        args->len = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen)
                                                - args->agbno;
        xfs_alloc_fix_len(args);
        ASSERT(args->agbno + args->len <= tend);

        /*
         * We are allocating agbno for args->len
         * Allocate/initialize a cursor for the by-size btree.
         */
        cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
                args->agno, XFS_BTNUM_CNT);
        ASSERT(args->agbno + args->len <=
                be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
        error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno,
                                      args->len, XFSA_FIXUP_BNO_OK);
        if (error) {
                xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
                goto error0;
        }

        xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);

        args->wasfromfl = 0;
        trace_xfs_alloc_exact_done(args);
        return 0;

not_found:
        /* Didn't find it, return null. */
        xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
        args->agbno = NULLAGBLOCK;
        trace_xfs_alloc_exact_notfound(args);
        return 0;

error0:
        xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
        trace_xfs_alloc_exact_error(args);
        return error;
}

/*
 * Search the btree in a given direction via the search cursor and compare
 * the records found against the good extent we've already found.
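 * dir selects the walk direction: 0 increments rightwards, 1 decrements
 * leftwards; the search stops as soon as the good extent provably wins.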
847 */ 848 STATIC int 849 xfs_alloc_find_best_extent( 850 struct xfs_alloc_arg *args, /* allocation argument structure */ 851 struct xfs_btree_cur **gcur, /* good cursor */ 852 struct xfs_btree_cur **scur, /* searching cursor */ 853 xfs_agblock_t gdiff, /* difference for search comparison */ 854 xfs_agblock_t *sbno, /* extent found by search */ 855 xfs_extlen_t *slen, /* extent length */ 856 xfs_agblock_t *sbnoa, /* aligned extent found by search */ 857 xfs_extlen_t *slena, /* aligned extent length */ 858 int dir) /* 0 = search right, 1 = search left */ 859 { 860 xfs_agblock_t new; 861 xfs_agblock_t sdiff; 862 int error; 863 int i; 864 unsigned busy_gen; 865 866 /* The good extent is perfect, no need to search. */ 867 if (!gdiff) 868 goto out_use_good; 869 870 /* 871 * Look until we find a better one, run out of space or run off the end. 872 */ 873 do { 874 error = xfs_alloc_get_rec(*scur, sbno, slen, &i); 875 if (error) 876 goto error0; 877 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); 878 xfs_alloc_compute_aligned(args, *sbno, *slen, 879 sbnoa, slena, &busy_gen); 880 881 /* 882 * The good extent is closer than this one. 883 */ 884 if (!dir) { 885 if (*sbnoa > args->max_agbno) 886 goto out_use_good; 887 if (*sbnoa >= args->agbno + gdiff) 888 goto out_use_good; 889 } else { 890 if (*sbnoa < args->min_agbno) 891 goto out_use_good; 892 if (*sbnoa <= args->agbno - gdiff) 893 goto out_use_good; 894 } 895 896 /* 897 * Same distance, compare length and pick the best. 898 */ 899 if (*slena >= args->minlen) { 900 args->len = XFS_EXTLEN_MIN(*slena, args->maxlen); 901 xfs_alloc_fix_len(args); 902 903 sdiff = xfs_alloc_compute_diff(args->agbno, args->len, 904 args->alignment, 905 args->datatype, *sbnoa, 906 *slena, &new); 907 908 /* 909 * Choose closer size and invalidate other cursor. 910 */ 911 if (sdiff < gdiff) 912 goto out_use_search; 913 goto out_use_good; 914 } 915 916 if (!dir) 917 error = xfs_btree_increment(*scur, 0, &i); 918 else 919 error = xfs_btree_decrement(*scur, 0, &i); 920 if (error) 921 goto error0; 922 } while (i); 923 924 out_use_good: 925 xfs_btree_del_cursor(*scur, XFS_BTREE_NOERROR); 926 *scur = NULL; 927 return 0; 928 929 out_use_search: 930 xfs_btree_del_cursor(*gcur, XFS_BTREE_NOERROR); 931 *gcur = NULL; 932 return 0; 933 934 error0: 935 /* caller invalidates cursors */ 936 return error; 937 } 938 939 /* 940 * Allocate a variable extent near bno in the allocation group agno. 941 * Extent's length (returned in len) will be between minlen and maxlen, 942 * and of the form k * prod + mod unless there's nothing that large. 943 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it. 944 */ 945 STATIC int /* error */ 946 xfs_alloc_ag_vextent_near( 947 xfs_alloc_arg_t *args) /* allocation argument structure */ 948 { 949 xfs_btree_cur_t *bno_cur_gt; /* cursor for bno btree, right side */ 950 xfs_btree_cur_t *bno_cur_lt; /* cursor for bno btree, left side */ 951 xfs_btree_cur_t *cnt_cur; /* cursor for count btree */ 952 xfs_agblock_t gtbno; /* start bno of right side entry */ 953 xfs_agblock_t gtbnoa; /* aligned ... */ 954 xfs_extlen_t gtdiff; /* difference to right side entry */ 955 xfs_extlen_t gtlen; /* length of right side entry */ 956 xfs_extlen_t gtlena; /* aligned ... */ 957 xfs_agblock_t gtnew; /* useful start bno of right side */ 958 int error; /* error code */ 959 int i; /* result code, temporary */ 960 int j; /* result code, temporary */ 961 xfs_agblock_t ltbno; /* start bno of left side entry */ 962 xfs_agblock_t ltbnoa; /* aligned ... 
        xfs_extlen_t    ltdiff;         /* difference to left side entry */
        xfs_extlen_t    ltlen;          /* length of left side entry */
        xfs_extlen_t    ltlena;         /* aligned ... */
        xfs_agblock_t   ltnew;          /* useful start bno of left side */
        xfs_extlen_t    rlen;           /* length of returned extent */
        bool            busy;
        unsigned        busy_gen;
#ifdef DEBUG
        /*
         * Randomly don't execute the first algorithm.
         */
        int             dofirst;        /* set to do first algorithm */

        dofirst = prandom_u32() & 1;
#endif

        /* handle uninitialized agbno range so caller doesn't have to */
        if (!args->min_agbno && !args->max_agbno)
                args->max_agbno = args->mp->m_sb.sb_agblocks - 1;
        ASSERT(args->min_agbno <= args->max_agbno);

        /* clamp agbno to the range if it's outside */
        if (args->agbno < args->min_agbno)
                args->agbno = args->min_agbno;
        if (args->agbno > args->max_agbno)
                args->agbno = args->max_agbno;

restart:
        bno_cur_lt = NULL;
        bno_cur_gt = NULL;
        ltlen = 0;
        gtlena = 0;
        ltlena = 0;
        busy = false;

        /*
         * Get a cursor for the by-size btree.
         */
        cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
                args->agno, XFS_BTNUM_CNT);

        /*
         * See if there are any free extents as big as maxlen.
         */
        if ((error = xfs_alloc_lookup_ge(cnt_cur, 0, args->maxlen, &i)))
                goto error0;
        /*
         * If none, then pick up the last entry in the tree unless the
         * tree is empty.
         */
        if (!i) {
                if ((error = xfs_alloc_ag_vextent_small(args, cnt_cur, &ltbno,
                                &ltlen, &i)))
                        goto error0;
                if (i == 0 || ltlen == 0) {
                        xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
                        trace_xfs_alloc_near_noentry(args);
                        return 0;
                }
                ASSERT(i == 1);
        }
        args->wasfromfl = 0;

        /*
         * First algorithm.
         * If the requested extent is large wrt the freespaces available
         * in this a.g., then the cursor will be pointing to a btree entry
         * near the right edge of the tree.  If it's in the last btree leaf
         * block, then we just examine all the entries in that block
         * that are big enough, and pick the best one.
         * This is written as a while loop so we can break out of it,
         * but we never loop back to the top.
         */
        while (xfs_btree_islastblock(cnt_cur, 0)) {
                xfs_extlen_t    bdiff;
                int             besti=0;
                xfs_extlen_t    blen=0;
                xfs_agblock_t   bnew=0;

#ifdef DEBUG
                if (dofirst)
                        break;
#endif
                /*
                 * Start from the entry that lookup found, sequence through
                 * all larger free blocks.  If we're actually pointing at a
                 * record smaller than maxlen, go to the start of this block,
                 * and skip all those smaller than minlen.
                 */
                if (ltlen || args->alignment > 1) {
                        cnt_cur->bc_ptrs[0] = 1;
                        do {
                                if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno,
                                                &ltlen, &i)))
                                        goto error0;
                                XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
                                if (ltlen >= args->minlen)
                                        break;
                                if ((error = xfs_btree_increment(cnt_cur, 0, &i)))
                                        goto error0;
                        } while (i);
                        ASSERT(ltlen >= args->minlen);
                        if (!i)
                                break;
                }
                i = cnt_cur->bc_ptrs[0];
                for (j = 1, blen = 0, bdiff = 0;
                     !error && j && (blen < args->maxlen || bdiff > 0);
                     error = xfs_btree_increment(cnt_cur, 0, &j)) {
                        /*
                         * For each entry, decide if it's better than
                         * the previous best entry.
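                         * "Better" means a longer usable length, or an equal
                         * length at a smaller distance from the requested
                         * agbno.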
1075 */ 1076 if ((error = xfs_alloc_get_rec(cnt_cur, <bno, <len, &i))) 1077 goto error0; 1078 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); 1079 busy = xfs_alloc_compute_aligned(args, ltbno, ltlen, 1080 <bnoa, <lena, &busy_gen); 1081 if (ltlena < args->minlen) 1082 continue; 1083 if (ltbnoa < args->min_agbno || ltbnoa > args->max_agbno) 1084 continue; 1085 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); 1086 xfs_alloc_fix_len(args); 1087 ASSERT(args->len >= args->minlen); 1088 if (args->len < blen) 1089 continue; 1090 ltdiff = xfs_alloc_compute_diff(args->agbno, args->len, 1091 args->alignment, args->datatype, ltbnoa, 1092 ltlena, <new); 1093 if (ltnew != NULLAGBLOCK && 1094 (args->len > blen || ltdiff < bdiff)) { 1095 bdiff = ltdiff; 1096 bnew = ltnew; 1097 blen = args->len; 1098 besti = cnt_cur->bc_ptrs[0]; 1099 } 1100 } 1101 /* 1102 * It didn't work. We COULD be in a case where 1103 * there's a good record somewhere, so try again. 1104 */ 1105 if (blen == 0) 1106 break; 1107 /* 1108 * Point at the best entry, and retrieve it again. 1109 */ 1110 cnt_cur->bc_ptrs[0] = besti; 1111 if ((error = xfs_alloc_get_rec(cnt_cur, <bno, <len, &i))) 1112 goto error0; 1113 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); 1114 ASSERT(ltbno + ltlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); 1115 args->len = blen; 1116 1117 /* 1118 * We are allocating starting at bnew for blen blocks. 1119 */ 1120 args->agbno = bnew; 1121 ASSERT(bnew >= ltbno); 1122 ASSERT(bnew + blen <= ltbno + ltlen); 1123 /* 1124 * Set up a cursor for the by-bno tree. 1125 */ 1126 bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp, 1127 args->agbp, args->agno, XFS_BTNUM_BNO); 1128 /* 1129 * Fix up the btree entries. 1130 */ 1131 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, 1132 ltlen, bnew, blen, XFSA_FIXUP_CNT_OK))) 1133 goto error0; 1134 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1135 xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR); 1136 1137 trace_xfs_alloc_near_first(args); 1138 return 0; 1139 } 1140 /* 1141 * Second algorithm. 1142 * Search in the by-bno tree to the left and to the right 1143 * simultaneously, until in each case we find a space big enough, 1144 * or run into the edge of the tree. When we run into the edge, 1145 * we deallocate that cursor. 1146 * If both searches succeed, we compare the two spaces and pick 1147 * the better one. 1148 * With alignment, it's possible for both to fail; the upper 1149 * level algorithm that picks allocation groups for allocations 1150 * is not supposed to do this. 1151 */ 1152 /* 1153 * Allocate and initialize the cursor for the leftward search. 1154 */ 1155 bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, 1156 args->agno, XFS_BTNUM_BNO); 1157 /* 1158 * Lookup <= bno to find the leftward search's starting point. 1159 */ 1160 if ((error = xfs_alloc_lookup_le(bno_cur_lt, args->agbno, args->maxlen, &i))) 1161 goto error0; 1162 if (!i) { 1163 /* 1164 * Didn't find anything; use this cursor for the rightward 1165 * search. 1166 */ 1167 bno_cur_gt = bno_cur_lt; 1168 bno_cur_lt = NULL; 1169 } 1170 /* 1171 * Found something. Duplicate the cursor for the rightward search. 1172 */ 1173 else if ((error = xfs_btree_dup_cursor(bno_cur_lt, &bno_cur_gt))) 1174 goto error0; 1175 /* 1176 * Increment the cursor, so we will point at the entry just right 1177 * of the leftward entry if any, or to the leftmost entry. 
1178 */ 1179 if ((error = xfs_btree_increment(bno_cur_gt, 0, &i))) 1180 goto error0; 1181 if (!i) { 1182 /* 1183 * It failed, there are no rightward entries. 1184 */ 1185 xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_NOERROR); 1186 bno_cur_gt = NULL; 1187 } 1188 /* 1189 * Loop going left with the leftward cursor, right with the 1190 * rightward cursor, until either both directions give up or 1191 * we find an entry at least as big as minlen. 1192 */ 1193 do { 1194 if (bno_cur_lt) { 1195 if ((error = xfs_alloc_get_rec(bno_cur_lt, <bno, <len, &i))) 1196 goto error0; 1197 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); 1198 busy |= xfs_alloc_compute_aligned(args, ltbno, ltlen, 1199 <bnoa, <lena, &busy_gen); 1200 if (ltlena >= args->minlen && ltbnoa >= args->min_agbno) 1201 break; 1202 if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i))) 1203 goto error0; 1204 if (!i || ltbnoa < args->min_agbno) { 1205 xfs_btree_del_cursor(bno_cur_lt, 1206 XFS_BTREE_NOERROR); 1207 bno_cur_lt = NULL; 1208 } 1209 } 1210 if (bno_cur_gt) { 1211 if ((error = xfs_alloc_get_rec(bno_cur_gt, >bno, >len, &i))) 1212 goto error0; 1213 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); 1214 busy |= xfs_alloc_compute_aligned(args, gtbno, gtlen, 1215 >bnoa, >lena, &busy_gen); 1216 if (gtlena >= args->minlen && gtbnoa <= args->max_agbno) 1217 break; 1218 if ((error = xfs_btree_increment(bno_cur_gt, 0, &i))) 1219 goto error0; 1220 if (!i || gtbnoa > args->max_agbno) { 1221 xfs_btree_del_cursor(bno_cur_gt, 1222 XFS_BTREE_NOERROR); 1223 bno_cur_gt = NULL; 1224 } 1225 } 1226 } while (bno_cur_lt || bno_cur_gt); 1227 1228 /* 1229 * Got both cursors still active, need to find better entry. 1230 */ 1231 if (bno_cur_lt && bno_cur_gt) { 1232 if (ltlena >= args->minlen) { 1233 /* 1234 * Left side is good, look for a right side entry. 1235 */ 1236 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); 1237 xfs_alloc_fix_len(args); 1238 ltdiff = xfs_alloc_compute_diff(args->agbno, args->len, 1239 args->alignment, args->datatype, ltbnoa, 1240 ltlena, <new); 1241 1242 error = xfs_alloc_find_best_extent(args, 1243 &bno_cur_lt, &bno_cur_gt, 1244 ltdiff, >bno, >len, 1245 >bnoa, >lena, 1246 0 /* search right */); 1247 } else { 1248 ASSERT(gtlena >= args->minlen); 1249 1250 /* 1251 * Right side is good, look for a left side entry. 1252 */ 1253 args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen); 1254 xfs_alloc_fix_len(args); 1255 gtdiff = xfs_alloc_compute_diff(args->agbno, args->len, 1256 args->alignment, args->datatype, gtbnoa, 1257 gtlena, >new); 1258 1259 error = xfs_alloc_find_best_extent(args, 1260 &bno_cur_gt, &bno_cur_lt, 1261 gtdiff, <bno, <len, 1262 <bnoa, <lena, 1263 1 /* search left */); 1264 } 1265 1266 if (error) 1267 goto error0; 1268 } 1269 1270 /* 1271 * If we couldn't get anything, give up. 1272 */ 1273 if (bno_cur_lt == NULL && bno_cur_gt == NULL) { 1274 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1275 1276 if (busy) { 1277 trace_xfs_alloc_near_busy(args); 1278 xfs_extent_busy_flush(args->mp, args->pag, busy_gen); 1279 goto restart; 1280 } 1281 trace_xfs_alloc_size_neither(args); 1282 args->agbno = NULLAGBLOCK; 1283 return 0; 1284 } 1285 1286 /* 1287 * At this point we have selected a freespace entry, either to the 1288 * left or to the right. If it's on the right, copy all the 1289 * useful variables to the "left" set so we only have one 1290 * copy of this code. 
1291 */ 1292 if (bno_cur_gt) { 1293 bno_cur_lt = bno_cur_gt; 1294 bno_cur_gt = NULL; 1295 ltbno = gtbno; 1296 ltbnoa = gtbnoa; 1297 ltlen = gtlen; 1298 ltlena = gtlena; 1299 j = 1; 1300 } else 1301 j = 0; 1302 1303 /* 1304 * Fix up the length and compute the useful address. 1305 */ 1306 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); 1307 xfs_alloc_fix_len(args); 1308 rlen = args->len; 1309 (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment, 1310 args->datatype, ltbnoa, ltlena, <new); 1311 ASSERT(ltnew >= ltbno); 1312 ASSERT(ltnew + rlen <= ltbnoa + ltlena); 1313 ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); 1314 ASSERT(ltnew >= args->min_agbno && ltnew <= args->max_agbno); 1315 args->agbno = ltnew; 1316 1317 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen, 1318 ltnew, rlen, XFSA_FIXUP_BNO_OK))) 1319 goto error0; 1320 1321 if (j) 1322 trace_xfs_alloc_near_greater(args); 1323 else 1324 trace_xfs_alloc_near_lesser(args); 1325 1326 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1327 xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR); 1328 return 0; 1329 1330 error0: 1331 trace_xfs_alloc_near_error(args); 1332 if (cnt_cur != NULL) 1333 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR); 1334 if (bno_cur_lt != NULL) 1335 xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_ERROR); 1336 if (bno_cur_gt != NULL) 1337 xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_ERROR); 1338 return error; 1339 } 1340 1341 /* 1342 * Allocate a variable extent anywhere in the allocation group agno. 1343 * Extent's length (returned in len) will be between minlen and maxlen, 1344 * and of the form k * prod + mod unless there's nothing that large. 1345 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it. 1346 */ 1347 STATIC int /* error */ 1348 xfs_alloc_ag_vextent_size( 1349 xfs_alloc_arg_t *args) /* allocation argument structure */ 1350 { 1351 xfs_btree_cur_t *bno_cur; /* cursor for bno btree */ 1352 xfs_btree_cur_t *cnt_cur; /* cursor for cnt btree */ 1353 int error; /* error result */ 1354 xfs_agblock_t fbno; /* start of found freespace */ 1355 xfs_extlen_t flen; /* length of found freespace */ 1356 int i; /* temp status variable */ 1357 xfs_agblock_t rbno; /* returned block number */ 1358 xfs_extlen_t rlen; /* length of returned extent */ 1359 bool busy; 1360 unsigned busy_gen; 1361 1362 restart: 1363 /* 1364 * Allocate and initialize a cursor for the by-size btree. 1365 */ 1366 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, 1367 args->agno, XFS_BTNUM_CNT); 1368 bno_cur = NULL; 1369 busy = false; 1370 1371 /* 1372 * Look for an entry >= maxlen+alignment-1 blocks. 1373 */ 1374 if ((error = xfs_alloc_lookup_ge(cnt_cur, 0, 1375 args->maxlen + args->alignment - 1, &i))) 1376 goto error0; 1377 1378 /* 1379 * If none then we have to settle for a smaller extent. In the case that 1380 * there are no large extents, this will return the last entry in the 1381 * tree unless the tree is empty. In the case that there are only busy 1382 * large extents, this will return the largest small extent unless there 1383 * are no smaller extents available. 
1384 */ 1385 if (!i) { 1386 error = xfs_alloc_ag_vextent_small(args, cnt_cur, 1387 &fbno, &flen, &i); 1388 if (error) 1389 goto error0; 1390 if (i == 0 || flen == 0) { 1391 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1392 trace_xfs_alloc_size_noentry(args); 1393 return 0; 1394 } 1395 ASSERT(i == 1); 1396 busy = xfs_alloc_compute_aligned(args, fbno, flen, &rbno, 1397 &rlen, &busy_gen); 1398 } else { 1399 /* 1400 * Search for a non-busy extent that is large enough. 1401 */ 1402 for (;;) { 1403 error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i); 1404 if (error) 1405 goto error0; 1406 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); 1407 1408 busy = xfs_alloc_compute_aligned(args, fbno, flen, 1409 &rbno, &rlen, &busy_gen); 1410 1411 if (rlen >= args->maxlen) 1412 break; 1413 1414 error = xfs_btree_increment(cnt_cur, 0, &i); 1415 if (error) 1416 goto error0; 1417 if (i == 0) { 1418 /* 1419 * Our only valid extents must have been busy. 1420 * Make it unbusy by forcing the log out and 1421 * retrying. 1422 */ 1423 xfs_btree_del_cursor(cnt_cur, 1424 XFS_BTREE_NOERROR); 1425 trace_xfs_alloc_size_busy(args); 1426 xfs_extent_busy_flush(args->mp, 1427 args->pag, busy_gen); 1428 goto restart; 1429 } 1430 } 1431 } 1432 1433 /* 1434 * In the first case above, we got the last entry in the 1435 * by-size btree. Now we check to see if the space hits maxlen 1436 * once aligned; if not, we search left for something better. 1437 * This can't happen in the second case above. 1438 */ 1439 rlen = XFS_EXTLEN_MIN(args->maxlen, rlen); 1440 XFS_WANT_CORRUPTED_GOTO(args->mp, rlen == 0 || 1441 (rlen <= flen && rbno + rlen <= fbno + flen), error0); 1442 if (rlen < args->maxlen) { 1443 xfs_agblock_t bestfbno; 1444 xfs_extlen_t bestflen; 1445 xfs_agblock_t bestrbno; 1446 xfs_extlen_t bestrlen; 1447 1448 bestrlen = rlen; 1449 bestrbno = rbno; 1450 bestflen = flen; 1451 bestfbno = fbno; 1452 for (;;) { 1453 if ((error = xfs_btree_decrement(cnt_cur, 0, &i))) 1454 goto error0; 1455 if (i == 0) 1456 break; 1457 if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, 1458 &i))) 1459 goto error0; 1460 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); 1461 if (flen < bestrlen) 1462 break; 1463 busy = xfs_alloc_compute_aligned(args, fbno, flen, 1464 &rbno, &rlen, &busy_gen); 1465 rlen = XFS_EXTLEN_MIN(args->maxlen, rlen); 1466 XFS_WANT_CORRUPTED_GOTO(args->mp, rlen == 0 || 1467 (rlen <= flen && rbno + rlen <= fbno + flen), 1468 error0); 1469 if (rlen > bestrlen) { 1470 bestrlen = rlen; 1471 bestrbno = rbno; 1472 bestflen = flen; 1473 bestfbno = fbno; 1474 if (rlen == args->maxlen) 1475 break; 1476 } 1477 } 1478 if ((error = xfs_alloc_lookup_eq(cnt_cur, bestfbno, bestflen, 1479 &i))) 1480 goto error0; 1481 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); 1482 rlen = bestrlen; 1483 rbno = bestrbno; 1484 flen = bestflen; 1485 fbno = bestfbno; 1486 } 1487 args->wasfromfl = 0; 1488 /* 1489 * Fix up the length. 1490 */ 1491 args->len = rlen; 1492 if (rlen < args->minlen) { 1493 if (busy) { 1494 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1495 trace_xfs_alloc_size_busy(args); 1496 xfs_extent_busy_flush(args->mp, args->pag, busy_gen); 1497 goto restart; 1498 } 1499 goto out_nominleft; 1500 } 1501 xfs_alloc_fix_len(args); 1502 1503 rlen = args->len; 1504 XFS_WANT_CORRUPTED_GOTO(args->mp, rlen <= flen, error0); 1505 /* 1506 * Allocate and initialize a cursor for the by-block tree. 
1507 */ 1508 bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, 1509 args->agno, XFS_BTNUM_BNO); 1510 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, 1511 rbno, rlen, XFSA_FIXUP_CNT_OK))) 1512 goto error0; 1513 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1514 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); 1515 cnt_cur = bno_cur = NULL; 1516 args->len = rlen; 1517 args->agbno = rbno; 1518 XFS_WANT_CORRUPTED_GOTO(args->mp, 1519 args->agbno + args->len <= 1520 be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length), 1521 error0); 1522 trace_xfs_alloc_size_done(args); 1523 return 0; 1524 1525 error0: 1526 trace_xfs_alloc_size_error(args); 1527 if (cnt_cur) 1528 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR); 1529 if (bno_cur) 1530 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR); 1531 return error; 1532 1533 out_nominleft: 1534 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1535 trace_xfs_alloc_size_nominleft(args); 1536 args->agbno = NULLAGBLOCK; 1537 return 0; 1538 } 1539 1540 /* 1541 * Deal with the case where only small freespaces remain. 1542 * Either return the contents of the last freespace record, 1543 * or allocate space from the freelist if there is nothing in the tree. 1544 */ 1545 STATIC int /* error */ 1546 xfs_alloc_ag_vextent_small( 1547 xfs_alloc_arg_t *args, /* allocation argument structure */ 1548 xfs_btree_cur_t *ccur, /* by-size cursor */ 1549 xfs_agblock_t *fbnop, /* result block number */ 1550 xfs_extlen_t *flenp, /* result length */ 1551 int *stat) /* status: 0-freelist, 1-normal/none */ 1552 { 1553 struct xfs_owner_info oinfo; 1554 struct xfs_perag *pag; 1555 int error; 1556 xfs_agblock_t fbno; 1557 xfs_extlen_t flen; 1558 int i; 1559 1560 if ((error = xfs_btree_decrement(ccur, 0, &i))) 1561 goto error0; 1562 if (i) { 1563 if ((error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i))) 1564 goto error0; 1565 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); 1566 } 1567 /* 1568 * Nothing in the btree, try the freelist. Make sure 1569 * to respect minleft even when pulling from the 1570 * freelist. 1571 */ 1572 else if (args->minlen == 1 && args->alignment == 1 && 1573 args->resv != XFS_AG_RESV_AGFL && 1574 (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount) 1575 > args->minleft)) { 1576 error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0); 1577 if (error) 1578 goto error0; 1579 if (fbno != NULLAGBLOCK) { 1580 xfs_extent_busy_reuse(args->mp, args->agno, fbno, 1, 1581 xfs_alloc_allow_busy_reuse(args->datatype)); 1582 1583 if (xfs_alloc_is_userdata(args->datatype)) { 1584 xfs_buf_t *bp; 1585 1586 bp = xfs_btree_get_bufs(args->mp, args->tp, 1587 args->agno, fbno, 0); 1588 if (!bp) { 1589 error = -EFSCORRUPTED; 1590 goto error0; 1591 } 1592 xfs_trans_binval(args->tp, bp); 1593 } 1594 args->len = 1; 1595 args->agbno = fbno; 1596 XFS_WANT_CORRUPTED_GOTO(args->mp, 1597 args->agbno + args->len <= 1598 be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length), 1599 error0); 1600 args->wasfromfl = 1; 1601 trace_xfs_alloc_small_freelist(args); 1602 1603 /* 1604 * If we're feeding an AGFL block to something that 1605 * doesn't live in the free space, we need to clear 1606 * out the OWN_AG rmap and add the block back to 1607 * the AGFL per-AG reservation. 
1608 */ 1609 xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_AG); 1610 error = xfs_rmap_free(args->tp, args->agbp, args->agno, 1611 fbno, 1, &oinfo); 1612 if (error) 1613 goto error0; 1614 pag = xfs_perag_get(args->mp, args->agno); 1615 xfs_ag_resv_free_extent(pag, XFS_AG_RESV_AGFL, 1616 args->tp, 1); 1617 xfs_perag_put(pag); 1618 1619 *stat = 0; 1620 return 0; 1621 } 1622 /* 1623 * Nothing in the freelist. 1624 */ 1625 else 1626 flen = 0; 1627 } 1628 /* 1629 * Can't allocate from the freelist for some reason. 1630 */ 1631 else { 1632 fbno = NULLAGBLOCK; 1633 flen = 0; 1634 } 1635 /* 1636 * Can't do the allocation, give up. 1637 */ 1638 if (flen < args->minlen) { 1639 args->agbno = NULLAGBLOCK; 1640 trace_xfs_alloc_small_notenough(args); 1641 flen = 0; 1642 } 1643 *fbnop = fbno; 1644 *flenp = flen; 1645 *stat = 1; 1646 trace_xfs_alloc_small_done(args); 1647 return 0; 1648 1649 error0: 1650 trace_xfs_alloc_small_error(args); 1651 return error; 1652 } 1653 1654 /* 1655 * Free the extent starting at agno/bno for length. 1656 */ 1657 STATIC int 1658 xfs_free_ag_extent( 1659 xfs_trans_t *tp, 1660 xfs_buf_t *agbp, 1661 xfs_agnumber_t agno, 1662 xfs_agblock_t bno, 1663 xfs_extlen_t len, 1664 struct xfs_owner_info *oinfo, 1665 enum xfs_ag_resv_type type) 1666 { 1667 xfs_btree_cur_t *bno_cur; /* cursor for by-block btree */ 1668 xfs_btree_cur_t *cnt_cur; /* cursor for by-size btree */ 1669 int error; /* error return value */ 1670 xfs_agblock_t gtbno; /* start of right neighbor block */ 1671 xfs_extlen_t gtlen; /* length of right neighbor block */ 1672 int haveleft; /* have a left neighbor block */ 1673 int haveright; /* have a right neighbor block */ 1674 int i; /* temp, result code */ 1675 xfs_agblock_t ltbno; /* start of left neighbor block */ 1676 xfs_extlen_t ltlen; /* length of left neighbor block */ 1677 xfs_mount_t *mp; /* mount point struct for filesystem */ 1678 xfs_agblock_t nbno; /* new starting block of freespace */ 1679 xfs_extlen_t nlen; /* new length of freespace */ 1680 xfs_perag_t *pag; /* per allocation group data */ 1681 1682 bno_cur = cnt_cur = NULL; 1683 mp = tp->t_mountp; 1684 1685 if (oinfo->oi_owner != XFS_RMAP_OWN_UNKNOWN) { 1686 error = xfs_rmap_free(tp, agbp, agno, bno, len, oinfo); 1687 if (error) 1688 goto error0; 1689 } 1690 1691 /* 1692 * Allocate and initialize a cursor for the by-block btree. 1693 */ 1694 bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO); 1695 /* 1696 * Look for a neighboring block on the left (lower block numbers) 1697 * that is contiguous with this space. 1698 */ 1699 if ((error = xfs_alloc_lookup_le(bno_cur, bno, len, &haveleft))) 1700 goto error0; 1701 if (haveleft) { 1702 /* 1703 * There is a block to our left. 1704 */ 1705 if ((error = xfs_alloc_get_rec(bno_cur, <bno, <len, &i))) 1706 goto error0; 1707 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); 1708 /* 1709 * It's not contiguous, though. 1710 */ 1711 if (ltbno + ltlen < bno) 1712 haveleft = 0; 1713 else { 1714 /* 1715 * If this failure happens the request to free this 1716 * space was invalid, it's (partly) already free. 1717 * Very bad. 1718 */ 1719 XFS_WANT_CORRUPTED_GOTO(mp, 1720 ltbno + ltlen <= bno, error0); 1721 } 1722 } 1723 /* 1724 * Look for a neighboring block on the right (higher block numbers) 1725 * that is contiguous with this space. 1726 */ 1727 if ((error = xfs_btree_increment(bno_cur, 0, &haveright))) 1728 goto error0; 1729 if (haveright) { 1730 /* 1731 * There is a block to our right. 
1732 */ 1733 if ((error = xfs_alloc_get_rec(bno_cur, >bno, >len, &i))) 1734 goto error0; 1735 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); 1736 /* 1737 * It's not contiguous, though. 1738 */ 1739 if (bno + len < gtbno) 1740 haveright = 0; 1741 else { 1742 /* 1743 * If this failure happens the request to free this 1744 * space was invalid, it's (partly) already free. 1745 * Very bad. 1746 */ 1747 XFS_WANT_CORRUPTED_GOTO(mp, gtbno >= bno + len, error0); 1748 } 1749 } 1750 /* 1751 * Now allocate and initialize a cursor for the by-size tree. 1752 */ 1753 cnt_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_CNT); 1754 /* 1755 * Have both left and right contiguous neighbors. 1756 * Merge all three into a single free block. 1757 */ 1758 if (haveleft && haveright) { 1759 /* 1760 * Delete the old by-size entry on the left. 1761 */ 1762 if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i))) 1763 goto error0; 1764 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); 1765 if ((error = xfs_btree_delete(cnt_cur, &i))) 1766 goto error0; 1767 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); 1768 /* 1769 * Delete the old by-size entry on the right. 1770 */ 1771 if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i))) 1772 goto error0; 1773 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); 1774 if ((error = xfs_btree_delete(cnt_cur, &i))) 1775 goto error0; 1776 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); 1777 /* 1778 * Delete the old by-block entry for the right block. 1779 */ 1780 if ((error = xfs_btree_delete(bno_cur, &i))) 1781 goto error0; 1782 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); 1783 /* 1784 * Move the by-block cursor back to the left neighbor. 1785 */ 1786 if ((error = xfs_btree_decrement(bno_cur, 0, &i))) 1787 goto error0; 1788 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); 1789 #ifdef DEBUG 1790 /* 1791 * Check that this is the right record: delete didn't 1792 * mangle the cursor. 1793 */ 1794 { 1795 xfs_agblock_t xxbno; 1796 xfs_extlen_t xxlen; 1797 1798 if ((error = xfs_alloc_get_rec(bno_cur, &xxbno, &xxlen, 1799 &i))) 1800 goto error0; 1801 XFS_WANT_CORRUPTED_GOTO(mp, 1802 i == 1 && xxbno == ltbno && xxlen == ltlen, 1803 error0); 1804 } 1805 #endif 1806 /* 1807 * Update remaining by-block entry to the new, joined block. 1808 */ 1809 nbno = ltbno; 1810 nlen = len + ltlen + gtlen; 1811 if ((error = xfs_alloc_update(bno_cur, nbno, nlen))) 1812 goto error0; 1813 } 1814 /* 1815 * Have only a left contiguous neighbor. 1816 * Merge it together with the new freespace. 1817 */ 1818 else if (haveleft) { 1819 /* 1820 * Delete the old by-size entry on the left. 1821 */ 1822 if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i))) 1823 goto error0; 1824 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); 1825 if ((error = xfs_btree_delete(cnt_cur, &i))) 1826 goto error0; 1827 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); 1828 /* 1829 * Back up the by-block cursor to the left neighbor, and 1830 * update its length. 1831 */ 1832 if ((error = xfs_btree_decrement(bno_cur, 0, &i))) 1833 goto error0; 1834 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); 1835 nbno = ltbno; 1836 nlen = len + ltlen; 1837 if ((error = xfs_alloc_update(bno_cur, nbno, nlen))) 1838 goto error0; 1839 } 1840 /* 1841 * Have only a right contiguous neighbor. 1842 * Merge it together with the new freespace. 1843 */ 1844 else if (haveright) { 1845 /* 1846 * Delete the old by-size entry on the right. 
1847 */ 1848 if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i))) 1849 goto error0; 1850 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); 1851 if ((error = xfs_btree_delete(cnt_cur, &i))) 1852 goto error0; 1853 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); 1854 /* 1855 * Update the starting block and length of the right 1856 * neighbor in the by-block tree. 1857 */ 1858 nbno = bno; 1859 nlen = len + gtlen; 1860 if ((error = xfs_alloc_update(bno_cur, nbno, nlen))) 1861 goto error0; 1862 } 1863 /* 1864 * No contiguous neighbors. 1865 * Insert the new freespace into the by-block tree. 1866 */ 1867 else { 1868 nbno = bno; 1869 nlen = len; 1870 if ((error = xfs_btree_insert(bno_cur, &i))) 1871 goto error0; 1872 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); 1873 } 1874 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); 1875 bno_cur = NULL; 1876 /* 1877 * In all cases we need to insert the new freespace in the by-size tree. 1878 */ 1879 if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i))) 1880 goto error0; 1881 XFS_WANT_CORRUPTED_GOTO(mp, i == 0, error0); 1882 if ((error = xfs_btree_insert(cnt_cur, &i))) 1883 goto error0; 1884 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0); 1885 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1886 cnt_cur = NULL; 1887 1888 /* 1889 * Update the freespace totals in the ag and superblock. 1890 */ 1891 pag = xfs_perag_get(mp, agno); 1892 error = xfs_alloc_update_counters(tp, pag, agbp, len); 1893 xfs_ag_resv_free_extent(pag, type, tp, len); 1894 xfs_perag_put(pag); 1895 if (error) 1896 goto error0; 1897 1898 XFS_STATS_INC(mp, xs_freex); 1899 XFS_STATS_ADD(mp, xs_freeb, len); 1900 1901 trace_xfs_free_extent(mp, agno, bno, len, type == XFS_AG_RESV_AGFL, 1902 haveleft, haveright); 1903 1904 return 0; 1905 1906 error0: 1907 trace_xfs_free_extent(mp, agno, bno, len, type == XFS_AG_RESV_AGFL, 1908 -1, -1); 1909 if (bno_cur) 1910 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR); 1911 if (cnt_cur) 1912 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR); 1913 return error; 1914 } 1915 1916 /* 1917 * Visible (exported) allocation/free functions. 1918 * Some of these are used just by xfs_alloc_btree.c and this file. 1919 */ 1920 1921 /* 1922 * Compute and fill in value of m_ag_maxlevels. 1923 */ 1924 void 1925 xfs_alloc_compute_maxlevels( 1926 xfs_mount_t *mp) /* file system mount structure */ 1927 { 1928 mp->m_ag_maxlevels = xfs_btree_compute_maxlevels(mp, mp->m_alloc_mnr, 1929 (mp->m_sb.sb_agblocks + 1) / 2); 1930 } 1931 1932 /* 1933 * Find the length of the longest extent in an AG. The 'need' parameter 1934 * specifies how much space we're going to need for the AGFL and the 1935 * 'reserved' parameter tells us how many blocks in this AG are reserved for 1936 * other callers. 1937 */ 1938 xfs_extlen_t 1939 xfs_alloc_longest_free_extent( 1940 struct xfs_mount *mp, 1941 struct xfs_perag *pag, 1942 xfs_extlen_t need, 1943 xfs_extlen_t reserved) 1944 { 1945 xfs_extlen_t delta = 0; 1946 1947 /* 1948 * If the AGFL needs a recharge, we'll have to subtract that from the 1949 * longest extent. 1950 */ 1951 if (need > pag->pagf_flcount) 1952 delta = need - pag->pagf_flcount; 1953 1954 /* 1955 * If we cannot maintain others' reservations with space from the 1956 * not-longest freesp extents, we'll have to subtract /that/ from 1957 * the longest extent too. 
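 * For example, with pagf_longest = 100, pagf_freeblks = 150, need = 10,
 * pagf_flcount = 4 and reserved = 60, delta = (10 - 4) + (60 - 50) = 16,
 * so at most 100 - 16 = 84 blocks can be offered to the caller.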
1958 */ 1959 if (pag->pagf_freeblks - pag->pagf_longest < reserved) 1960 delta += reserved - (pag->pagf_freeblks - pag->pagf_longest); 1961 1962 /* 1963 * If the longest extent is long enough to satisfy all the 1964 * reservations and AGFL rules in place, we can return this extent. 1965 */ 1966 if (pag->pagf_longest > delta) 1967 return pag->pagf_longest - delta; 1968 1969 /* Otherwise, let the caller try for 1 block if there's space. */ 1970 return pag->pagf_flcount > 0 || pag->pagf_longest > 0; 1971 } 1972 1973 unsigned int 1974 xfs_alloc_min_freelist( 1975 struct xfs_mount *mp, 1976 struct xfs_perag *pag) 1977 { 1978 unsigned int min_free; 1979 1980 /* space needed by-bno freespace btree */ 1981 min_free = min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_BNOi] + 1, 1982 mp->m_ag_maxlevels); 1983 /* space needed by-size freespace btree */ 1984 min_free += min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_CNTi] + 1, 1985 mp->m_ag_maxlevels); 1986 /* space needed reverse mapping used space btree */ 1987 if (xfs_sb_version_hasrmapbt(&mp->m_sb)) 1988 min_free += min_t(unsigned int, 1989 pag->pagf_levels[XFS_BTNUM_RMAPi] + 1, 1990 mp->m_rmap_maxlevels); 1991 1992 return min_free; 1993 } 1994 1995 /* 1996 * Check if the operation we are fixing up the freelist for should go ahead or 1997 * not. If we are freeing blocks, we always allow it, otherwise the allocation 1998 * is dependent on whether the size and shape of free space available will 1999 * permit the requested allocation to take place. 2000 */ 2001 static bool 2002 xfs_alloc_space_available( 2003 struct xfs_alloc_arg *args, 2004 xfs_extlen_t min_free, 2005 int flags) 2006 { 2007 struct xfs_perag *pag = args->pag; 2008 xfs_extlen_t alloc_len, longest; 2009 xfs_extlen_t reservation; /* blocks that are still reserved */ 2010 int available; 2011 2012 if (flags & XFS_ALLOC_FLAG_FREEING) 2013 return true; 2014 2015 reservation = xfs_ag_resv_needed(pag, args->resv); 2016 2017 /* do we have enough contiguous free space for the allocation? */ 2018 alloc_len = args->minlen + (args->alignment - 1) + args->minalignslop; 2019 longest = xfs_alloc_longest_free_extent(args->mp, pag, min_free, 2020 reservation); 2021 if (longest < alloc_len) 2022 return false; 2023 2024 /* do we have enough free space remaining for the allocation? */ 2025 available = (int)(pag->pagf_freeblks + pag->pagf_flcount - 2026 reservation - min_free - args->minleft); 2027 if (available < (int)max(args->total, alloc_len)) 2028 return false; 2029 2030 /* 2031 * Clamp maxlen to the amount of free space available for the actual 2032 * extent allocation. 2033 */ 2034 if (available < (int)args->maxlen && !(flags & XFS_ALLOC_FLAG_CHECK)) { 2035 args->maxlen = available; 2036 ASSERT(args->maxlen > 0); 2037 ASSERT(args->maxlen >= args->minlen); 2038 } 2039 2040 return true; 2041 } 2042 2043 /* 2044 * Decide whether to use this allocation group for this allocation. 2045 * If so, fix up the btree freelist's size. 2046 */ 2047 int /* error */ 2048 xfs_alloc_fix_freelist( 2049 struct xfs_alloc_arg *args, /* allocation argument structure */ 2050 int flags) /* XFS_ALLOC_FLAG_... 
*/ 2051 { 2052 struct xfs_mount *mp = args->mp; 2053 struct xfs_perag *pag = args->pag; 2054 struct xfs_trans *tp = args->tp; 2055 struct xfs_buf *agbp = NULL; 2056 struct xfs_buf *agflbp = NULL; 2057 struct xfs_alloc_arg targs; /* local allocation arguments */ 2058 xfs_agblock_t bno; /* freelist block */ 2059 xfs_extlen_t need; /* total blocks needed in freelist */ 2060 int error = 0; 2061 2062 if (!pag->pagf_init) { 2063 error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp); 2064 if (error) 2065 goto out_no_agbp; 2066 if (!pag->pagf_init) { 2067 ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK); 2068 ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING)); 2069 goto out_agbp_relse; 2070 } 2071 } 2072 2073 /* 2074 * If this is a metadata preferred pag and we are user data then try 2075 * somewhere else if we are not being asked to try harder at this 2076 * point 2077 */ 2078 if (pag->pagf_metadata && xfs_alloc_is_userdata(args->datatype) && 2079 (flags & XFS_ALLOC_FLAG_TRYLOCK)) { 2080 ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING)); 2081 goto out_agbp_relse; 2082 } 2083 2084 need = xfs_alloc_min_freelist(mp, pag); 2085 if (!xfs_alloc_space_available(args, need, flags | 2086 XFS_ALLOC_FLAG_CHECK)) 2087 goto out_agbp_relse; 2088 2089 /* 2090 * Get the a.g. freespace buffer. 2091 * Can fail if we're not blocking on locks, and it's held. 2092 */ 2093 if (!agbp) { 2094 error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp); 2095 if (error) 2096 goto out_no_agbp; 2097 if (!agbp) { 2098 ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK); 2099 ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING)); 2100 goto out_no_agbp; 2101 } 2102 } 2103 2104 /* If there isn't enough total space or single-extent, reject it. */ 2105 need = xfs_alloc_min_freelist(mp, pag); 2106 if (!xfs_alloc_space_available(args, need, flags)) 2107 goto out_agbp_relse; 2108 2109 /* 2110 * Make the freelist shorter if it's too long. 2111 * 2112 * Note that from this point onwards, we will always release the agf and 2113 * agfl buffers on error. This handles the case where we error out and 2114 * the buffers are clean or may not have been joined to the transaction 2115 * and hence need to be released manually. If they have been joined to 2116 * the transaction, then xfs_trans_brelse() will handle them 2117 * appropriately based on the recursion count and dirty state of the 2118 * buffer. 2119 * 2120 * XXX (dgc): When we have lots of free space, does this buy us 2121 * anything other than extra overhead when we need to put more blocks 2122 * back on the free list? Maybe we should only do this when space is 2123 * getting low or the AGFL is more than half full? 2124 * 2125 * The NOSHRINK flag prevents the AGFL from being shrunk if it's too 2126 * big; the NORMAP flag prevents AGFL expand/shrink operations from 2127 * updating the rmapbt. Both flags are used in xfs_repair while we're 2128 * rebuilding the rmapbt, and neither are used by the kernel. They're 2129 * both required to ensure that rmaps are correctly recorded for the 2130 * regenerated AGFL, bnobt, and cntbt. See repair/phase5.c and 2131 * repair/rmap.c in xfsprogs for details. 
	 */
	memset(&targs, 0, sizeof(targs));
	if (flags & XFS_ALLOC_FLAG_NORMAP)
		xfs_rmap_skip_owner_update(&targs.oinfo);
	else
		xfs_rmap_ag_owner(&targs.oinfo, XFS_RMAP_OWN_AG);
	while (!(flags & XFS_ALLOC_FLAG_NOSHRINK) && pag->pagf_flcount > need) {
		struct xfs_buf	*bp;

		error = xfs_alloc_get_freelist(tp, agbp, &bno, 0);
		if (error)
			goto out_agbp_relse;
		error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1,
					   &targs.oinfo, XFS_AG_RESV_AGFL);
		if (error)
			goto out_agbp_relse;
		bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0);
		if (!bp) {
			error = -EFSCORRUPTED;
			goto out_agbp_relse;
		}
		xfs_trans_binval(tp, bp);
	}

	targs.tp = tp;
	targs.mp = mp;
	targs.agbp = agbp;
	targs.agno = args->agno;
	targs.alignment = targs.minlen = targs.prod = 1;
	targs.type = XFS_ALLOCTYPE_THIS_AG;
	targs.pag = pag;
	error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp);
	if (error)
		goto out_agbp_relse;

	/* Make the freelist longer if it's too short. */
	while (pag->pagf_flcount < need) {
		targs.agbno = 0;
		targs.maxlen = need - pag->pagf_flcount;
		targs.resv = XFS_AG_RESV_AGFL;

		/* Allocate as many blocks as possible at once. */
		error = xfs_alloc_ag_vextent(&targs);
		if (error)
			goto out_agflbp_relse;

		/*
		 * Stop if we run out.  Won't happen if callers are obeying
		 * the restrictions correctly.  Can happen for free calls
		 * on a completely full ag.
		 */
		if (targs.agbno == NULLAGBLOCK) {
			if (flags & XFS_ALLOC_FLAG_FREEING)
				break;
			goto out_agflbp_relse;
		}
		/*
		 * Put each allocated block on the list.
		 */
		for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) {
			error = xfs_alloc_put_freelist(tp, agbp,
							agflbp, bno, 0);
			if (error)
				goto out_agflbp_relse;
		}
	}
	xfs_trans_brelse(tp, agflbp);
	args->agbp = agbp;
	return 0;

out_agflbp_relse:
	xfs_trans_brelse(tp, agflbp);
out_agbp_relse:
	if (agbp)
		xfs_trans_brelse(tp, agbp);
out_no_agbp:
	args->agbp = NULL;
	return error;
}
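/*
 * Illustrative sketch (not part of the original source): how the two
 * halves of the AGFL API below fit together.  The helper is hypothetical
 * and exists only to show the calling convention; real callers are
 * xfs_alloc_fix_freelist() above and the btree block allocation paths.
 */
#if 0
static int
xfs_alloc_agfl_rotate_example(
	struct xfs_trans	*tp,
	struct xfs_buf		*agbp)
{
	xfs_agblock_t		bno;
	int			error;

	/* Take one block off the head of the AGFL... */
	error = xfs_alloc_get_freelist(tp, agbp, &bno, 0);
	if (error)
		return error;
	if (bno == NULLAGBLOCK)
		return 0;	/* the freelist was empty */

	/*
	 * ...and put it back on the tail.  Passing a NULL agflbp makes
	 * xfs_alloc_put_freelist() read the AGFL buffer itself.
	 */
	return xfs_alloc_put_freelist(tp, agbp, NULL, bno, 0);
}
#endif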
/*
 * Get a block from the freelist.
 * Returns with the buffer for the block gotten.
 */
int				/* error */
xfs_alloc_get_freelist(
	xfs_trans_t	*tp,	/* transaction pointer */
	xfs_buf_t	*agbp,	/* buffer containing the agf structure */
	xfs_agblock_t	*bnop,	/* block address retrieved from freelist */
	int		btreeblk) /* destination is an AGF btree */
{
	xfs_agf_t	*agf;	/* a.g. freespace structure */
	xfs_buf_t	*agflbp;/* buffer for a.g. freelist structure */
	xfs_agblock_t	bno;	/* block number returned */
	__be32		*agfl_bno;
	int		error;
	int		logflags;
	xfs_mount_t	*mp = tp->t_mountp;
	xfs_perag_t	*pag;	/* per allocation group data */

	/*
	 * Freelist is empty, give up.
	 */
	agf = XFS_BUF_TO_AGF(agbp);
	if (!agf->agf_flcount) {
		*bnop = NULLAGBLOCK;
		return 0;
	}
	/*
	 * Read the array of free blocks.
	 */
	error = xfs_alloc_read_agfl(mp, tp, be32_to_cpu(agf->agf_seqno),
				    &agflbp);
	if (error)
		return error;

	/*
	 * Get the block number and update the data structures.
	 */
	agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
	bno = be32_to_cpu(agfl_bno[be32_to_cpu(agf->agf_flfirst)]);
	be32_add_cpu(&agf->agf_flfirst, 1);
	xfs_trans_brelse(tp, agflbp);
	if (be32_to_cpu(agf->agf_flfirst) == XFS_AGFL_SIZE(mp))
		agf->agf_flfirst = 0;

	pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
	be32_add_cpu(&agf->agf_flcount, -1);
	xfs_trans_agflist_delta(tp, -1);
	pag->pagf_flcount--;

	logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT;
	if (btreeblk) {
		be32_add_cpu(&agf->agf_btreeblks, 1);
		pag->pagf_btreeblks++;
		logflags |= XFS_AGF_BTREEBLKS;
	}
	xfs_perag_put(pag);

	xfs_alloc_log_agf(tp, agbp, logflags);
	*bnop = bno;

	return 0;
}

/*
 * Log the given fields from the agf structure.
 */
void
xfs_alloc_log_agf(
	xfs_trans_t	*tp,	/* transaction pointer */
	xfs_buf_t	*bp,	/* buffer for a.g. freelist header */
	int		fields)	/* mask of fields to be logged (XFS_AGF_...) */
{
	int	first;		/* first byte offset */
	int	last;		/* last byte offset */
	static const short	offsets[] = {
		offsetof(xfs_agf_t, agf_magicnum),
		offsetof(xfs_agf_t, agf_versionnum),
		offsetof(xfs_agf_t, agf_seqno),
		offsetof(xfs_agf_t, agf_length),
		offsetof(xfs_agf_t, agf_roots[0]),
		offsetof(xfs_agf_t, agf_levels[0]),
		offsetof(xfs_agf_t, agf_flfirst),
		offsetof(xfs_agf_t, agf_fllast),
		offsetof(xfs_agf_t, agf_flcount),
		offsetof(xfs_agf_t, agf_freeblks),
		offsetof(xfs_agf_t, agf_longest),
		offsetof(xfs_agf_t, agf_btreeblks),
		offsetof(xfs_agf_t, agf_uuid),
		offsetof(xfs_agf_t, agf_rmap_blocks),
		offsetof(xfs_agf_t, agf_refcount_blocks),
		offsetof(xfs_agf_t, agf_refcount_root),
		offsetof(xfs_agf_t, agf_refcount_level),
		/* needed so that we don't log the whole rest of the structure: */
		offsetof(xfs_agf_t, agf_spare64),
		sizeof(xfs_agf_t)
	};

	trace_xfs_agf(tp->t_mountp, XFS_BUF_TO_AGF(bp), fields, _RET_IP_);

	xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGF_BUF);

	xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last);
	xfs_trans_log_buf(tp, bp, (uint)first, (uint)last);
}

/*
 * Interface for inode allocation to force the pag data to be initialized.
 */
int					/* error */
xfs_alloc_pagf_init(
	xfs_mount_t		*mp,	/* file system mount structure */
	xfs_trans_t		*tp,	/* transaction pointer */
	xfs_agnumber_t		agno,	/* allocation group number */
	int			flags)	/* XFS_ALLOC_FLAGS_... */
{
	xfs_buf_t		*bp;
	int			error;

	if ((error = xfs_alloc_read_agf(mp, tp, agno, flags, &bp)))
		return error;
	if (bp)
		xfs_trans_brelse(tp, bp);
	return 0;
}

/*
 * Put the block on the freelist for the allocation group.
 */
int					/* error */
xfs_alloc_put_freelist(
	xfs_trans_t		*tp,	/* transaction pointer */
	xfs_buf_t		*agbp,	/* buffer for a.g. freelist header */
	xfs_buf_t		*agflbp,/* buffer for a.g. free block array */
	xfs_agblock_t		bno,	/* block being freed */
	int			btreeblk) /* block came from an AGF btree */
{
	xfs_agf_t		*agf;	/* a.g.
freespace structure */ 2352 __be32 *blockp;/* pointer to array entry */ 2353 int error; 2354 int logflags; 2355 xfs_mount_t *mp; /* mount structure */ 2356 xfs_perag_t *pag; /* per allocation group data */ 2357 __be32 *agfl_bno; 2358 int startoff; 2359 2360 agf = XFS_BUF_TO_AGF(agbp); 2361 mp = tp->t_mountp; 2362 2363 if (!agflbp && (error = xfs_alloc_read_agfl(mp, tp, 2364 be32_to_cpu(agf->agf_seqno), &agflbp))) 2365 return error; 2366 be32_add_cpu(&agf->agf_fllast, 1); 2367 if (be32_to_cpu(agf->agf_fllast) == XFS_AGFL_SIZE(mp)) 2368 agf->agf_fllast = 0; 2369 2370 pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno)); 2371 be32_add_cpu(&agf->agf_flcount, 1); 2372 xfs_trans_agflist_delta(tp, 1); 2373 pag->pagf_flcount++; 2374 2375 logflags = XFS_AGF_FLLAST | XFS_AGF_FLCOUNT; 2376 if (btreeblk) { 2377 be32_add_cpu(&agf->agf_btreeblks, -1); 2378 pag->pagf_btreeblks--; 2379 logflags |= XFS_AGF_BTREEBLKS; 2380 } 2381 xfs_perag_put(pag); 2382 2383 xfs_alloc_log_agf(tp, agbp, logflags); 2384 2385 ASSERT(be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp)); 2386 2387 agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp); 2388 blockp = &agfl_bno[be32_to_cpu(agf->agf_fllast)]; 2389 *blockp = cpu_to_be32(bno); 2390 startoff = (char *)blockp - (char *)agflbp->b_addr; 2391 2392 xfs_alloc_log_agf(tp, agbp, logflags); 2393 2394 xfs_trans_buf_set_type(tp, agflbp, XFS_BLFT_AGFL_BUF); 2395 xfs_trans_log_buf(tp, agflbp, startoff, 2396 startoff + sizeof(xfs_agblock_t) - 1); 2397 return 0; 2398 } 2399 2400 static bool 2401 xfs_agf_verify( 2402 struct xfs_mount *mp, 2403 struct xfs_buf *bp) 2404 { 2405 struct xfs_agf *agf = XFS_BUF_TO_AGF(bp); 2406 2407 if (xfs_sb_version_hascrc(&mp->m_sb)) { 2408 if (!uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid)) 2409 return false; 2410 if (!xfs_log_check_lsn(mp, 2411 be64_to_cpu(XFS_BUF_TO_AGF(bp)->agf_lsn))) 2412 return false; 2413 } 2414 2415 if (!(agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) && 2416 XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) && 2417 be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) && 2418 be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) && 2419 be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) && 2420 be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp))) 2421 return false; 2422 2423 if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 || 2424 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) < 1 || 2425 be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > XFS_BTREE_MAXLEVELS || 2426 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) > XFS_BTREE_MAXLEVELS) 2427 return false; 2428 2429 if (xfs_sb_version_hasrmapbt(&mp->m_sb) && 2430 (be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) < 1 || 2431 be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > XFS_BTREE_MAXLEVELS)) 2432 return false; 2433 2434 /* 2435 * during growfs operations, the perag is not fully initialised, 2436 * so we can't use it for any useful checking. growfs ensures we can't 2437 * use it by using uncached buffers that don't have the perag attached 2438 * so we can detect and avoid this problem. 
	 */
	if (bp->b_pag && be32_to_cpu(agf->agf_seqno) != bp->b_pag->pag_agno)
		return false;

	if (xfs_sb_version_haslazysbcount(&mp->m_sb) &&
	    be32_to_cpu(agf->agf_btreeblks) > be32_to_cpu(agf->agf_length))
		return false;

	if (xfs_sb_version_hasreflink(&mp->m_sb) &&
	    (be32_to_cpu(agf->agf_refcount_level) < 1 ||
	     be32_to_cpu(agf->agf_refcount_level) > XFS_BTREE_MAXLEVELS))
		return false;

	return true;
}

static void
xfs_agf_read_verify(
	struct xfs_buf	*bp)
{
	struct xfs_mount *mp = bp->b_target->bt_mount;

	if (xfs_sb_version_hascrc(&mp->m_sb) &&
	    !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF))
		xfs_buf_ioerror(bp, -EFSBADCRC);
	else if (XFS_TEST_ERROR(!xfs_agf_verify(mp, bp), mp,
				XFS_ERRTAG_ALLOC_READ_AGF))
		xfs_buf_ioerror(bp, -EFSCORRUPTED);

	if (bp->b_error)
		xfs_verifier_error(bp);
}

static void
xfs_agf_write_verify(
	struct xfs_buf	*bp)
{
	struct xfs_mount *mp = bp->b_target->bt_mount;
	struct xfs_buf_log_item	*bip = bp->b_fspriv;

	if (!xfs_agf_verify(mp, bp)) {
		xfs_buf_ioerror(bp, -EFSCORRUPTED);
		xfs_verifier_error(bp);
		return;
	}

	if (!xfs_sb_version_hascrc(&mp->m_sb))
		return;

	if (bip)
		XFS_BUF_TO_AGF(bp)->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn);

	xfs_buf_update_cksum(bp, XFS_AGF_CRC_OFF);
}

const struct xfs_buf_ops xfs_agf_buf_ops = {
	.name = "xfs_agf",
	.verify_read = xfs_agf_read_verify,
	.verify_write = xfs_agf_write_verify,
};

/*
 * Read in the allocation group header (free/alloc section).
 */
int					/* error */
xfs_read_agf(
	struct xfs_mount	*mp,	/* mount point structure */
	struct xfs_trans	*tp,	/* transaction pointer */
	xfs_agnumber_t		agno,	/* allocation group number */
	int			flags,	/* XFS_BUF_ */
	struct xfs_buf		**bpp)	/* buffer for the ag freelist header */
{
	int		error;

	trace_xfs_read_agf(mp, agno);

	ASSERT(agno != NULLAGNUMBER);
	error = xfs_trans_read_buf(
			mp, tp, mp->m_ddev_targp,
			XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
			XFS_FSS_TO_BB(mp, 1), flags, bpp, &xfs_agf_buf_ops);
	if (error)
		return error;
	if (!*bpp)
		return 0;

	ASSERT(!(*bpp)->b_error);
	xfs_buf_set_ref(*bpp, XFS_AGF_REF);
	return 0;
}

/*
 * Read in the allocation group header (free/alloc section).
 */
int					/* error */
xfs_alloc_read_agf(
	struct xfs_mount	*mp,	/* mount point structure */
	struct xfs_trans	*tp,	/* transaction pointer */
	xfs_agnumber_t		agno,	/* allocation group number */
	int			flags,	/* XFS_ALLOC_FLAG_... */
	struct xfs_buf		**bpp)	/* buffer for the ag freelist header */
{
	struct xfs_agf		*agf;	/* ag freelist header */
	struct xfs_perag	*pag;	/* per allocation group data */
	int			error;

	trace_xfs_alloc_read_agf(mp, agno);

	ASSERT(agno != NULLAGNUMBER);
	error = xfs_read_agf(mp, tp, agno,
			(flags & XFS_ALLOC_FLAG_TRYLOCK) ?
XBF_TRYLOCK : 0, 2551 bpp); 2552 if (error) 2553 return error; 2554 if (!*bpp) 2555 return 0; 2556 ASSERT(!(*bpp)->b_error); 2557 2558 agf = XFS_BUF_TO_AGF(*bpp); 2559 pag = xfs_perag_get(mp, agno); 2560 if (!pag->pagf_init) { 2561 pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks); 2562 pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks); 2563 pag->pagf_flcount = be32_to_cpu(agf->agf_flcount); 2564 pag->pagf_longest = be32_to_cpu(agf->agf_longest); 2565 pag->pagf_levels[XFS_BTNUM_BNOi] = 2566 be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]); 2567 pag->pagf_levels[XFS_BTNUM_CNTi] = 2568 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]); 2569 pag->pagf_levels[XFS_BTNUM_RMAPi] = 2570 be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAPi]); 2571 pag->pagf_refcount_level = be32_to_cpu(agf->agf_refcount_level); 2572 spin_lock_init(&pag->pagb_lock); 2573 pag->pagb_count = 0; 2574 pag->pagb_tree = RB_ROOT; 2575 pag->pagf_init = 1; 2576 } 2577 #ifdef DEBUG 2578 else if (!XFS_FORCED_SHUTDOWN(mp)) { 2579 ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks)); 2580 ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks)); 2581 ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount)); 2582 ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest)); 2583 ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] == 2584 be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi])); 2585 ASSERT(pag->pagf_levels[XFS_BTNUM_CNTi] == 2586 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi])); 2587 } 2588 #endif 2589 xfs_perag_put(pag); 2590 return 0; 2591 } 2592 2593 /* 2594 * Allocate an extent (variable-size). 2595 * Depending on the allocation type, we either look in a single allocation 2596 * group or loop over the allocation groups to find the result. 2597 */ 2598 int /* error */ 2599 xfs_alloc_vextent( 2600 xfs_alloc_arg_t *args) /* allocation argument structure */ 2601 { 2602 xfs_agblock_t agsize; /* allocation group size */ 2603 int error; 2604 int flags; /* XFS_ALLOC_FLAG_... locking flags */ 2605 xfs_mount_t *mp; /* mount structure pointer */ 2606 xfs_agnumber_t sagno; /* starting allocation group number */ 2607 xfs_alloctype_t type; /* input allocation type */ 2608 int bump_rotor = 0; 2609 xfs_agnumber_t rotorstep = xfs_rotorstep; /* inode32 agf stepper */ 2610 2611 mp = args->mp; 2612 type = args->otype = args->type; 2613 args->agbno = NULLAGBLOCK; 2614 /* 2615 * Just fix this up, for the case where the last a.g. is shorter 2616 * (or there's only one a.g.) and the caller couldn't easily figure 2617 * that out (xfs_bmap_alloc). 2618 */ 2619 agsize = mp->m_sb.sb_agblocks; 2620 if (args->maxlen > agsize) 2621 args->maxlen = agsize; 2622 if (args->alignment == 0) 2623 args->alignment = 1; 2624 ASSERT(XFS_FSB_TO_AGNO(mp, args->fsbno) < mp->m_sb.sb_agcount); 2625 ASSERT(XFS_FSB_TO_AGBNO(mp, args->fsbno) < agsize); 2626 ASSERT(args->minlen <= args->maxlen); 2627 ASSERT(args->minlen <= agsize); 2628 ASSERT(args->mod < args->prod); 2629 if (XFS_FSB_TO_AGNO(mp, args->fsbno) >= mp->m_sb.sb_agcount || 2630 XFS_FSB_TO_AGBNO(mp, args->fsbno) >= agsize || 2631 args->minlen > args->maxlen || args->minlen > agsize || 2632 args->mod >= args->prod) { 2633 args->fsbno = NULLFSBLOCK; 2634 trace_xfs_alloc_vextent_badargs(args); 2635 return 0; 2636 } 2637 2638 switch (type) { 2639 case XFS_ALLOCTYPE_THIS_AG: 2640 case XFS_ALLOCTYPE_NEAR_BNO: 2641 case XFS_ALLOCTYPE_THIS_BNO: 2642 /* 2643 * These three force us into a single a.g. 
2644 */ 2645 args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno); 2646 args->pag = xfs_perag_get(mp, args->agno); 2647 error = xfs_alloc_fix_freelist(args, 0); 2648 if (error) { 2649 trace_xfs_alloc_vextent_nofix(args); 2650 goto error0; 2651 } 2652 if (!args->agbp) { 2653 trace_xfs_alloc_vextent_noagbp(args); 2654 break; 2655 } 2656 args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno); 2657 if ((error = xfs_alloc_ag_vextent(args))) 2658 goto error0; 2659 break; 2660 case XFS_ALLOCTYPE_START_BNO: 2661 /* 2662 * Try near allocation first, then anywhere-in-ag after 2663 * the first a.g. fails. 2664 */ 2665 if ((args->datatype & XFS_ALLOC_INITIAL_USER_DATA) && 2666 (mp->m_flags & XFS_MOUNT_32BITINODES)) { 2667 args->fsbno = XFS_AGB_TO_FSB(mp, 2668 ((mp->m_agfrotor / rotorstep) % 2669 mp->m_sb.sb_agcount), 0); 2670 bump_rotor = 1; 2671 } 2672 args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno); 2673 args->type = XFS_ALLOCTYPE_NEAR_BNO; 2674 /* FALLTHROUGH */ 2675 case XFS_ALLOCTYPE_FIRST_AG: 2676 /* 2677 * Rotate through the allocation groups looking for a winner. 2678 */ 2679 if (type == XFS_ALLOCTYPE_FIRST_AG) { 2680 /* 2681 * Start with allocation group given by bno. 2682 */ 2683 args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno); 2684 args->type = XFS_ALLOCTYPE_THIS_AG; 2685 sagno = 0; 2686 flags = 0; 2687 } else { 2688 /* 2689 * Start with the given allocation group. 2690 */ 2691 args->agno = sagno = XFS_FSB_TO_AGNO(mp, args->fsbno); 2692 flags = XFS_ALLOC_FLAG_TRYLOCK; 2693 } 2694 /* 2695 * Loop over allocation groups twice; first time with 2696 * trylock set, second time without. 2697 */ 2698 for (;;) { 2699 args->pag = xfs_perag_get(mp, args->agno); 2700 error = xfs_alloc_fix_freelist(args, flags); 2701 if (error) { 2702 trace_xfs_alloc_vextent_nofix(args); 2703 goto error0; 2704 } 2705 /* 2706 * If we get a buffer back then the allocation will fly. 2707 */ 2708 if (args->agbp) { 2709 if ((error = xfs_alloc_ag_vextent(args))) 2710 goto error0; 2711 break; 2712 } 2713 2714 trace_xfs_alloc_vextent_loopfailed(args); 2715 2716 /* 2717 * Didn't work, figure out the next iteration. 2718 */ 2719 if (args->agno == sagno && 2720 type == XFS_ALLOCTYPE_START_BNO) 2721 args->type = XFS_ALLOCTYPE_THIS_AG; 2722 /* 2723 * For the first allocation, we can try any AG to get 2724 * space. However, if we already have allocated a 2725 * block, we don't want to try AGs whose number is below 2726 * sagno. Otherwise, we may end up with out-of-order 2727 * locking of AGF, which might cause deadlock. 2728 */ 2729 if (++(args->agno) == mp->m_sb.sb_agcount) { 2730 if (args->firstblock != NULLFSBLOCK) 2731 args->agno = sagno; 2732 else 2733 args->agno = 0; 2734 } 2735 /* 2736 * Reached the starting a.g., must either be done 2737 * or switch to non-trylock mode. 
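			 * (The first pass runs with XFS_ALLOC_FLAG_TRYLOCK
			 * set, so AGs whose AGF is busy are skipped; once we
			 * wrap back to the starting AG we clear the flags and
			 * retry, blocking on each AGF in turn.)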
			 */
			if (args->agno == sagno) {
				if (flags == 0) {
					args->agbno = NULLAGBLOCK;
					trace_xfs_alloc_vextent_allfailed(args);
					break;
				}

				flags = 0;
				if (type == XFS_ALLOCTYPE_START_BNO) {
					args->agbno = XFS_FSB_TO_AGBNO(mp,
						args->fsbno);
					args->type = XFS_ALLOCTYPE_NEAR_BNO;
				}
			}
			xfs_perag_put(args->pag);
		}
		if (bump_rotor) {
			if (args->agno == sagno)
				mp->m_agfrotor = (mp->m_agfrotor + 1) %
					(mp->m_sb.sb_agcount * rotorstep);
			else
				mp->m_agfrotor = (args->agno * rotorstep + 1) %
					(mp->m_sb.sb_agcount * rotorstep);
		}
		break;
	default:
		ASSERT(0);
		/* NOTREACHED */
	}
	if (args->agbno == NULLAGBLOCK)
		args->fsbno = NULLFSBLOCK;
	else {
		args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno);
#ifdef DEBUG
		ASSERT(args->len >= args->minlen);
		ASSERT(args->len <= args->maxlen);
		ASSERT(args->agbno % args->alignment == 0);
		XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno),
			args->len);
#endif

		/* Zero the extent if we were asked to do so */
		if (args->datatype & XFS_ALLOC_USERDATA_ZERO) {
			error = xfs_zero_extent(args->ip, args->fsbno, args->len);
			if (error)
				goto error0;
		}
	}
	xfs_perag_put(args->pag);
	return 0;
error0:
	xfs_perag_put(args->pag);
	return error;
}

/* Ensure that the freelist is at full capacity. */
int
xfs_free_extent_fix_freelist(
	struct xfs_trans	*tp,
	xfs_agnumber_t		agno,
	struct xfs_buf		**agbp)
{
	struct xfs_alloc_arg	args;
	int			error;

	memset(&args, 0, sizeof(struct xfs_alloc_arg));
	args.tp = tp;
	args.mp = tp->t_mountp;
	args.agno = agno;

	/*
	 * Validate that the block number is legal - this enables us to detect
	 * and handle a silent filesystem corruption rather than crashing.
	 */
	if (args.agno >= args.mp->m_sb.sb_agcount)
		return -EFSCORRUPTED;

	args.pag = xfs_perag_get(args.mp, args.agno);
	ASSERT(args.pag);

	error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING);
	if (error)
		goto out;

	*agbp = args.agbp;
out:
	xfs_perag_put(args.pag);
	return error;
}

/*
 * Free an extent.
 * Just break up the extent address and hand off to xfs_free_ag_extent
 * after fixing up the freelist.
2834 */ 2835 int /* error */ 2836 xfs_free_extent( 2837 struct xfs_trans *tp, /* transaction pointer */ 2838 xfs_fsblock_t bno, /* starting block number of extent */ 2839 xfs_extlen_t len, /* length of extent */ 2840 struct xfs_owner_info *oinfo, /* extent owner */ 2841 enum xfs_ag_resv_type type) /* block reservation type */ 2842 { 2843 struct xfs_mount *mp = tp->t_mountp; 2844 struct xfs_buf *agbp; 2845 xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, bno); 2846 xfs_agblock_t agbno = XFS_FSB_TO_AGBNO(mp, bno); 2847 int error; 2848 2849 ASSERT(len != 0); 2850 ASSERT(type != XFS_AG_RESV_AGFL); 2851 2852 if (XFS_TEST_ERROR(false, mp, 2853 XFS_ERRTAG_FREE_EXTENT)) 2854 return -EIO; 2855 2856 error = xfs_free_extent_fix_freelist(tp, agno, &agbp); 2857 if (error) 2858 return error; 2859 2860 XFS_WANT_CORRUPTED_GOTO(mp, agbno < mp->m_sb.sb_agblocks, err); 2861 2862 /* validate the extent size is legal now we have the agf locked */ 2863 XFS_WANT_CORRUPTED_GOTO(mp, 2864 agbno + len <= be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_length), 2865 err); 2866 2867 error = xfs_free_ag_extent(tp, agbp, agno, agbno, len, oinfo, type); 2868 if (error) 2869 goto err; 2870 2871 xfs_extent_busy_insert(tp, agno, agbno, len, 0); 2872 return 0; 2873 2874 err: 2875 xfs_trans_brelse(tp, agbp); 2876 return error; 2877 } 2878 2879 struct xfs_alloc_query_range_info { 2880 xfs_alloc_query_range_fn fn; 2881 void *priv; 2882 }; 2883 2884 /* Format btree record and pass to our callback. */ 2885 STATIC int 2886 xfs_alloc_query_range_helper( 2887 struct xfs_btree_cur *cur, 2888 union xfs_btree_rec *rec, 2889 void *priv) 2890 { 2891 struct xfs_alloc_query_range_info *query = priv; 2892 struct xfs_alloc_rec_incore irec; 2893 2894 irec.ar_startblock = be32_to_cpu(rec->alloc.ar_startblock); 2895 irec.ar_blockcount = be32_to_cpu(rec->alloc.ar_blockcount); 2896 return query->fn(cur, &irec, query->priv); 2897 } 2898 2899 /* Find all free space within a given range of blocks. */ 2900 int 2901 xfs_alloc_query_range( 2902 struct xfs_btree_cur *cur, 2903 struct xfs_alloc_rec_incore *low_rec, 2904 struct xfs_alloc_rec_incore *high_rec, 2905 xfs_alloc_query_range_fn fn, 2906 void *priv) 2907 { 2908 union xfs_btree_irec low_brec; 2909 union xfs_btree_irec high_brec; 2910 struct xfs_alloc_query_range_info query; 2911 2912 ASSERT(cur->bc_btnum == XFS_BTNUM_BNO); 2913 low_brec.a = *low_rec; 2914 high_brec.a = *high_rec; 2915 query.priv = priv; 2916 query.fn = fn; 2917 return xfs_btree_query_range(cur, &low_brec, &high_brec, 2918 xfs_alloc_query_range_helper, &query); 2919 } 2920 2921 /* Find all free space records. */ 2922 int 2923 xfs_alloc_query_all( 2924 struct xfs_btree_cur *cur, 2925 xfs_alloc_query_range_fn fn, 2926 void *priv) 2927 { 2928 struct xfs_alloc_query_range_info query; 2929 2930 ASSERT(cur->bc_btnum == XFS_BTNUM_BNO); 2931 query.priv = priv; 2932 query.fn = fn; 2933 return xfs_btree_query_all(cur, xfs_alloc_query_range_helper, &query); 2934 } 2935 2936 /* Find the size of the AG, in blocks. */ 2937 xfs_agblock_t 2938 xfs_ag_block_count( 2939 struct xfs_mount *mp, 2940 xfs_agnumber_t agno) 2941 { 2942 ASSERT(agno < mp->m_sb.sb_agcount); 2943 2944 if (agno < mp->m_sb.sb_agcount - 1) 2945 return mp->m_sb.sb_agblocks; 2946 return mp->m_sb.sb_dblocks - (agno * mp->m_sb.sb_agblocks); 2947 } 2948 2949 /* 2950 * Verify that an AG block number pointer neither points outside the AG 2951 * nor points at static metadata. 
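 * (Static metadata - the superblock, AGF, AGI and AGFL headers - occupies
 * the first blocks of each AG, up to and including XFS_AGFL_BLOCK(mp),
 * hence the lower-bound check below.)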
2952 */ 2953 bool 2954 xfs_verify_agbno( 2955 struct xfs_mount *mp, 2956 xfs_agnumber_t agno, 2957 xfs_agblock_t agbno) 2958 { 2959 xfs_agblock_t eoag; 2960 2961 eoag = xfs_ag_block_count(mp, agno); 2962 if (agbno >= eoag) 2963 return false; 2964 if (agbno <= XFS_AGFL_BLOCK(mp)) 2965 return false; 2966 return true; 2967 } 2968 2969 /* 2970 * Verify that an FS block number pointer neither points outside the 2971 * filesystem nor points at static AG metadata. 2972 */ 2973 bool 2974 xfs_verify_fsbno( 2975 struct xfs_mount *mp, 2976 xfs_fsblock_t fsbno) 2977 { 2978 xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, fsbno); 2979 2980 if (agno >= mp->m_sb.sb_agcount) 2981 return false; 2982 return xfs_verify_agbno(mp, agno, XFS_FSB_TO_AGBNO(mp, fsbno)); 2983 } 2984
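
/*
 * Illustrative sketch (not part of the original source): a minimal user of
 * the free space query interface above.  The callback and helper names are
 * hypothetical; the callback signature matches xfs_alloc_query_range_fn as
 * invoked by xfs_alloc_query_range_helper().
 */
#if 0
struct xfs_alloc_example_stats {
	uint64_t	nr_extents;	/* number of free extents seen */
	uint64_t	free_blocks;	/* total blocks in those extents */
};

/* Per-record callback: tally one free space record. */
static int
xfs_alloc_example_fn(
	struct xfs_btree_cur		*cur,
	struct xfs_alloc_rec_incore	*rec,
	void				*priv)
{
	struct xfs_alloc_example_stats	*stats = priv;

	stats->nr_extents++;
	stats->free_blocks += rec->ar_blockcount;
	return 0;
}

/*
 * Walk every record under the given cursor; the caller owns cur and must
 * have opened it on the by-block (XFS_BTNUM_BNO) btree, as the ASSERTs in
 * xfs_alloc_query_all() and xfs_alloc_query_range() require.
 */
static int
xfs_alloc_example_count_free(
	struct xfs_btree_cur		*cur,
	struct xfs_alloc_example_stats	*stats)
{
	return xfs_alloc_query_all(cur, xfs_alloc_example_fn, stats);
}
#endif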