1 /* 2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. 3 * All Rights Reserved. 4 * 5 * This program is free software; you can redistribute it and/or 6 * modify it under the terms of the GNU General Public License as 7 * published by the Free Software Foundation. 8 * 9 * This program is distributed in the hope that it would be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * 14 * You should have received a copy of the GNU General Public License 15 * along with this program; if not, write the Free Software Foundation, 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 17 */ 18 #include "xfs.h" 19 #include "xfs_fs.h" 20 #include "xfs_format.h" 21 #include "xfs_log_format.h" 22 #include "xfs_shared.h" 23 #include "xfs_trans_resv.h" 24 #include "xfs_bit.h" 25 #include "xfs_sb.h" 26 #include "xfs_mount.h" 27 #include "xfs_inode.h" 28 #include "xfs_btree.h" 29 #include "xfs_alloc_btree.h" 30 #include "xfs_alloc.h" 31 #include "xfs_extent_busy.h" 32 #include "xfs_error.h" 33 #include "xfs_cksum.h" 34 #include "xfs_trace.h" 35 #include "xfs_trans.h" 36 #include "xfs_buf_item.h" 37 #include "xfs_log.h" 38 39 struct workqueue_struct *xfs_alloc_wq; 40 41 #define XFS_ABSDIFF(a,b) (((a) <= (b)) ? ((b) - (a)) : ((a) - (b))) 42 43 #define XFSA_FIXUP_BNO_OK 1 44 #define XFSA_FIXUP_CNT_OK 2 45 46 STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *); 47 STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *); 48 STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *); 49 STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *, 50 xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *); 51 52 /* 53 * Lookup the record equal to [bno, len] in the btree given by cur. 54 */ 55 STATIC int /* error */ 56 xfs_alloc_lookup_eq( 57 struct xfs_btree_cur *cur, /* btree cursor */ 58 xfs_agblock_t bno, /* starting block of extent */ 59 xfs_extlen_t len, /* length of extent */ 60 int *stat) /* success/failure */ 61 { 62 cur->bc_rec.a.ar_startblock = bno; 63 cur->bc_rec.a.ar_blockcount = len; 64 return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat); 65 } 66 67 /* 68 * Lookup the first record greater than or equal to [bno, len] 69 * in the btree given by cur. 70 */ 71 int /* error */ 72 xfs_alloc_lookup_ge( 73 struct xfs_btree_cur *cur, /* btree cursor */ 74 xfs_agblock_t bno, /* starting block of extent */ 75 xfs_extlen_t len, /* length of extent */ 76 int *stat) /* success/failure */ 77 { 78 cur->bc_rec.a.ar_startblock = bno; 79 cur->bc_rec.a.ar_blockcount = len; 80 return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat); 81 } 82 83 /* 84 * Lookup the first record less than or equal to [bno, len] 85 * in the btree given by cur. 86 */ 87 int /* error */ 88 xfs_alloc_lookup_le( 89 struct xfs_btree_cur *cur, /* btree cursor */ 90 xfs_agblock_t bno, /* starting block of extent */ 91 xfs_extlen_t len, /* length of extent */ 92 int *stat) /* success/failure */ 93 { 94 cur->bc_rec.a.ar_startblock = bno; 95 cur->bc_rec.a.ar_blockcount = len; 96 return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat); 97 } 98 99 /* 100 * Update the record referred to by cur to the value given 101 * by [bno, len]. 102 * This either works (return 0) or gets an EFSCORRUPTED error. 103 */ 104 STATIC int /* error */ 105 xfs_alloc_update( 106 struct xfs_btree_cur *cur, /* btree cursor */ 107 xfs_agblock_t bno, /* starting block of extent */ 108 xfs_extlen_t len) /* length of extent */ 109 { 110 union xfs_btree_rec rec; 111 112 rec.alloc.ar_startblock = cpu_to_be32(bno); 113 rec.alloc.ar_blockcount = cpu_to_be32(len); 114 return xfs_btree_update(cur, &rec); 115 } 116 117 /* 118 * Get the data from the pointed-to record. 119 */ 120 int /* error */ 121 xfs_alloc_get_rec( 122 struct xfs_btree_cur *cur, /* btree cursor */ 123 xfs_agblock_t *bno, /* output: starting block of extent */ 124 xfs_extlen_t *len, /* output: length of extent */ 125 int *stat) /* output: success/failure */ 126 { 127 union xfs_btree_rec *rec; 128 int error; 129 130 error = xfs_btree_get_rec(cur, &rec, stat); 131 if (!error && *stat == 1) { 132 *bno = be32_to_cpu(rec->alloc.ar_startblock); 133 *len = be32_to_cpu(rec->alloc.ar_blockcount); 134 } 135 return error; 136 } 137 138 /* 139 * Compute aligned version of the found extent. 140 * Takes alignment and min length into account. 141 */ 142 STATIC void 143 xfs_alloc_compute_aligned( 144 xfs_alloc_arg_t *args, /* allocation argument structure */ 145 xfs_agblock_t foundbno, /* starting block in found extent */ 146 xfs_extlen_t foundlen, /* length in found extent */ 147 xfs_agblock_t *resbno, /* result block number */ 148 xfs_extlen_t *reslen) /* result length */ 149 { 150 xfs_agblock_t bno; 151 xfs_extlen_t len; 152 153 /* Trim busy sections out of found extent */ 154 xfs_extent_busy_trim(args, foundbno, foundlen, &bno, &len); 155 156 if (args->alignment > 1 && len >= args->minlen) { 157 xfs_agblock_t aligned_bno = roundup(bno, args->alignment); 158 xfs_extlen_t diff = aligned_bno - bno; 159 160 *resbno = aligned_bno; 161 *reslen = diff >= len ? 0 : len - diff; 162 } else { 163 *resbno = bno; 164 *reslen = len; 165 } 166 } 167 168 /* 169 * Compute best start block and diff for "near" allocations. 170 * freelen >= wantlen already checked by caller. 171 */ 172 STATIC xfs_extlen_t /* difference value (absolute) */ 173 xfs_alloc_compute_diff( 174 xfs_agblock_t wantbno, /* target starting block */ 175 xfs_extlen_t wantlen, /* target length */ 176 xfs_extlen_t alignment, /* target alignment */ 177 char userdata, /* are we allocating data? */ 178 xfs_agblock_t freebno, /* freespace's starting block */ 179 xfs_extlen_t freelen, /* freespace's length */ 180 xfs_agblock_t *newbnop) /* result: best start block from free */ 181 { 182 xfs_agblock_t freeend; /* end of freespace extent */ 183 xfs_agblock_t newbno1; /* return block number */ 184 xfs_agblock_t newbno2; /* other new block number */ 185 xfs_extlen_t newlen1=0; /* length with newbno1 */ 186 xfs_extlen_t newlen2=0; /* length with newbno2 */ 187 xfs_agblock_t wantend; /* end of target extent */ 188 189 ASSERT(freelen >= wantlen); 190 freeend = freebno + freelen; 191 wantend = wantbno + wantlen; 192 /* 193 * We want to allocate from the start of a free extent if it is past 194 * the desired block or if we are allocating user data and the free 195 * extent is before desired block. The second case is there to allow 196 * for contiguous allocation from the remaining free space if the file 197 * grows in the short term. 198 */ 199 if (freebno >= wantbno || (userdata && freeend < wantend)) { 200 if ((newbno1 = roundup(freebno, alignment)) >= freeend) 201 newbno1 = NULLAGBLOCK; 202 } else if (freeend >= wantend && alignment > 1) { 203 newbno1 = roundup(wantbno, alignment); 204 newbno2 = newbno1 - alignment; 205 if (newbno1 >= freeend) 206 newbno1 = NULLAGBLOCK; 207 else 208 newlen1 = XFS_EXTLEN_MIN(wantlen, freeend - newbno1); 209 if (newbno2 < freebno) 210 newbno2 = NULLAGBLOCK; 211 else 212 newlen2 = XFS_EXTLEN_MIN(wantlen, freeend - newbno2); 213 if (newbno1 != NULLAGBLOCK && newbno2 != NULLAGBLOCK) { 214 if (newlen1 < newlen2 || 215 (newlen1 == newlen2 && 216 XFS_ABSDIFF(newbno1, wantbno) > 217 XFS_ABSDIFF(newbno2, wantbno))) 218 newbno1 = newbno2; 219 } else if (newbno2 != NULLAGBLOCK) 220 newbno1 = newbno2; 221 } else if (freeend >= wantend) { 222 newbno1 = wantbno; 223 } else if (alignment > 1) { 224 newbno1 = roundup(freeend - wantlen, alignment); 225 if (newbno1 > freeend - wantlen && 226 newbno1 - alignment >= freebno) 227 newbno1 -= alignment; 228 else if (newbno1 >= freeend) 229 newbno1 = NULLAGBLOCK; 230 } else 231 newbno1 = freeend - wantlen; 232 *newbnop = newbno1; 233 return newbno1 == NULLAGBLOCK ? 0 : XFS_ABSDIFF(newbno1, wantbno); 234 } 235 236 /* 237 * Fix up the length, based on mod and prod. 238 * len should be k * prod + mod for some k. 239 * If len is too small it is returned unchanged. 240 * If len hits maxlen it is left alone. 241 */ 242 STATIC void 243 xfs_alloc_fix_len( 244 xfs_alloc_arg_t *args) /* allocation argument structure */ 245 { 246 xfs_extlen_t k; 247 xfs_extlen_t rlen; 248 249 ASSERT(args->mod < args->prod); 250 rlen = args->len; 251 ASSERT(rlen >= args->minlen); 252 ASSERT(rlen <= args->maxlen); 253 if (args->prod <= 1 || rlen < args->mod || rlen == args->maxlen || 254 (args->mod == 0 && rlen < args->prod)) 255 return; 256 k = rlen % args->prod; 257 if (k == args->mod) 258 return; 259 if (k > args->mod) 260 rlen = rlen - (k - args->mod); 261 else 262 rlen = rlen - args->prod + (args->mod - k); 263 if ((int)rlen < (int)args->minlen) 264 return; 265 ASSERT(rlen >= args->minlen && rlen <= args->maxlen); 266 ASSERT(rlen % args->prod == args->mod); 267 args->len = rlen; 268 } 269 270 /* 271 * Fix up length if there is too little space left in the a.g. 272 * Return 1 if ok, 0 if too little, should give up. 273 */ 274 STATIC int 275 xfs_alloc_fix_minleft( 276 xfs_alloc_arg_t *args) /* allocation argument structure */ 277 { 278 xfs_agf_t *agf; /* a.g. freelist header */ 279 int diff; /* free space difference */ 280 281 if (args->minleft == 0) 282 return 1; 283 agf = XFS_BUF_TO_AGF(args->agbp); 284 diff = be32_to_cpu(agf->agf_freeblks) 285 - args->len - args->minleft; 286 if (diff >= 0) 287 return 1; 288 args->len += diff; /* shrink the allocated space */ 289 if (args->len >= args->minlen) 290 return 1; 291 args->agbno = NULLAGBLOCK; 292 return 0; 293 } 294 295 /* 296 * Update the two btrees, logically removing from freespace the extent 297 * starting at rbno, rlen blocks. The extent is contained within the 298 * actual (current) free extent fbno for flen blocks. 299 * Flags are passed in indicating whether the cursors are set to the 300 * relevant records. 301 */ 302 STATIC int /* error code */ 303 xfs_alloc_fixup_trees( 304 xfs_btree_cur_t *cnt_cur, /* cursor for by-size btree */ 305 xfs_btree_cur_t *bno_cur, /* cursor for by-block btree */ 306 xfs_agblock_t fbno, /* starting block of free extent */ 307 xfs_extlen_t flen, /* length of free extent */ 308 xfs_agblock_t rbno, /* starting block of returned extent */ 309 xfs_extlen_t rlen, /* length of returned extent */ 310 int flags) /* flags, XFSA_FIXUP_... */ 311 { 312 int error; /* error code */ 313 int i; /* operation results */ 314 xfs_agblock_t nfbno1; /* first new free startblock */ 315 xfs_agblock_t nfbno2; /* second new free startblock */ 316 xfs_extlen_t nflen1=0; /* first new free length */ 317 xfs_extlen_t nflen2=0; /* second new free length */ 318 319 /* 320 * Look up the record in the by-size tree if necessary. 321 */ 322 if (flags & XFSA_FIXUP_CNT_OK) { 323 #ifdef DEBUG 324 if ((error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i))) 325 return error; 326 XFS_WANT_CORRUPTED_RETURN( 327 i == 1 && nfbno1 == fbno && nflen1 == flen); 328 #endif 329 } else { 330 if ((error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i))) 331 return error; 332 XFS_WANT_CORRUPTED_RETURN(i == 1); 333 } 334 /* 335 * Look up the record in the by-block tree if necessary. 336 */ 337 if (flags & XFSA_FIXUP_BNO_OK) { 338 #ifdef DEBUG 339 if ((error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i))) 340 return error; 341 XFS_WANT_CORRUPTED_RETURN( 342 i == 1 && nfbno1 == fbno && nflen1 == flen); 343 #endif 344 } else { 345 if ((error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i))) 346 return error; 347 XFS_WANT_CORRUPTED_RETURN(i == 1); 348 } 349 350 #ifdef DEBUG 351 if (bno_cur->bc_nlevels == 1 && cnt_cur->bc_nlevels == 1) { 352 struct xfs_btree_block *bnoblock; 353 struct xfs_btree_block *cntblock; 354 355 bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_bufs[0]); 356 cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_bufs[0]); 357 358 XFS_WANT_CORRUPTED_RETURN( 359 bnoblock->bb_numrecs == cntblock->bb_numrecs); 360 } 361 #endif 362 363 /* 364 * Deal with all four cases: the allocated record is contained 365 * within the freespace record, so we can have new freespace 366 * at either (or both) end, or no freespace remaining. 367 */ 368 if (rbno == fbno && rlen == flen) 369 nfbno1 = nfbno2 = NULLAGBLOCK; 370 else if (rbno == fbno) { 371 nfbno1 = rbno + rlen; 372 nflen1 = flen - rlen; 373 nfbno2 = NULLAGBLOCK; 374 } else if (rbno + rlen == fbno + flen) { 375 nfbno1 = fbno; 376 nflen1 = flen - rlen; 377 nfbno2 = NULLAGBLOCK; 378 } else { 379 nfbno1 = fbno; 380 nflen1 = rbno - fbno; 381 nfbno2 = rbno + rlen; 382 nflen2 = (fbno + flen) - nfbno2; 383 } 384 /* 385 * Delete the entry from the by-size btree. 386 */ 387 if ((error = xfs_btree_delete(cnt_cur, &i))) 388 return error; 389 XFS_WANT_CORRUPTED_RETURN(i == 1); 390 /* 391 * Add new by-size btree entry(s). 392 */ 393 if (nfbno1 != NULLAGBLOCK) { 394 if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i))) 395 return error; 396 XFS_WANT_CORRUPTED_RETURN(i == 0); 397 if ((error = xfs_btree_insert(cnt_cur, &i))) 398 return error; 399 XFS_WANT_CORRUPTED_RETURN(i == 1); 400 } 401 if (nfbno2 != NULLAGBLOCK) { 402 if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i))) 403 return error; 404 XFS_WANT_CORRUPTED_RETURN(i == 0); 405 if ((error = xfs_btree_insert(cnt_cur, &i))) 406 return error; 407 XFS_WANT_CORRUPTED_RETURN(i == 1); 408 } 409 /* 410 * Fix up the by-block btree entry(s). 411 */ 412 if (nfbno1 == NULLAGBLOCK) { 413 /* 414 * No remaining freespace, just delete the by-block tree entry. 415 */ 416 if ((error = xfs_btree_delete(bno_cur, &i))) 417 return error; 418 XFS_WANT_CORRUPTED_RETURN(i == 1); 419 } else { 420 /* 421 * Update the by-block entry to start later|be shorter. 422 */ 423 if ((error = xfs_alloc_update(bno_cur, nfbno1, nflen1))) 424 return error; 425 } 426 if (nfbno2 != NULLAGBLOCK) { 427 /* 428 * 2 resulting free entries, need to add one. 429 */ 430 if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i))) 431 return error; 432 XFS_WANT_CORRUPTED_RETURN(i == 0); 433 if ((error = xfs_btree_insert(bno_cur, &i))) 434 return error; 435 XFS_WANT_CORRUPTED_RETURN(i == 1); 436 } 437 return 0; 438 } 439 440 static bool 441 xfs_agfl_verify( 442 struct xfs_buf *bp) 443 { 444 struct xfs_mount *mp = bp->b_target->bt_mount; 445 struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp); 446 int i; 447 448 if (!uuid_equal(&agfl->agfl_uuid, &mp->m_sb.sb_uuid)) 449 return false; 450 if (be32_to_cpu(agfl->agfl_magicnum) != XFS_AGFL_MAGIC) 451 return false; 452 /* 453 * during growfs operations, the perag is not fully initialised, 454 * so we can't use it for any useful checking. growfs ensures we can't 455 * use it by using uncached buffers that don't have the perag attached 456 * so we can detect and avoid this problem. 457 */ 458 if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno) 459 return false; 460 461 for (i = 0; i < XFS_AGFL_SIZE(mp); i++) { 462 if (be32_to_cpu(agfl->agfl_bno[i]) != NULLAGBLOCK && 463 be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks) 464 return false; 465 } 466 return true; 467 } 468 469 static void 470 xfs_agfl_read_verify( 471 struct xfs_buf *bp) 472 { 473 struct xfs_mount *mp = bp->b_target->bt_mount; 474 475 /* 476 * There is no verification of non-crc AGFLs because mkfs does not 477 * initialise the AGFL to zero or NULL. Hence the only valid part of the 478 * AGFL is what the AGF says is active. We can't get to the AGF, so we 479 * can't verify just those entries are valid. 480 */ 481 if (!xfs_sb_version_hascrc(&mp->m_sb)) 482 return; 483 484 if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF)) 485 xfs_buf_ioerror(bp, -EFSBADCRC); 486 else if (!xfs_agfl_verify(bp)) 487 xfs_buf_ioerror(bp, -EFSCORRUPTED); 488 489 if (bp->b_error) 490 xfs_verifier_error(bp); 491 } 492 493 static void 494 xfs_agfl_write_verify( 495 struct xfs_buf *bp) 496 { 497 struct xfs_mount *mp = bp->b_target->bt_mount; 498 struct xfs_buf_log_item *bip = bp->b_fspriv; 499 500 /* no verification of non-crc AGFLs */ 501 if (!xfs_sb_version_hascrc(&mp->m_sb)) 502 return; 503 504 if (!xfs_agfl_verify(bp)) { 505 xfs_buf_ioerror(bp, -EFSCORRUPTED); 506 xfs_verifier_error(bp); 507 return; 508 } 509 510 if (bip) 511 XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn); 512 513 xfs_buf_update_cksum(bp, XFS_AGFL_CRC_OFF); 514 } 515 516 const struct xfs_buf_ops xfs_agfl_buf_ops = { 517 .verify_read = xfs_agfl_read_verify, 518 .verify_write = xfs_agfl_write_verify, 519 }; 520 521 /* 522 * Read in the allocation group free block array. 523 */ 524 STATIC int /* error */ 525 xfs_alloc_read_agfl( 526 xfs_mount_t *mp, /* mount point structure */ 527 xfs_trans_t *tp, /* transaction pointer */ 528 xfs_agnumber_t agno, /* allocation group number */ 529 xfs_buf_t **bpp) /* buffer for the ag free block array */ 530 { 531 xfs_buf_t *bp; /* return value */ 532 int error; 533 534 ASSERT(agno != NULLAGNUMBER); 535 error = xfs_trans_read_buf( 536 mp, tp, mp->m_ddev_targp, 537 XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)), 538 XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_agfl_buf_ops); 539 if (error) 540 return error; 541 xfs_buf_set_ref(bp, XFS_AGFL_REF); 542 *bpp = bp; 543 return 0; 544 } 545 546 STATIC int 547 xfs_alloc_update_counters( 548 struct xfs_trans *tp, 549 struct xfs_perag *pag, 550 struct xfs_buf *agbp, 551 long len) 552 { 553 struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); 554 555 pag->pagf_freeblks += len; 556 be32_add_cpu(&agf->agf_freeblks, len); 557 558 xfs_trans_agblocks_delta(tp, len); 559 if (unlikely(be32_to_cpu(agf->agf_freeblks) > 560 be32_to_cpu(agf->agf_length))) 561 return -EFSCORRUPTED; 562 563 xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS); 564 return 0; 565 } 566 567 /* 568 * Allocation group level functions. 569 */ 570 571 /* 572 * Allocate a variable extent in the allocation group agno. 573 * Type and bno are used to determine where in the allocation group the 574 * extent will start. 575 * Extent's length (returned in *len) will be between minlen and maxlen, 576 * and of the form k * prod + mod unless there's nothing that large. 577 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it. 578 */ 579 STATIC int /* error */ 580 xfs_alloc_ag_vextent( 581 xfs_alloc_arg_t *args) /* argument structure for allocation */ 582 { 583 int error=0; 584 585 ASSERT(args->minlen > 0); 586 ASSERT(args->maxlen > 0); 587 ASSERT(args->minlen <= args->maxlen); 588 ASSERT(args->mod < args->prod); 589 ASSERT(args->alignment > 0); 590 /* 591 * Branch to correct routine based on the type. 592 */ 593 args->wasfromfl = 0; 594 switch (args->type) { 595 case XFS_ALLOCTYPE_THIS_AG: 596 error = xfs_alloc_ag_vextent_size(args); 597 break; 598 case XFS_ALLOCTYPE_NEAR_BNO: 599 error = xfs_alloc_ag_vextent_near(args); 600 break; 601 case XFS_ALLOCTYPE_THIS_BNO: 602 error = xfs_alloc_ag_vextent_exact(args); 603 break; 604 default: 605 ASSERT(0); 606 /* NOTREACHED */ 607 } 608 609 if (error || args->agbno == NULLAGBLOCK) 610 return error; 611 612 ASSERT(args->len >= args->minlen); 613 ASSERT(args->len <= args->maxlen); 614 ASSERT(!args->wasfromfl || !args->isfl); 615 ASSERT(args->agbno % args->alignment == 0); 616 617 if (!args->wasfromfl) { 618 error = xfs_alloc_update_counters(args->tp, args->pag, 619 args->agbp, 620 -((long)(args->len))); 621 if (error) 622 return error; 623 624 ASSERT(!xfs_extent_busy_search(args->mp, args->agno, 625 args->agbno, args->len)); 626 } 627 628 if (!args->isfl) { 629 xfs_trans_mod_sb(args->tp, args->wasdel ? 630 XFS_TRANS_SB_RES_FDBLOCKS : 631 XFS_TRANS_SB_FDBLOCKS, 632 -((long)(args->len))); 633 } 634 635 XFS_STATS_INC(xs_allocx); 636 XFS_STATS_ADD(xs_allocb, args->len); 637 return error; 638 } 639 640 /* 641 * Allocate a variable extent at exactly agno/bno. 642 * Extent's length (returned in *len) will be between minlen and maxlen, 643 * and of the form k * prod + mod unless there's nothing that large. 644 * Return the starting a.g. block (bno), or NULLAGBLOCK if we can't do it. 645 */ 646 STATIC int /* error */ 647 xfs_alloc_ag_vextent_exact( 648 xfs_alloc_arg_t *args) /* allocation argument structure */ 649 { 650 xfs_btree_cur_t *bno_cur;/* by block-number btree cursor */ 651 xfs_btree_cur_t *cnt_cur;/* by count btree cursor */ 652 int error; 653 xfs_agblock_t fbno; /* start block of found extent */ 654 xfs_extlen_t flen; /* length of found extent */ 655 xfs_agblock_t tbno; /* start block of trimmed extent */ 656 xfs_extlen_t tlen; /* length of trimmed extent */ 657 xfs_agblock_t tend; /* end block of trimmed extent */ 658 int i; /* success/failure of operation */ 659 660 ASSERT(args->alignment == 1); 661 662 /* 663 * Allocate/initialize a cursor for the by-number freespace btree. 664 */ 665 bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, 666 args->agno, XFS_BTNUM_BNO); 667 668 /* 669 * Lookup bno and minlen in the btree (minlen is irrelevant, really). 670 * Look for the closest free block <= bno, it must contain bno 671 * if any free block does. 672 */ 673 error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i); 674 if (error) 675 goto error0; 676 if (!i) 677 goto not_found; 678 679 /* 680 * Grab the freespace record. 681 */ 682 error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i); 683 if (error) 684 goto error0; 685 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 686 ASSERT(fbno <= args->agbno); 687 688 /* 689 * Check for overlapping busy extents. 690 */ 691 xfs_extent_busy_trim(args, fbno, flen, &tbno, &tlen); 692 693 /* 694 * Give up if the start of the extent is busy, or the freespace isn't 695 * long enough for the minimum request. 696 */ 697 if (tbno > args->agbno) 698 goto not_found; 699 if (tlen < args->minlen) 700 goto not_found; 701 tend = tbno + tlen; 702 if (tend < args->agbno + args->minlen) 703 goto not_found; 704 705 /* 706 * End of extent will be smaller of the freespace end and the 707 * maximal requested end. 708 * 709 * Fix the length according to mod and prod if given. 710 */ 711 args->len = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen) 712 - args->agbno; 713 xfs_alloc_fix_len(args); 714 if (!xfs_alloc_fix_minleft(args)) 715 goto not_found; 716 717 ASSERT(args->agbno + args->len <= tend); 718 719 /* 720 * We are allocating agbno for args->len 721 * Allocate/initialize a cursor for the by-size btree. 722 */ 723 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, 724 args->agno, XFS_BTNUM_CNT); 725 ASSERT(args->agbno + args->len <= 726 be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); 727 error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno, 728 args->len, XFSA_FIXUP_BNO_OK); 729 if (error) { 730 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR); 731 goto error0; 732 } 733 734 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); 735 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 736 737 args->wasfromfl = 0; 738 trace_xfs_alloc_exact_done(args); 739 return 0; 740 741 not_found: 742 /* Didn't find it, return null. */ 743 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); 744 args->agbno = NULLAGBLOCK; 745 trace_xfs_alloc_exact_notfound(args); 746 return 0; 747 748 error0: 749 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR); 750 trace_xfs_alloc_exact_error(args); 751 return error; 752 } 753 754 /* 755 * Search the btree in a given direction via the search cursor and compare 756 * the records found against the good extent we've already found. 757 */ 758 STATIC int 759 xfs_alloc_find_best_extent( 760 struct xfs_alloc_arg *args, /* allocation argument structure */ 761 struct xfs_btree_cur **gcur, /* good cursor */ 762 struct xfs_btree_cur **scur, /* searching cursor */ 763 xfs_agblock_t gdiff, /* difference for search comparison */ 764 xfs_agblock_t *sbno, /* extent found by search */ 765 xfs_extlen_t *slen, /* extent length */ 766 xfs_agblock_t *sbnoa, /* aligned extent found by search */ 767 xfs_extlen_t *slena, /* aligned extent length */ 768 int dir) /* 0 = search right, 1 = search left */ 769 { 770 xfs_agblock_t new; 771 xfs_agblock_t sdiff; 772 int error; 773 int i; 774 775 /* The good extent is perfect, no need to search. */ 776 if (!gdiff) 777 goto out_use_good; 778 779 /* 780 * Look until we find a better one, run out of space or run off the end. 781 */ 782 do { 783 error = xfs_alloc_get_rec(*scur, sbno, slen, &i); 784 if (error) 785 goto error0; 786 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 787 xfs_alloc_compute_aligned(args, *sbno, *slen, sbnoa, slena); 788 789 /* 790 * The good extent is closer than this one. 791 */ 792 if (!dir) { 793 if (*sbnoa >= args->agbno + gdiff) 794 goto out_use_good; 795 } else { 796 if (*sbnoa <= args->agbno - gdiff) 797 goto out_use_good; 798 } 799 800 /* 801 * Same distance, compare length and pick the best. 802 */ 803 if (*slena >= args->minlen) { 804 args->len = XFS_EXTLEN_MIN(*slena, args->maxlen); 805 xfs_alloc_fix_len(args); 806 807 sdiff = xfs_alloc_compute_diff(args->agbno, args->len, 808 args->alignment, 809 args->userdata, *sbnoa, 810 *slena, &new); 811 812 /* 813 * Choose closer size and invalidate other cursor. 814 */ 815 if (sdiff < gdiff) 816 goto out_use_search; 817 goto out_use_good; 818 } 819 820 if (!dir) 821 error = xfs_btree_increment(*scur, 0, &i); 822 else 823 error = xfs_btree_decrement(*scur, 0, &i); 824 if (error) 825 goto error0; 826 } while (i); 827 828 out_use_good: 829 xfs_btree_del_cursor(*scur, XFS_BTREE_NOERROR); 830 *scur = NULL; 831 return 0; 832 833 out_use_search: 834 xfs_btree_del_cursor(*gcur, XFS_BTREE_NOERROR); 835 *gcur = NULL; 836 return 0; 837 838 error0: 839 /* caller invalidates cursors */ 840 return error; 841 } 842 843 /* 844 * Allocate a variable extent near bno in the allocation group agno. 845 * Extent's length (returned in len) will be between minlen and maxlen, 846 * and of the form k * prod + mod unless there's nothing that large. 847 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it. 848 */ 849 STATIC int /* error */ 850 xfs_alloc_ag_vextent_near( 851 xfs_alloc_arg_t *args) /* allocation argument structure */ 852 { 853 xfs_btree_cur_t *bno_cur_gt; /* cursor for bno btree, right side */ 854 xfs_btree_cur_t *bno_cur_lt; /* cursor for bno btree, left side */ 855 xfs_btree_cur_t *cnt_cur; /* cursor for count btree */ 856 xfs_agblock_t gtbno; /* start bno of right side entry */ 857 xfs_agblock_t gtbnoa; /* aligned ... */ 858 xfs_extlen_t gtdiff; /* difference to right side entry */ 859 xfs_extlen_t gtlen; /* length of right side entry */ 860 xfs_extlen_t gtlena; /* aligned ... */ 861 xfs_agblock_t gtnew; /* useful start bno of right side */ 862 int error; /* error code */ 863 int i; /* result code, temporary */ 864 int j; /* result code, temporary */ 865 xfs_agblock_t ltbno; /* start bno of left side entry */ 866 xfs_agblock_t ltbnoa; /* aligned ... */ 867 xfs_extlen_t ltdiff; /* difference to left side entry */ 868 xfs_extlen_t ltlen; /* length of left side entry */ 869 xfs_extlen_t ltlena; /* aligned ... */ 870 xfs_agblock_t ltnew; /* useful start bno of left side */ 871 xfs_extlen_t rlen; /* length of returned extent */ 872 int forced = 0; 873 #ifdef DEBUG 874 /* 875 * Randomly don't execute the first algorithm. 876 */ 877 int dofirst; /* set to do first algorithm */ 878 879 dofirst = prandom_u32() & 1; 880 #endif 881 882 restart: 883 bno_cur_lt = NULL; 884 bno_cur_gt = NULL; 885 ltlen = 0; 886 gtlena = 0; 887 ltlena = 0; 888 889 /* 890 * Get a cursor for the by-size btree. 891 */ 892 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, 893 args->agno, XFS_BTNUM_CNT); 894 895 /* 896 * See if there are any free extents as big as maxlen. 897 */ 898 if ((error = xfs_alloc_lookup_ge(cnt_cur, 0, args->maxlen, &i))) 899 goto error0; 900 /* 901 * If none, then pick up the last entry in the tree unless the 902 * tree is empty. 903 */ 904 if (!i) { 905 if ((error = xfs_alloc_ag_vextent_small(args, cnt_cur, <bno, 906 <len, &i))) 907 goto error0; 908 if (i == 0 || ltlen == 0) { 909 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 910 trace_xfs_alloc_near_noentry(args); 911 return 0; 912 } 913 ASSERT(i == 1); 914 } 915 args->wasfromfl = 0; 916 917 /* 918 * First algorithm. 919 * If the requested extent is large wrt the freespaces available 920 * in this a.g., then the cursor will be pointing to a btree entry 921 * near the right edge of the tree. If it's in the last btree leaf 922 * block, then we just examine all the entries in that block 923 * that are big enough, and pick the best one. 924 * This is written as a while loop so we can break out of it, 925 * but we never loop back to the top. 926 */ 927 while (xfs_btree_islastblock(cnt_cur, 0)) { 928 xfs_extlen_t bdiff; 929 int besti=0; 930 xfs_extlen_t blen=0; 931 xfs_agblock_t bnew=0; 932 933 #ifdef DEBUG 934 if (dofirst) 935 break; 936 #endif 937 /* 938 * Start from the entry that lookup found, sequence through 939 * all larger free blocks. If we're actually pointing at a 940 * record smaller than maxlen, go to the start of this block, 941 * and skip all those smaller than minlen. 942 */ 943 if (ltlen || args->alignment > 1) { 944 cnt_cur->bc_ptrs[0] = 1; 945 do { 946 if ((error = xfs_alloc_get_rec(cnt_cur, <bno, 947 <len, &i))) 948 goto error0; 949 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 950 if (ltlen >= args->minlen) 951 break; 952 if ((error = xfs_btree_increment(cnt_cur, 0, &i))) 953 goto error0; 954 } while (i); 955 ASSERT(ltlen >= args->minlen); 956 if (!i) 957 break; 958 } 959 i = cnt_cur->bc_ptrs[0]; 960 for (j = 1, blen = 0, bdiff = 0; 961 !error && j && (blen < args->maxlen || bdiff > 0); 962 error = xfs_btree_increment(cnt_cur, 0, &j)) { 963 /* 964 * For each entry, decide if it's better than 965 * the previous best entry. 966 */ 967 if ((error = xfs_alloc_get_rec(cnt_cur, <bno, <len, &i))) 968 goto error0; 969 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 970 xfs_alloc_compute_aligned(args, ltbno, ltlen, 971 <bnoa, <lena); 972 if (ltlena < args->minlen) 973 continue; 974 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); 975 xfs_alloc_fix_len(args); 976 ASSERT(args->len >= args->minlen); 977 if (args->len < blen) 978 continue; 979 ltdiff = xfs_alloc_compute_diff(args->agbno, args->len, 980 args->alignment, args->userdata, ltbnoa, 981 ltlena, <new); 982 if (ltnew != NULLAGBLOCK && 983 (args->len > blen || ltdiff < bdiff)) { 984 bdiff = ltdiff; 985 bnew = ltnew; 986 blen = args->len; 987 besti = cnt_cur->bc_ptrs[0]; 988 } 989 } 990 /* 991 * It didn't work. We COULD be in a case where 992 * there's a good record somewhere, so try again. 993 */ 994 if (blen == 0) 995 break; 996 /* 997 * Point at the best entry, and retrieve it again. 998 */ 999 cnt_cur->bc_ptrs[0] = besti; 1000 if ((error = xfs_alloc_get_rec(cnt_cur, <bno, <len, &i))) 1001 goto error0; 1002 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1003 ASSERT(ltbno + ltlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); 1004 args->len = blen; 1005 if (!xfs_alloc_fix_minleft(args)) { 1006 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1007 trace_xfs_alloc_near_nominleft(args); 1008 return 0; 1009 } 1010 blen = args->len; 1011 /* 1012 * We are allocating starting at bnew for blen blocks. 1013 */ 1014 args->agbno = bnew; 1015 ASSERT(bnew >= ltbno); 1016 ASSERT(bnew + blen <= ltbno + ltlen); 1017 /* 1018 * Set up a cursor for the by-bno tree. 1019 */ 1020 bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp, 1021 args->agbp, args->agno, XFS_BTNUM_BNO); 1022 /* 1023 * Fix up the btree entries. 1024 */ 1025 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, 1026 ltlen, bnew, blen, XFSA_FIXUP_CNT_OK))) 1027 goto error0; 1028 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1029 xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR); 1030 1031 trace_xfs_alloc_near_first(args); 1032 return 0; 1033 } 1034 /* 1035 * Second algorithm. 1036 * Search in the by-bno tree to the left and to the right 1037 * simultaneously, until in each case we find a space big enough, 1038 * or run into the edge of the tree. When we run into the edge, 1039 * we deallocate that cursor. 1040 * If both searches succeed, we compare the two spaces and pick 1041 * the better one. 1042 * With alignment, it's possible for both to fail; the upper 1043 * level algorithm that picks allocation groups for allocations 1044 * is not supposed to do this. 1045 */ 1046 /* 1047 * Allocate and initialize the cursor for the leftward search. 1048 */ 1049 bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, 1050 args->agno, XFS_BTNUM_BNO); 1051 /* 1052 * Lookup <= bno to find the leftward search's starting point. 1053 */ 1054 if ((error = xfs_alloc_lookup_le(bno_cur_lt, args->agbno, args->maxlen, &i))) 1055 goto error0; 1056 if (!i) { 1057 /* 1058 * Didn't find anything; use this cursor for the rightward 1059 * search. 1060 */ 1061 bno_cur_gt = bno_cur_lt; 1062 bno_cur_lt = NULL; 1063 } 1064 /* 1065 * Found something. Duplicate the cursor for the rightward search. 1066 */ 1067 else if ((error = xfs_btree_dup_cursor(bno_cur_lt, &bno_cur_gt))) 1068 goto error0; 1069 /* 1070 * Increment the cursor, so we will point at the entry just right 1071 * of the leftward entry if any, or to the leftmost entry. 1072 */ 1073 if ((error = xfs_btree_increment(bno_cur_gt, 0, &i))) 1074 goto error0; 1075 if (!i) { 1076 /* 1077 * It failed, there are no rightward entries. 1078 */ 1079 xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_NOERROR); 1080 bno_cur_gt = NULL; 1081 } 1082 /* 1083 * Loop going left with the leftward cursor, right with the 1084 * rightward cursor, until either both directions give up or 1085 * we find an entry at least as big as minlen. 1086 */ 1087 do { 1088 if (bno_cur_lt) { 1089 if ((error = xfs_alloc_get_rec(bno_cur_lt, <bno, <len, &i))) 1090 goto error0; 1091 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1092 xfs_alloc_compute_aligned(args, ltbno, ltlen, 1093 <bnoa, <lena); 1094 if (ltlena >= args->minlen) 1095 break; 1096 if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i))) 1097 goto error0; 1098 if (!i) { 1099 xfs_btree_del_cursor(bno_cur_lt, 1100 XFS_BTREE_NOERROR); 1101 bno_cur_lt = NULL; 1102 } 1103 } 1104 if (bno_cur_gt) { 1105 if ((error = xfs_alloc_get_rec(bno_cur_gt, >bno, >len, &i))) 1106 goto error0; 1107 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1108 xfs_alloc_compute_aligned(args, gtbno, gtlen, 1109 >bnoa, >lena); 1110 if (gtlena >= args->minlen) 1111 break; 1112 if ((error = xfs_btree_increment(bno_cur_gt, 0, &i))) 1113 goto error0; 1114 if (!i) { 1115 xfs_btree_del_cursor(bno_cur_gt, 1116 XFS_BTREE_NOERROR); 1117 bno_cur_gt = NULL; 1118 } 1119 } 1120 } while (bno_cur_lt || bno_cur_gt); 1121 1122 /* 1123 * Got both cursors still active, need to find better entry. 1124 */ 1125 if (bno_cur_lt && bno_cur_gt) { 1126 if (ltlena >= args->minlen) { 1127 /* 1128 * Left side is good, look for a right side entry. 1129 */ 1130 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); 1131 xfs_alloc_fix_len(args); 1132 ltdiff = xfs_alloc_compute_diff(args->agbno, args->len, 1133 args->alignment, args->userdata, ltbnoa, 1134 ltlena, <new); 1135 1136 error = xfs_alloc_find_best_extent(args, 1137 &bno_cur_lt, &bno_cur_gt, 1138 ltdiff, >bno, >len, 1139 >bnoa, >lena, 1140 0 /* search right */); 1141 } else { 1142 ASSERT(gtlena >= args->minlen); 1143 1144 /* 1145 * Right side is good, look for a left side entry. 1146 */ 1147 args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen); 1148 xfs_alloc_fix_len(args); 1149 gtdiff = xfs_alloc_compute_diff(args->agbno, args->len, 1150 args->alignment, args->userdata, gtbnoa, 1151 gtlena, >new); 1152 1153 error = xfs_alloc_find_best_extent(args, 1154 &bno_cur_gt, &bno_cur_lt, 1155 gtdiff, <bno, <len, 1156 <bnoa, <lena, 1157 1 /* search left */); 1158 } 1159 1160 if (error) 1161 goto error0; 1162 } 1163 1164 /* 1165 * If we couldn't get anything, give up. 1166 */ 1167 if (bno_cur_lt == NULL && bno_cur_gt == NULL) { 1168 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1169 1170 if (!forced++) { 1171 trace_xfs_alloc_near_busy(args); 1172 xfs_log_force(args->mp, XFS_LOG_SYNC); 1173 goto restart; 1174 } 1175 trace_xfs_alloc_size_neither(args); 1176 args->agbno = NULLAGBLOCK; 1177 return 0; 1178 } 1179 1180 /* 1181 * At this point we have selected a freespace entry, either to the 1182 * left or to the right. If it's on the right, copy all the 1183 * useful variables to the "left" set so we only have one 1184 * copy of this code. 1185 */ 1186 if (bno_cur_gt) { 1187 bno_cur_lt = bno_cur_gt; 1188 bno_cur_gt = NULL; 1189 ltbno = gtbno; 1190 ltbnoa = gtbnoa; 1191 ltlen = gtlen; 1192 ltlena = gtlena; 1193 j = 1; 1194 } else 1195 j = 0; 1196 1197 /* 1198 * Fix up the length and compute the useful address. 1199 */ 1200 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); 1201 xfs_alloc_fix_len(args); 1202 if (!xfs_alloc_fix_minleft(args)) { 1203 trace_xfs_alloc_near_nominleft(args); 1204 xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR); 1205 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1206 return 0; 1207 } 1208 rlen = args->len; 1209 (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment, 1210 args->userdata, ltbnoa, ltlena, <new); 1211 ASSERT(ltnew >= ltbno); 1212 ASSERT(ltnew + rlen <= ltbnoa + ltlena); 1213 ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); 1214 args->agbno = ltnew; 1215 1216 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen, 1217 ltnew, rlen, XFSA_FIXUP_BNO_OK))) 1218 goto error0; 1219 1220 if (j) 1221 trace_xfs_alloc_near_greater(args); 1222 else 1223 trace_xfs_alloc_near_lesser(args); 1224 1225 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1226 xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR); 1227 return 0; 1228 1229 error0: 1230 trace_xfs_alloc_near_error(args); 1231 if (cnt_cur != NULL) 1232 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR); 1233 if (bno_cur_lt != NULL) 1234 xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_ERROR); 1235 if (bno_cur_gt != NULL) 1236 xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_ERROR); 1237 return error; 1238 } 1239 1240 /* 1241 * Allocate a variable extent anywhere in the allocation group agno. 1242 * Extent's length (returned in len) will be between minlen and maxlen, 1243 * and of the form k * prod + mod unless there's nothing that large. 1244 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it. 1245 */ 1246 STATIC int /* error */ 1247 xfs_alloc_ag_vextent_size( 1248 xfs_alloc_arg_t *args) /* allocation argument structure */ 1249 { 1250 xfs_btree_cur_t *bno_cur; /* cursor for bno btree */ 1251 xfs_btree_cur_t *cnt_cur; /* cursor for cnt btree */ 1252 int error; /* error result */ 1253 xfs_agblock_t fbno; /* start of found freespace */ 1254 xfs_extlen_t flen; /* length of found freespace */ 1255 int i; /* temp status variable */ 1256 xfs_agblock_t rbno; /* returned block number */ 1257 xfs_extlen_t rlen; /* length of returned extent */ 1258 int forced = 0; 1259 1260 restart: 1261 /* 1262 * Allocate and initialize a cursor for the by-size btree. 1263 */ 1264 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, 1265 args->agno, XFS_BTNUM_CNT); 1266 bno_cur = NULL; 1267 1268 /* 1269 * Look for an entry >= maxlen+alignment-1 blocks. 1270 */ 1271 if ((error = xfs_alloc_lookup_ge(cnt_cur, 0, 1272 args->maxlen + args->alignment - 1, &i))) 1273 goto error0; 1274 1275 /* 1276 * If none or we have busy extents that we cannot allocate from, then 1277 * we have to settle for a smaller extent. In the case that there are 1278 * no large extents, this will return the last entry in the tree unless 1279 * the tree is empty. In the case that there are only busy large 1280 * extents, this will return the largest small extent unless there 1281 * are no smaller extents available. 1282 */ 1283 if (!i || forced > 1) { 1284 error = xfs_alloc_ag_vextent_small(args, cnt_cur, 1285 &fbno, &flen, &i); 1286 if (error) 1287 goto error0; 1288 if (i == 0 || flen == 0) { 1289 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1290 trace_xfs_alloc_size_noentry(args); 1291 return 0; 1292 } 1293 ASSERT(i == 1); 1294 xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen); 1295 } else { 1296 /* 1297 * Search for a non-busy extent that is large enough. 1298 * If we are at low space, don't check, or if we fall of 1299 * the end of the btree, turn off the busy check and 1300 * restart. 1301 */ 1302 for (;;) { 1303 error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i); 1304 if (error) 1305 goto error0; 1306 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1307 1308 xfs_alloc_compute_aligned(args, fbno, flen, 1309 &rbno, &rlen); 1310 1311 if (rlen >= args->maxlen) 1312 break; 1313 1314 error = xfs_btree_increment(cnt_cur, 0, &i); 1315 if (error) 1316 goto error0; 1317 if (i == 0) { 1318 /* 1319 * Our only valid extents must have been busy. 1320 * Make it unbusy by forcing the log out and 1321 * retrying. If we've been here before, forcing 1322 * the log isn't making the extents available, 1323 * which means they have probably been freed in 1324 * this transaction. In that case, we have to 1325 * give up on them and we'll attempt a minlen 1326 * allocation the next time around. 1327 */ 1328 xfs_btree_del_cursor(cnt_cur, 1329 XFS_BTREE_NOERROR); 1330 trace_xfs_alloc_size_busy(args); 1331 if (!forced++) 1332 xfs_log_force(args->mp, XFS_LOG_SYNC); 1333 goto restart; 1334 } 1335 } 1336 } 1337 1338 /* 1339 * In the first case above, we got the last entry in the 1340 * by-size btree. Now we check to see if the space hits maxlen 1341 * once aligned; if not, we search left for something better. 1342 * This can't happen in the second case above. 1343 */ 1344 rlen = XFS_EXTLEN_MIN(args->maxlen, rlen); 1345 XFS_WANT_CORRUPTED_GOTO(rlen == 0 || 1346 (rlen <= flen && rbno + rlen <= fbno + flen), error0); 1347 if (rlen < args->maxlen) { 1348 xfs_agblock_t bestfbno; 1349 xfs_extlen_t bestflen; 1350 xfs_agblock_t bestrbno; 1351 xfs_extlen_t bestrlen; 1352 1353 bestrlen = rlen; 1354 bestrbno = rbno; 1355 bestflen = flen; 1356 bestfbno = fbno; 1357 for (;;) { 1358 if ((error = xfs_btree_decrement(cnt_cur, 0, &i))) 1359 goto error0; 1360 if (i == 0) 1361 break; 1362 if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, 1363 &i))) 1364 goto error0; 1365 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1366 if (flen < bestrlen) 1367 break; 1368 xfs_alloc_compute_aligned(args, fbno, flen, 1369 &rbno, &rlen); 1370 rlen = XFS_EXTLEN_MIN(args->maxlen, rlen); 1371 XFS_WANT_CORRUPTED_GOTO(rlen == 0 || 1372 (rlen <= flen && rbno + rlen <= fbno + flen), 1373 error0); 1374 if (rlen > bestrlen) { 1375 bestrlen = rlen; 1376 bestrbno = rbno; 1377 bestflen = flen; 1378 bestfbno = fbno; 1379 if (rlen == args->maxlen) 1380 break; 1381 } 1382 } 1383 if ((error = xfs_alloc_lookup_eq(cnt_cur, bestfbno, bestflen, 1384 &i))) 1385 goto error0; 1386 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1387 rlen = bestrlen; 1388 rbno = bestrbno; 1389 flen = bestflen; 1390 fbno = bestfbno; 1391 } 1392 args->wasfromfl = 0; 1393 /* 1394 * Fix up the length. 1395 */ 1396 args->len = rlen; 1397 if (rlen < args->minlen) { 1398 if (!forced++) { 1399 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1400 trace_xfs_alloc_size_busy(args); 1401 xfs_log_force(args->mp, XFS_LOG_SYNC); 1402 goto restart; 1403 } 1404 goto out_nominleft; 1405 } 1406 xfs_alloc_fix_len(args); 1407 1408 if (!xfs_alloc_fix_minleft(args)) 1409 goto out_nominleft; 1410 rlen = args->len; 1411 XFS_WANT_CORRUPTED_GOTO(rlen <= flen, error0); 1412 /* 1413 * Allocate and initialize a cursor for the by-block tree. 1414 */ 1415 bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, 1416 args->agno, XFS_BTNUM_BNO); 1417 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, 1418 rbno, rlen, XFSA_FIXUP_CNT_OK))) 1419 goto error0; 1420 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1421 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); 1422 cnt_cur = bno_cur = NULL; 1423 args->len = rlen; 1424 args->agbno = rbno; 1425 XFS_WANT_CORRUPTED_GOTO( 1426 args->agbno + args->len <= 1427 be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length), 1428 error0); 1429 trace_xfs_alloc_size_done(args); 1430 return 0; 1431 1432 error0: 1433 trace_xfs_alloc_size_error(args); 1434 if (cnt_cur) 1435 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR); 1436 if (bno_cur) 1437 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR); 1438 return error; 1439 1440 out_nominleft: 1441 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1442 trace_xfs_alloc_size_nominleft(args); 1443 args->agbno = NULLAGBLOCK; 1444 return 0; 1445 } 1446 1447 /* 1448 * Deal with the case where only small freespaces remain. 1449 * Either return the contents of the last freespace record, 1450 * or allocate space from the freelist if there is nothing in the tree. 1451 */ 1452 STATIC int /* error */ 1453 xfs_alloc_ag_vextent_small( 1454 xfs_alloc_arg_t *args, /* allocation argument structure */ 1455 xfs_btree_cur_t *ccur, /* by-size cursor */ 1456 xfs_agblock_t *fbnop, /* result block number */ 1457 xfs_extlen_t *flenp, /* result length */ 1458 int *stat) /* status: 0-freelist, 1-normal/none */ 1459 { 1460 int error; 1461 xfs_agblock_t fbno; 1462 xfs_extlen_t flen; 1463 int i; 1464 1465 if ((error = xfs_btree_decrement(ccur, 0, &i))) 1466 goto error0; 1467 if (i) { 1468 if ((error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i))) 1469 goto error0; 1470 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1471 } 1472 /* 1473 * Nothing in the btree, try the freelist. Make sure 1474 * to respect minleft even when pulling from the 1475 * freelist. 1476 */ 1477 else if (args->minlen == 1 && args->alignment == 1 && !args->isfl && 1478 (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount) 1479 > args->minleft)) { 1480 error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0); 1481 if (error) 1482 goto error0; 1483 if (fbno != NULLAGBLOCK) { 1484 xfs_extent_busy_reuse(args->mp, args->agno, fbno, 1, 1485 args->userdata); 1486 1487 if (args->userdata) { 1488 xfs_buf_t *bp; 1489 1490 bp = xfs_btree_get_bufs(args->mp, args->tp, 1491 args->agno, fbno, 0); 1492 xfs_trans_binval(args->tp, bp); 1493 } 1494 args->len = 1; 1495 args->agbno = fbno; 1496 XFS_WANT_CORRUPTED_GOTO( 1497 args->agbno + args->len <= 1498 be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length), 1499 error0); 1500 args->wasfromfl = 1; 1501 trace_xfs_alloc_small_freelist(args); 1502 *stat = 0; 1503 return 0; 1504 } 1505 /* 1506 * Nothing in the freelist. 1507 */ 1508 else 1509 flen = 0; 1510 } 1511 /* 1512 * Can't allocate from the freelist for some reason. 1513 */ 1514 else { 1515 fbno = NULLAGBLOCK; 1516 flen = 0; 1517 } 1518 /* 1519 * Can't do the allocation, give up. 1520 */ 1521 if (flen < args->minlen) { 1522 args->agbno = NULLAGBLOCK; 1523 trace_xfs_alloc_small_notenough(args); 1524 flen = 0; 1525 } 1526 *fbnop = fbno; 1527 *flenp = flen; 1528 *stat = 1; 1529 trace_xfs_alloc_small_done(args); 1530 return 0; 1531 1532 error0: 1533 trace_xfs_alloc_small_error(args); 1534 return error; 1535 } 1536 1537 /* 1538 * Free the extent starting at agno/bno for length. 1539 */ 1540 STATIC int /* error */ 1541 xfs_free_ag_extent( 1542 xfs_trans_t *tp, /* transaction pointer */ 1543 xfs_buf_t *agbp, /* buffer for a.g. freelist header */ 1544 xfs_agnumber_t agno, /* allocation group number */ 1545 xfs_agblock_t bno, /* starting block number */ 1546 xfs_extlen_t len, /* length of extent */ 1547 int isfl) /* set if is freelist blocks - no sb acctg */ 1548 { 1549 xfs_btree_cur_t *bno_cur; /* cursor for by-block btree */ 1550 xfs_btree_cur_t *cnt_cur; /* cursor for by-size btree */ 1551 int error; /* error return value */ 1552 xfs_agblock_t gtbno; /* start of right neighbor block */ 1553 xfs_extlen_t gtlen; /* length of right neighbor block */ 1554 int haveleft; /* have a left neighbor block */ 1555 int haveright; /* have a right neighbor block */ 1556 int i; /* temp, result code */ 1557 xfs_agblock_t ltbno; /* start of left neighbor block */ 1558 xfs_extlen_t ltlen; /* length of left neighbor block */ 1559 xfs_mount_t *mp; /* mount point struct for filesystem */ 1560 xfs_agblock_t nbno; /* new starting block of freespace */ 1561 xfs_extlen_t nlen; /* new length of freespace */ 1562 xfs_perag_t *pag; /* per allocation group data */ 1563 1564 mp = tp->t_mountp; 1565 /* 1566 * Allocate and initialize a cursor for the by-block btree. 1567 */ 1568 bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO); 1569 cnt_cur = NULL; 1570 /* 1571 * Look for a neighboring block on the left (lower block numbers) 1572 * that is contiguous with this space. 1573 */ 1574 if ((error = xfs_alloc_lookup_le(bno_cur, bno, len, &haveleft))) 1575 goto error0; 1576 if (haveleft) { 1577 /* 1578 * There is a block to our left. 1579 */ 1580 if ((error = xfs_alloc_get_rec(bno_cur, <bno, <len, &i))) 1581 goto error0; 1582 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1583 /* 1584 * It's not contiguous, though. 1585 */ 1586 if (ltbno + ltlen < bno) 1587 haveleft = 0; 1588 else { 1589 /* 1590 * If this failure happens the request to free this 1591 * space was invalid, it's (partly) already free. 1592 * Very bad. 1593 */ 1594 XFS_WANT_CORRUPTED_GOTO(ltbno + ltlen <= bno, error0); 1595 } 1596 } 1597 /* 1598 * Look for a neighboring block on the right (higher block numbers) 1599 * that is contiguous with this space. 1600 */ 1601 if ((error = xfs_btree_increment(bno_cur, 0, &haveright))) 1602 goto error0; 1603 if (haveright) { 1604 /* 1605 * There is a block to our right. 1606 */ 1607 if ((error = xfs_alloc_get_rec(bno_cur, >bno, >len, &i))) 1608 goto error0; 1609 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1610 /* 1611 * It's not contiguous, though. 1612 */ 1613 if (bno + len < gtbno) 1614 haveright = 0; 1615 else { 1616 /* 1617 * If this failure happens the request to free this 1618 * space was invalid, it's (partly) already free. 1619 * Very bad. 1620 */ 1621 XFS_WANT_CORRUPTED_GOTO(gtbno >= bno + len, error0); 1622 } 1623 } 1624 /* 1625 * Now allocate and initialize a cursor for the by-size tree. 1626 */ 1627 cnt_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_CNT); 1628 /* 1629 * Have both left and right contiguous neighbors. 1630 * Merge all three into a single free block. 1631 */ 1632 if (haveleft && haveright) { 1633 /* 1634 * Delete the old by-size entry on the left. 1635 */ 1636 if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i))) 1637 goto error0; 1638 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1639 if ((error = xfs_btree_delete(cnt_cur, &i))) 1640 goto error0; 1641 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1642 /* 1643 * Delete the old by-size entry on the right. 1644 */ 1645 if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i))) 1646 goto error0; 1647 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1648 if ((error = xfs_btree_delete(cnt_cur, &i))) 1649 goto error0; 1650 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1651 /* 1652 * Delete the old by-block entry for the right block. 1653 */ 1654 if ((error = xfs_btree_delete(bno_cur, &i))) 1655 goto error0; 1656 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1657 /* 1658 * Move the by-block cursor back to the left neighbor. 1659 */ 1660 if ((error = xfs_btree_decrement(bno_cur, 0, &i))) 1661 goto error0; 1662 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1663 #ifdef DEBUG 1664 /* 1665 * Check that this is the right record: delete didn't 1666 * mangle the cursor. 1667 */ 1668 { 1669 xfs_agblock_t xxbno; 1670 xfs_extlen_t xxlen; 1671 1672 if ((error = xfs_alloc_get_rec(bno_cur, &xxbno, &xxlen, 1673 &i))) 1674 goto error0; 1675 XFS_WANT_CORRUPTED_GOTO( 1676 i == 1 && xxbno == ltbno && xxlen == ltlen, 1677 error0); 1678 } 1679 #endif 1680 /* 1681 * Update remaining by-block entry to the new, joined block. 1682 */ 1683 nbno = ltbno; 1684 nlen = len + ltlen + gtlen; 1685 if ((error = xfs_alloc_update(bno_cur, nbno, nlen))) 1686 goto error0; 1687 } 1688 /* 1689 * Have only a left contiguous neighbor. 1690 * Merge it together with the new freespace. 1691 */ 1692 else if (haveleft) { 1693 /* 1694 * Delete the old by-size entry on the left. 1695 */ 1696 if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i))) 1697 goto error0; 1698 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1699 if ((error = xfs_btree_delete(cnt_cur, &i))) 1700 goto error0; 1701 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1702 /* 1703 * Back up the by-block cursor to the left neighbor, and 1704 * update its length. 1705 */ 1706 if ((error = xfs_btree_decrement(bno_cur, 0, &i))) 1707 goto error0; 1708 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1709 nbno = ltbno; 1710 nlen = len + ltlen; 1711 if ((error = xfs_alloc_update(bno_cur, nbno, nlen))) 1712 goto error0; 1713 } 1714 /* 1715 * Have only a right contiguous neighbor. 1716 * Merge it together with the new freespace. 1717 */ 1718 else if (haveright) { 1719 /* 1720 * Delete the old by-size entry on the right. 1721 */ 1722 if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i))) 1723 goto error0; 1724 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1725 if ((error = xfs_btree_delete(cnt_cur, &i))) 1726 goto error0; 1727 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1728 /* 1729 * Update the starting block and length of the right 1730 * neighbor in the by-block tree. 1731 */ 1732 nbno = bno; 1733 nlen = len + gtlen; 1734 if ((error = xfs_alloc_update(bno_cur, nbno, nlen))) 1735 goto error0; 1736 } 1737 /* 1738 * No contiguous neighbors. 1739 * Insert the new freespace into the by-block tree. 1740 */ 1741 else { 1742 nbno = bno; 1743 nlen = len; 1744 if ((error = xfs_btree_insert(bno_cur, &i))) 1745 goto error0; 1746 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1747 } 1748 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); 1749 bno_cur = NULL; 1750 /* 1751 * In all cases we need to insert the new freespace in the by-size tree. 1752 */ 1753 if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i))) 1754 goto error0; 1755 XFS_WANT_CORRUPTED_GOTO(i == 0, error0); 1756 if ((error = xfs_btree_insert(cnt_cur, &i))) 1757 goto error0; 1758 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 1759 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1760 cnt_cur = NULL; 1761 1762 /* 1763 * Update the freespace totals in the ag and superblock. 1764 */ 1765 pag = xfs_perag_get(mp, agno); 1766 error = xfs_alloc_update_counters(tp, pag, agbp, len); 1767 xfs_perag_put(pag); 1768 if (error) 1769 goto error0; 1770 1771 if (!isfl) 1772 xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (long)len); 1773 XFS_STATS_INC(xs_freex); 1774 XFS_STATS_ADD(xs_freeb, len); 1775 1776 trace_xfs_free_extent(mp, agno, bno, len, isfl, haveleft, haveright); 1777 1778 return 0; 1779 1780 error0: 1781 trace_xfs_free_extent(mp, agno, bno, len, isfl, -1, -1); 1782 if (bno_cur) 1783 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR); 1784 if (cnt_cur) 1785 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR); 1786 return error; 1787 } 1788 1789 /* 1790 * Visible (exported) allocation/free functions. 1791 * Some of these are used just by xfs_alloc_btree.c and this file. 1792 */ 1793 1794 /* 1795 * Compute and fill in value of m_ag_maxlevels. 1796 */ 1797 void 1798 xfs_alloc_compute_maxlevels( 1799 xfs_mount_t *mp) /* file system mount structure */ 1800 { 1801 int level; 1802 uint maxblocks; 1803 uint maxleafents; 1804 int minleafrecs; 1805 int minnoderecs; 1806 1807 maxleafents = (mp->m_sb.sb_agblocks + 1) / 2; 1808 minleafrecs = mp->m_alloc_mnr[0]; 1809 minnoderecs = mp->m_alloc_mnr[1]; 1810 maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs; 1811 for (level = 1; maxblocks > 1; level++) 1812 maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs; 1813 mp->m_ag_maxlevels = level; 1814 } 1815 1816 /* 1817 * Find the length of the longest extent in an AG. 1818 */ 1819 xfs_extlen_t 1820 xfs_alloc_longest_free_extent( 1821 struct xfs_mount *mp, 1822 struct xfs_perag *pag) 1823 { 1824 xfs_extlen_t need, delta = 0; 1825 1826 need = XFS_MIN_FREELIST_PAG(pag, mp); 1827 if (need > pag->pagf_flcount) 1828 delta = need - pag->pagf_flcount; 1829 1830 if (pag->pagf_longest > delta) 1831 return pag->pagf_longest - delta; 1832 return pag->pagf_flcount > 0 || pag->pagf_longest > 0; 1833 } 1834 1835 /* 1836 * Decide whether to use this allocation group for this allocation. 1837 * If so, fix up the btree freelist's size. 1838 */ 1839 STATIC int /* error */ 1840 xfs_alloc_fix_freelist( 1841 xfs_alloc_arg_t *args, /* allocation argument structure */ 1842 int flags) /* XFS_ALLOC_FLAG_... */ 1843 { 1844 xfs_buf_t *agbp; /* agf buffer pointer */ 1845 xfs_agf_t *agf; /* a.g. freespace structure pointer */ 1846 xfs_buf_t *agflbp;/* agfl buffer pointer */ 1847 xfs_agblock_t bno; /* freelist block */ 1848 xfs_extlen_t delta; /* new blocks needed in freelist */ 1849 int error; /* error result code */ 1850 xfs_extlen_t longest;/* longest extent in allocation group */ 1851 xfs_mount_t *mp; /* file system mount point structure */ 1852 xfs_extlen_t need; /* total blocks needed in freelist */ 1853 xfs_perag_t *pag; /* per-ag information structure */ 1854 xfs_alloc_arg_t targs; /* local allocation arguments */ 1855 xfs_trans_t *tp; /* transaction pointer */ 1856 1857 mp = args->mp; 1858 1859 pag = args->pag; 1860 tp = args->tp; 1861 if (!pag->pagf_init) { 1862 if ((error = xfs_alloc_read_agf(mp, tp, args->agno, flags, 1863 &agbp))) 1864 return error; 1865 if (!pag->pagf_init) { 1866 ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK); 1867 ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING)); 1868 args->agbp = NULL; 1869 return 0; 1870 } 1871 } else 1872 agbp = NULL; 1873 1874 /* 1875 * If this is a metadata preferred pag and we are user data 1876 * then try somewhere else if we are not being asked to 1877 * try harder at this point 1878 */ 1879 if (pag->pagf_metadata && args->userdata && 1880 (flags & XFS_ALLOC_FLAG_TRYLOCK)) { 1881 ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING)); 1882 args->agbp = NULL; 1883 return 0; 1884 } 1885 1886 if (!(flags & XFS_ALLOC_FLAG_FREEING)) { 1887 /* 1888 * If it looks like there isn't a long enough extent, or enough 1889 * total blocks, reject it. 1890 */ 1891 need = XFS_MIN_FREELIST_PAG(pag, mp); 1892 longest = xfs_alloc_longest_free_extent(mp, pag); 1893 if ((args->minlen + args->alignment + args->minalignslop - 1) > 1894 longest || 1895 ((int)(pag->pagf_freeblks + pag->pagf_flcount - 1896 need - args->total) < (int)args->minleft)) { 1897 if (agbp) 1898 xfs_trans_brelse(tp, agbp); 1899 args->agbp = NULL; 1900 return 0; 1901 } 1902 } 1903 1904 /* 1905 * Get the a.g. freespace buffer. 1906 * Can fail if we're not blocking on locks, and it's held. 1907 */ 1908 if (agbp == NULL) { 1909 if ((error = xfs_alloc_read_agf(mp, tp, args->agno, flags, 1910 &agbp))) 1911 return error; 1912 if (agbp == NULL) { 1913 ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK); 1914 ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING)); 1915 args->agbp = NULL; 1916 return 0; 1917 } 1918 } 1919 /* 1920 * Figure out how many blocks we should have in the freelist. 1921 */ 1922 agf = XFS_BUF_TO_AGF(agbp); 1923 need = XFS_MIN_FREELIST(agf, mp); 1924 /* 1925 * If there isn't enough total or single-extent, reject it. 1926 */ 1927 if (!(flags & XFS_ALLOC_FLAG_FREEING)) { 1928 delta = need > be32_to_cpu(agf->agf_flcount) ? 1929 (need - be32_to_cpu(agf->agf_flcount)) : 0; 1930 longest = be32_to_cpu(agf->agf_longest); 1931 longest = (longest > delta) ? (longest - delta) : 1932 (be32_to_cpu(agf->agf_flcount) > 0 || longest > 0); 1933 if ((args->minlen + args->alignment + args->minalignslop - 1) > 1934 longest || 1935 ((int)(be32_to_cpu(agf->agf_freeblks) + 1936 be32_to_cpu(agf->agf_flcount) - need - args->total) < 1937 (int)args->minleft)) { 1938 xfs_trans_brelse(tp, agbp); 1939 args->agbp = NULL; 1940 return 0; 1941 } 1942 } 1943 /* 1944 * Make the freelist shorter if it's too long. 1945 */ 1946 while (be32_to_cpu(agf->agf_flcount) > need) { 1947 xfs_buf_t *bp; 1948 1949 error = xfs_alloc_get_freelist(tp, agbp, &bno, 0); 1950 if (error) 1951 return error; 1952 if ((error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1, 1))) 1953 return error; 1954 bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0); 1955 xfs_trans_binval(tp, bp); 1956 } 1957 /* 1958 * Initialize the args structure. 1959 */ 1960 memset(&targs, 0, sizeof(targs)); 1961 targs.tp = tp; 1962 targs.mp = mp; 1963 targs.agbp = agbp; 1964 targs.agno = args->agno; 1965 targs.alignment = targs.minlen = targs.prod = targs.isfl = 1; 1966 targs.type = XFS_ALLOCTYPE_THIS_AG; 1967 targs.pag = pag; 1968 if ((error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp))) 1969 return error; 1970 /* 1971 * Make the freelist longer if it's too short. 1972 */ 1973 while (be32_to_cpu(agf->agf_flcount) < need) { 1974 targs.agbno = 0; 1975 targs.maxlen = need - be32_to_cpu(agf->agf_flcount); 1976 /* 1977 * Allocate as many blocks as possible at once. 1978 */ 1979 if ((error = xfs_alloc_ag_vextent(&targs))) { 1980 xfs_trans_brelse(tp, agflbp); 1981 return error; 1982 } 1983 /* 1984 * Stop if we run out. Won't happen if callers are obeying 1985 * the restrictions correctly. Can happen for free calls 1986 * on a completely full ag. 1987 */ 1988 if (targs.agbno == NULLAGBLOCK) { 1989 if (flags & XFS_ALLOC_FLAG_FREEING) 1990 break; 1991 xfs_trans_brelse(tp, agflbp); 1992 args->agbp = NULL; 1993 return 0; 1994 } 1995 /* 1996 * Put each allocated block on the list. 1997 */ 1998 for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) { 1999 error = xfs_alloc_put_freelist(tp, agbp, 2000 agflbp, bno, 0); 2001 if (error) 2002 return error; 2003 } 2004 } 2005 xfs_trans_brelse(tp, agflbp); 2006 args->agbp = agbp; 2007 return 0; 2008 } 2009 2010 /* 2011 * Get a block from the freelist. 2012 * Returns with the buffer for the block gotten. 2013 */ 2014 int /* error */ 2015 xfs_alloc_get_freelist( 2016 xfs_trans_t *tp, /* transaction pointer */ 2017 xfs_buf_t *agbp, /* buffer containing the agf structure */ 2018 xfs_agblock_t *bnop, /* block address retrieved from freelist */ 2019 int btreeblk) /* destination is a AGF btree */ 2020 { 2021 xfs_agf_t *agf; /* a.g. freespace structure */ 2022 xfs_buf_t *agflbp;/* buffer for a.g. freelist structure */ 2023 xfs_agblock_t bno; /* block number returned */ 2024 __be32 *agfl_bno; 2025 int error; 2026 int logflags; 2027 xfs_mount_t *mp = tp->t_mountp; 2028 xfs_perag_t *pag; /* per allocation group data */ 2029 2030 /* 2031 * Freelist is empty, give up. 2032 */ 2033 agf = XFS_BUF_TO_AGF(agbp); 2034 if (!agf->agf_flcount) { 2035 *bnop = NULLAGBLOCK; 2036 return 0; 2037 } 2038 /* 2039 * Read the array of free blocks. 2040 */ 2041 error = xfs_alloc_read_agfl(mp, tp, be32_to_cpu(agf->agf_seqno), 2042 &agflbp); 2043 if (error) 2044 return error; 2045 2046 2047 /* 2048 * Get the block number and update the data structures. 2049 */ 2050 agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp); 2051 bno = be32_to_cpu(agfl_bno[be32_to_cpu(agf->agf_flfirst)]); 2052 be32_add_cpu(&agf->agf_flfirst, 1); 2053 xfs_trans_brelse(tp, agflbp); 2054 if (be32_to_cpu(agf->agf_flfirst) == XFS_AGFL_SIZE(mp)) 2055 agf->agf_flfirst = 0; 2056 2057 pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno)); 2058 be32_add_cpu(&agf->agf_flcount, -1); 2059 xfs_trans_agflist_delta(tp, -1); 2060 pag->pagf_flcount--; 2061 xfs_perag_put(pag); 2062 2063 logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT; 2064 if (btreeblk) { 2065 be32_add_cpu(&agf->agf_btreeblks, 1); 2066 pag->pagf_btreeblks++; 2067 logflags |= XFS_AGF_BTREEBLKS; 2068 } 2069 2070 xfs_alloc_log_agf(tp, agbp, logflags); 2071 *bnop = bno; 2072 2073 return 0; 2074 } 2075 2076 /* 2077 * Log the given fields from the agf structure. 2078 */ 2079 void 2080 xfs_alloc_log_agf( 2081 xfs_trans_t *tp, /* transaction pointer */ 2082 xfs_buf_t *bp, /* buffer for a.g. freelist header */ 2083 int fields) /* mask of fields to be logged (XFS_AGF_...) */ 2084 { 2085 int first; /* first byte offset */ 2086 int last; /* last byte offset */ 2087 static const short offsets[] = { 2088 offsetof(xfs_agf_t, agf_magicnum), 2089 offsetof(xfs_agf_t, agf_versionnum), 2090 offsetof(xfs_agf_t, agf_seqno), 2091 offsetof(xfs_agf_t, agf_length), 2092 offsetof(xfs_agf_t, agf_roots[0]), 2093 offsetof(xfs_agf_t, agf_levels[0]), 2094 offsetof(xfs_agf_t, agf_flfirst), 2095 offsetof(xfs_agf_t, agf_fllast), 2096 offsetof(xfs_agf_t, agf_flcount), 2097 offsetof(xfs_agf_t, agf_freeblks), 2098 offsetof(xfs_agf_t, agf_longest), 2099 offsetof(xfs_agf_t, agf_btreeblks), 2100 offsetof(xfs_agf_t, agf_uuid), 2101 sizeof(xfs_agf_t) 2102 }; 2103 2104 trace_xfs_agf(tp->t_mountp, XFS_BUF_TO_AGF(bp), fields, _RET_IP_); 2105 2106 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGF_BUF); 2107 2108 xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last); 2109 xfs_trans_log_buf(tp, bp, (uint)first, (uint)last); 2110 } 2111 2112 /* 2113 * Interface for inode allocation to force the pag data to be initialized. 2114 */ 2115 int /* error */ 2116 xfs_alloc_pagf_init( 2117 xfs_mount_t *mp, /* file system mount structure */ 2118 xfs_trans_t *tp, /* transaction pointer */ 2119 xfs_agnumber_t agno, /* allocation group number */ 2120 int flags) /* XFS_ALLOC_FLAGS_... */ 2121 { 2122 xfs_buf_t *bp; 2123 int error; 2124 2125 if ((error = xfs_alloc_read_agf(mp, tp, agno, flags, &bp))) 2126 return error; 2127 if (bp) 2128 xfs_trans_brelse(tp, bp); 2129 return 0; 2130 } 2131 2132 /* 2133 * Put the block on the freelist for the allocation group. 2134 */ 2135 int /* error */ 2136 xfs_alloc_put_freelist( 2137 xfs_trans_t *tp, /* transaction pointer */ 2138 xfs_buf_t *agbp, /* buffer for a.g. freelist header */ 2139 xfs_buf_t *agflbp,/* buffer for a.g. free block array */ 2140 xfs_agblock_t bno, /* block being freed */ 2141 int btreeblk) /* block came from a AGF btree */ 2142 { 2143 xfs_agf_t *agf; /* a.g. freespace structure */ 2144 __be32 *blockp;/* pointer to array entry */ 2145 int error; 2146 int logflags; 2147 xfs_mount_t *mp; /* mount structure */ 2148 xfs_perag_t *pag; /* per allocation group data */ 2149 __be32 *agfl_bno; 2150 int startoff; 2151 2152 agf = XFS_BUF_TO_AGF(agbp); 2153 mp = tp->t_mountp; 2154 2155 if (!agflbp && (error = xfs_alloc_read_agfl(mp, tp, 2156 be32_to_cpu(agf->agf_seqno), &agflbp))) 2157 return error; 2158 be32_add_cpu(&agf->agf_fllast, 1); 2159 if (be32_to_cpu(agf->agf_fllast) == XFS_AGFL_SIZE(mp)) 2160 agf->agf_fllast = 0; 2161 2162 pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno)); 2163 be32_add_cpu(&agf->agf_flcount, 1); 2164 xfs_trans_agflist_delta(tp, 1); 2165 pag->pagf_flcount++; 2166 2167 logflags = XFS_AGF_FLLAST | XFS_AGF_FLCOUNT; 2168 if (btreeblk) { 2169 be32_add_cpu(&agf->agf_btreeblks, -1); 2170 pag->pagf_btreeblks--; 2171 logflags |= XFS_AGF_BTREEBLKS; 2172 } 2173 xfs_perag_put(pag); 2174 2175 xfs_alloc_log_agf(tp, agbp, logflags); 2176 2177 ASSERT(be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp)); 2178 2179 agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp); 2180 blockp = &agfl_bno[be32_to_cpu(agf->agf_fllast)]; 2181 *blockp = cpu_to_be32(bno); 2182 startoff = (char *)blockp - (char *)agflbp->b_addr; 2183 2184 xfs_alloc_log_agf(tp, agbp, logflags); 2185 2186 xfs_trans_buf_set_type(tp, agflbp, XFS_BLFT_AGFL_BUF); 2187 xfs_trans_log_buf(tp, agflbp, startoff, 2188 startoff + sizeof(xfs_agblock_t) - 1); 2189 return 0; 2190 } 2191 2192 static bool 2193 xfs_agf_verify( 2194 struct xfs_mount *mp, 2195 struct xfs_buf *bp) 2196 { 2197 struct xfs_agf *agf = XFS_BUF_TO_AGF(bp); 2198 2199 if (xfs_sb_version_hascrc(&mp->m_sb) && 2200 !uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_uuid)) 2201 return false; 2202 2203 if (!(agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) && 2204 XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) && 2205 be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) && 2206 be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) && 2207 be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) && 2208 be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp))) 2209 return false; 2210 2211 if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > XFS_BTREE_MAXLEVELS || 2212 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) > XFS_BTREE_MAXLEVELS) 2213 return false; 2214 2215 /* 2216 * during growfs operations, the perag is not fully initialised, 2217 * so we can't use it for any useful checking. growfs ensures we can't 2218 * use it by using uncached buffers that don't have the perag attached 2219 * so we can detect and avoid this problem. 2220 */ 2221 if (bp->b_pag && be32_to_cpu(agf->agf_seqno) != bp->b_pag->pag_agno) 2222 return false; 2223 2224 if (xfs_sb_version_haslazysbcount(&mp->m_sb) && 2225 be32_to_cpu(agf->agf_btreeblks) > be32_to_cpu(agf->agf_length)) 2226 return false; 2227 2228 return true;; 2229 2230 } 2231 2232 static void 2233 xfs_agf_read_verify( 2234 struct xfs_buf *bp) 2235 { 2236 struct xfs_mount *mp = bp->b_target->bt_mount; 2237 2238 if (xfs_sb_version_hascrc(&mp->m_sb) && 2239 !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF)) 2240 xfs_buf_ioerror(bp, -EFSBADCRC); 2241 else if (XFS_TEST_ERROR(!xfs_agf_verify(mp, bp), mp, 2242 XFS_ERRTAG_ALLOC_READ_AGF, 2243 XFS_RANDOM_ALLOC_READ_AGF)) 2244 xfs_buf_ioerror(bp, -EFSCORRUPTED); 2245 2246 if (bp->b_error) 2247 xfs_verifier_error(bp); 2248 } 2249 2250 static void 2251 xfs_agf_write_verify( 2252 struct xfs_buf *bp) 2253 { 2254 struct xfs_mount *mp = bp->b_target->bt_mount; 2255 struct xfs_buf_log_item *bip = bp->b_fspriv; 2256 2257 if (!xfs_agf_verify(mp, bp)) { 2258 xfs_buf_ioerror(bp, -EFSCORRUPTED); 2259 xfs_verifier_error(bp); 2260 return; 2261 } 2262 2263 if (!xfs_sb_version_hascrc(&mp->m_sb)) 2264 return; 2265 2266 if (bip) 2267 XFS_BUF_TO_AGF(bp)->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn); 2268 2269 xfs_buf_update_cksum(bp, XFS_AGF_CRC_OFF); 2270 } 2271 2272 const struct xfs_buf_ops xfs_agf_buf_ops = { 2273 .verify_read = xfs_agf_read_verify, 2274 .verify_write = xfs_agf_write_verify, 2275 }; 2276 2277 /* 2278 * Read in the allocation group header (free/alloc section). 2279 */ 2280 int /* error */ 2281 xfs_read_agf( 2282 struct xfs_mount *mp, /* mount point structure */ 2283 struct xfs_trans *tp, /* transaction pointer */ 2284 xfs_agnumber_t agno, /* allocation group number */ 2285 int flags, /* XFS_BUF_ */ 2286 struct xfs_buf **bpp) /* buffer for the ag freelist header */ 2287 { 2288 int error; 2289 2290 trace_xfs_read_agf(mp, agno); 2291 2292 ASSERT(agno != NULLAGNUMBER); 2293 error = xfs_trans_read_buf( 2294 mp, tp, mp->m_ddev_targp, 2295 XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)), 2296 XFS_FSS_TO_BB(mp, 1), flags, bpp, &xfs_agf_buf_ops); 2297 if (error) 2298 return error; 2299 if (!*bpp) 2300 return 0; 2301 2302 ASSERT(!(*bpp)->b_error); 2303 xfs_buf_set_ref(*bpp, XFS_AGF_REF); 2304 return 0; 2305 } 2306 2307 /* 2308 * Read in the allocation group header (free/alloc section). 2309 */ 2310 int /* error */ 2311 xfs_alloc_read_agf( 2312 struct xfs_mount *mp, /* mount point structure */ 2313 struct xfs_trans *tp, /* transaction pointer */ 2314 xfs_agnumber_t agno, /* allocation group number */ 2315 int flags, /* XFS_ALLOC_FLAG_... */ 2316 struct xfs_buf **bpp) /* buffer for the ag freelist header */ 2317 { 2318 struct xfs_agf *agf; /* ag freelist header */ 2319 struct xfs_perag *pag; /* per allocation group data */ 2320 int error; 2321 2322 trace_xfs_alloc_read_agf(mp, agno); 2323 2324 ASSERT(agno != NULLAGNUMBER); 2325 error = xfs_read_agf(mp, tp, agno, 2326 (flags & XFS_ALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0, 2327 bpp); 2328 if (error) 2329 return error; 2330 if (!*bpp) 2331 return 0; 2332 ASSERT(!(*bpp)->b_error); 2333 2334 agf = XFS_BUF_TO_AGF(*bpp); 2335 pag = xfs_perag_get(mp, agno); 2336 if (!pag->pagf_init) { 2337 pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks); 2338 pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks); 2339 pag->pagf_flcount = be32_to_cpu(agf->agf_flcount); 2340 pag->pagf_longest = be32_to_cpu(agf->agf_longest); 2341 pag->pagf_levels[XFS_BTNUM_BNOi] = 2342 be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]); 2343 pag->pagf_levels[XFS_BTNUM_CNTi] = 2344 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]); 2345 spin_lock_init(&pag->pagb_lock); 2346 pag->pagb_count = 0; 2347 pag->pagb_tree = RB_ROOT; 2348 pag->pagf_init = 1; 2349 } 2350 #ifdef DEBUG 2351 else if (!XFS_FORCED_SHUTDOWN(mp)) { 2352 ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks)); 2353 ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks)); 2354 ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount)); 2355 ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest)); 2356 ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] == 2357 be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi])); 2358 ASSERT(pag->pagf_levels[XFS_BTNUM_CNTi] == 2359 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi])); 2360 } 2361 #endif 2362 xfs_perag_put(pag); 2363 return 0; 2364 } 2365 2366 /* 2367 * Allocate an extent (variable-size). 2368 * Depending on the allocation type, we either look in a single allocation 2369 * group or loop over the allocation groups to find the result. 2370 */ 2371 int /* error */ 2372 xfs_alloc_vextent( 2373 xfs_alloc_arg_t *args) /* allocation argument structure */ 2374 { 2375 xfs_agblock_t agsize; /* allocation group size */ 2376 int error; 2377 int flags; /* XFS_ALLOC_FLAG_... locking flags */ 2378 xfs_extlen_t minleft;/* minimum left value, temp copy */ 2379 xfs_mount_t *mp; /* mount structure pointer */ 2380 xfs_agnumber_t sagno; /* starting allocation group number */ 2381 xfs_alloctype_t type; /* input allocation type */ 2382 int bump_rotor = 0; 2383 int no_min = 0; 2384 xfs_agnumber_t rotorstep = xfs_rotorstep; /* inode32 agf stepper */ 2385 2386 mp = args->mp; 2387 type = args->otype = args->type; 2388 args->agbno = NULLAGBLOCK; 2389 /* 2390 * Just fix this up, for the case where the last a.g. is shorter 2391 * (or there's only one a.g.) and the caller couldn't easily figure 2392 * that out (xfs_bmap_alloc). 2393 */ 2394 agsize = mp->m_sb.sb_agblocks; 2395 if (args->maxlen > agsize) 2396 args->maxlen = agsize; 2397 if (args->alignment == 0) 2398 args->alignment = 1; 2399 ASSERT(XFS_FSB_TO_AGNO(mp, args->fsbno) < mp->m_sb.sb_agcount); 2400 ASSERT(XFS_FSB_TO_AGBNO(mp, args->fsbno) < agsize); 2401 ASSERT(args->minlen <= args->maxlen); 2402 ASSERT(args->minlen <= agsize); 2403 ASSERT(args->mod < args->prod); 2404 if (XFS_FSB_TO_AGNO(mp, args->fsbno) >= mp->m_sb.sb_agcount || 2405 XFS_FSB_TO_AGBNO(mp, args->fsbno) >= agsize || 2406 args->minlen > args->maxlen || args->minlen > agsize || 2407 args->mod >= args->prod) { 2408 args->fsbno = NULLFSBLOCK; 2409 trace_xfs_alloc_vextent_badargs(args); 2410 return 0; 2411 } 2412 minleft = args->minleft; 2413 2414 switch (type) { 2415 case XFS_ALLOCTYPE_THIS_AG: 2416 case XFS_ALLOCTYPE_NEAR_BNO: 2417 case XFS_ALLOCTYPE_THIS_BNO: 2418 /* 2419 * These three force us into a single a.g. 2420 */ 2421 args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno); 2422 args->pag = xfs_perag_get(mp, args->agno); 2423 args->minleft = 0; 2424 error = xfs_alloc_fix_freelist(args, 0); 2425 args->minleft = minleft; 2426 if (error) { 2427 trace_xfs_alloc_vextent_nofix(args); 2428 goto error0; 2429 } 2430 if (!args->agbp) { 2431 trace_xfs_alloc_vextent_noagbp(args); 2432 break; 2433 } 2434 args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno); 2435 if ((error = xfs_alloc_ag_vextent(args))) 2436 goto error0; 2437 break; 2438 case XFS_ALLOCTYPE_START_BNO: 2439 /* 2440 * Try near allocation first, then anywhere-in-ag after 2441 * the first a.g. fails. 2442 */ 2443 if ((args->userdata == XFS_ALLOC_INITIAL_USER_DATA) && 2444 (mp->m_flags & XFS_MOUNT_32BITINODES)) { 2445 args->fsbno = XFS_AGB_TO_FSB(mp, 2446 ((mp->m_agfrotor / rotorstep) % 2447 mp->m_sb.sb_agcount), 0); 2448 bump_rotor = 1; 2449 } 2450 args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno); 2451 args->type = XFS_ALLOCTYPE_NEAR_BNO; 2452 /* FALLTHROUGH */ 2453 case XFS_ALLOCTYPE_ANY_AG: 2454 case XFS_ALLOCTYPE_START_AG: 2455 case XFS_ALLOCTYPE_FIRST_AG: 2456 /* 2457 * Rotate through the allocation groups looking for a winner. 2458 */ 2459 if (type == XFS_ALLOCTYPE_ANY_AG) { 2460 /* 2461 * Start with the last place we left off. 2462 */ 2463 args->agno = sagno = (mp->m_agfrotor / rotorstep) % 2464 mp->m_sb.sb_agcount; 2465 args->type = XFS_ALLOCTYPE_THIS_AG; 2466 flags = XFS_ALLOC_FLAG_TRYLOCK; 2467 } else if (type == XFS_ALLOCTYPE_FIRST_AG) { 2468 /* 2469 * Start with allocation group given by bno. 2470 */ 2471 args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno); 2472 args->type = XFS_ALLOCTYPE_THIS_AG; 2473 sagno = 0; 2474 flags = 0; 2475 } else { 2476 if (type == XFS_ALLOCTYPE_START_AG) 2477 args->type = XFS_ALLOCTYPE_THIS_AG; 2478 /* 2479 * Start with the given allocation group. 2480 */ 2481 args->agno = sagno = XFS_FSB_TO_AGNO(mp, args->fsbno); 2482 flags = XFS_ALLOC_FLAG_TRYLOCK; 2483 } 2484 /* 2485 * Loop over allocation groups twice; first time with 2486 * trylock set, second time without. 2487 */ 2488 for (;;) { 2489 args->pag = xfs_perag_get(mp, args->agno); 2490 if (no_min) args->minleft = 0; 2491 error = xfs_alloc_fix_freelist(args, flags); 2492 args->minleft = minleft; 2493 if (error) { 2494 trace_xfs_alloc_vextent_nofix(args); 2495 goto error0; 2496 } 2497 /* 2498 * If we get a buffer back then the allocation will fly. 2499 */ 2500 if (args->agbp) { 2501 if ((error = xfs_alloc_ag_vextent(args))) 2502 goto error0; 2503 break; 2504 } 2505 2506 trace_xfs_alloc_vextent_loopfailed(args); 2507 2508 /* 2509 * Didn't work, figure out the next iteration. 2510 */ 2511 if (args->agno == sagno && 2512 type == XFS_ALLOCTYPE_START_BNO) 2513 args->type = XFS_ALLOCTYPE_THIS_AG; 2514 /* 2515 * For the first allocation, we can try any AG to get 2516 * space. However, if we already have allocated a 2517 * block, we don't want to try AGs whose number is below 2518 * sagno. Otherwise, we may end up with out-of-order 2519 * locking of AGF, which might cause deadlock. 2520 */ 2521 if (++(args->agno) == mp->m_sb.sb_agcount) { 2522 if (args->firstblock != NULLFSBLOCK) 2523 args->agno = sagno; 2524 else 2525 args->agno = 0; 2526 } 2527 /* 2528 * Reached the starting a.g., must either be done 2529 * or switch to non-trylock mode. 2530 */ 2531 if (args->agno == sagno) { 2532 if (no_min == 1) { 2533 args->agbno = NULLAGBLOCK; 2534 trace_xfs_alloc_vextent_allfailed(args); 2535 break; 2536 } 2537 if (flags == 0) { 2538 no_min = 1; 2539 } else { 2540 flags = 0; 2541 if (type == XFS_ALLOCTYPE_START_BNO) { 2542 args->agbno = XFS_FSB_TO_AGBNO(mp, 2543 args->fsbno); 2544 args->type = XFS_ALLOCTYPE_NEAR_BNO; 2545 } 2546 } 2547 } 2548 xfs_perag_put(args->pag); 2549 } 2550 if (bump_rotor || (type == XFS_ALLOCTYPE_ANY_AG)) { 2551 if (args->agno == sagno) 2552 mp->m_agfrotor = (mp->m_agfrotor + 1) % 2553 (mp->m_sb.sb_agcount * rotorstep); 2554 else 2555 mp->m_agfrotor = (args->agno * rotorstep + 1) % 2556 (mp->m_sb.sb_agcount * rotorstep); 2557 } 2558 break; 2559 default: 2560 ASSERT(0); 2561 /* NOTREACHED */ 2562 } 2563 if (args->agbno == NULLAGBLOCK) 2564 args->fsbno = NULLFSBLOCK; 2565 else { 2566 args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno); 2567 #ifdef DEBUG 2568 ASSERT(args->len >= args->minlen); 2569 ASSERT(args->len <= args->maxlen); 2570 ASSERT(args->agbno % args->alignment == 0); 2571 XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno), 2572 args->len); 2573 #endif 2574 } 2575 xfs_perag_put(args->pag); 2576 return 0; 2577 error0: 2578 xfs_perag_put(args->pag); 2579 return error; 2580 } 2581 2582 /* 2583 * Free an extent. 2584 * Just break up the extent address and hand off to xfs_free_ag_extent 2585 * after fixing up the freelist. 2586 */ 2587 int /* error */ 2588 xfs_free_extent( 2589 xfs_trans_t *tp, /* transaction pointer */ 2590 xfs_fsblock_t bno, /* starting block number of extent */ 2591 xfs_extlen_t len) /* length of extent */ 2592 { 2593 xfs_alloc_arg_t args; 2594 int error; 2595 2596 ASSERT(len != 0); 2597 memset(&args, 0, sizeof(xfs_alloc_arg_t)); 2598 args.tp = tp; 2599 args.mp = tp->t_mountp; 2600 2601 /* 2602 * validate that the block number is legal - the enables us to detect 2603 * and handle a silent filesystem corruption rather than crashing. 2604 */ 2605 args.agno = XFS_FSB_TO_AGNO(args.mp, bno); 2606 if (args.agno >= args.mp->m_sb.sb_agcount) 2607 return -EFSCORRUPTED; 2608 2609 args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno); 2610 if (args.agbno >= args.mp->m_sb.sb_agblocks) 2611 return -EFSCORRUPTED; 2612 2613 args.pag = xfs_perag_get(args.mp, args.agno); 2614 ASSERT(args.pag); 2615 2616 error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING); 2617 if (error) 2618 goto error0; 2619 2620 /* validate the extent size is legal now we have the agf locked */ 2621 if (args.agbno + len > 2622 be32_to_cpu(XFS_BUF_TO_AGF(args.agbp)->agf_length)) { 2623 error = -EFSCORRUPTED; 2624 goto error0; 2625 } 2626 2627 error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0); 2628 if (!error) 2629 xfs_extent_busy_insert(tp, args.agno, args.agbno, len, 0); 2630 error0: 2631 xfs_perag_put(args.pag); 2632 return error; 2633 } 2634