/*
 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_shared.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_alloc_btree.h"
#include "xfs_alloc.h"
#include "xfs_extent_busy.h"
#include "xfs_error.h"
#include "xfs_cksum.h"
#include "xfs_trace.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_log.h"

struct workqueue_struct *xfs_alloc_wq;

#define XFS_ABSDIFF(a,b)	(((a) <= (b)) ? ((b) - (a)) : ((a) - (b)))

#define	XFSA_FIXUP_BNO_OK	1
#define	XFSA_FIXUP_CNT_OK	2

STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *);
STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *);
STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);
STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *,
		xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *);

/*
 * Lookup the record equal to [bno, len] in the btree given by cur.
 */
STATIC int				/* error */
xfs_alloc_lookup_eq(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	xfs_agblock_t		bno,	/* starting block of extent */
	xfs_extlen_t		len,	/* length of extent */
	int			*stat)	/* success/failure */
{
	cur->bc_rec.a.ar_startblock = bno;
	cur->bc_rec.a.ar_blockcount = len;
	return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
}

/*
 * Lookup the first record greater than or equal to [bno, len]
 * in the btree given by cur.
 */
int				/* error */
xfs_alloc_lookup_ge(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	xfs_agblock_t		bno,	/* starting block of extent */
	xfs_extlen_t		len,	/* length of extent */
	int			*stat)	/* success/failure */
{
	cur->bc_rec.a.ar_startblock = bno;
	cur->bc_rec.a.ar_blockcount = len;
	return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
}

/*
 * Lookup the first record less than or equal to [bno, len]
 * in the btree given by cur.
 */
int					/* error */
xfs_alloc_lookup_le(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	xfs_agblock_t		bno,	/* starting block of extent */
	xfs_extlen_t		len,	/* length of extent */
	int			*stat)	/* success/failure */
{
	cur->bc_rec.a.ar_startblock = bno;
	cur->bc_rec.a.ar_blockcount = len;
	return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
}

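/*
 * Example (illustrative sketch, not used by the allocator): the lookup
 * helpers seed the cursor's record key and report hit/miss through the
 * stat out-parameter rather than returning the record.  A hypothetical
 * caller hunting for the free extent that could contain agbno would do
 * something like this, then read the record out with xfs_alloc_get_rec()
 * and check containment separately.
 */
static inline int
xfs_alloc_example_find_le(		/* hypothetical helper */
	struct xfs_btree_cur	*bno_cur,
	xfs_agblock_t		agbno,
	int			*found)
{
	/* length key of 1: only the starting block matters for this lookup */
	return xfs_alloc_lookup_le(bno_cur, agbno, 1, found);
}
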
/*
 * Update the record referred to by cur to the value given
 * by [bno, len].
 * This either works (return 0) or gets an EFSCORRUPTED error.
 */
STATIC int				/* error */
xfs_alloc_update(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	xfs_agblock_t		bno,	/* starting block of extent */
	xfs_extlen_t		len)	/* length of extent */
{
	union xfs_btree_rec	rec;

	rec.alloc.ar_startblock = cpu_to_be32(bno);
	rec.alloc.ar_blockcount = cpu_to_be32(len);
	return xfs_btree_update(cur, &rec);
}

/*
 * Get the data from the pointed-to record.
 */
int					/* error */
xfs_alloc_get_rec(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	xfs_agblock_t		*bno,	/* output: starting block of extent */
	xfs_extlen_t		*len,	/* output: length of extent */
	int			*stat)	/* output: success/failure */
{
	union xfs_btree_rec	*rec;
	int			error;

	error = xfs_btree_get_rec(cur, &rec, stat);
	if (!error && *stat == 1) {
		*bno = be32_to_cpu(rec->alloc.ar_startblock);
		*len = be32_to_cpu(rec->alloc.ar_blockcount);
	}
	return error;
}

/*
 * Compute aligned version of the found extent.
 * Takes alignment and min length into account.
 */
STATIC void
xfs_alloc_compute_aligned(
	xfs_alloc_arg_t	*args,		/* allocation argument structure */
	xfs_agblock_t	foundbno,	/* starting block in found extent */
	xfs_extlen_t	foundlen,	/* length in found extent */
	xfs_agblock_t	*resbno,	/* result block number */
	xfs_extlen_t	*reslen)	/* result length */
{
	xfs_agblock_t	bno;
	xfs_extlen_t	len;
	xfs_extlen_t	diff;

	/* Trim busy sections out of found extent */
	xfs_extent_busy_trim(args, foundbno, foundlen, &bno, &len);

	/*
	 * If we have a largish extent that happens to start before min_agbno,
	 * see if we can shift it into range...
	 */
	if (bno < args->min_agbno && bno + len > args->min_agbno) {
		diff = args->min_agbno - bno;
		if (len > diff) {
			bno += diff;
			len -= diff;
		}
	}

	if (args->alignment > 1 && len >= args->minlen) {
		xfs_agblock_t	aligned_bno = roundup(bno, args->alignment);

		diff = aligned_bno - bno;

		*resbno = aligned_bno;
		*reslen = diff >= len ? 0 : len - diff;
	} else {
		*resbno = bno;
		*reslen = len;
	}
}

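/*
 * Worked example (hypothetical numbers): with alignment 8 and a found
 * extent [bno 13, len 20], roundup(13, 8) = 16, so diff = 3 and the
 * usable aligned extent becomes [16, 17].  Had the found extent been
 * [13, 3], diff >= len and the result length collapses to 0, i.e.
 * nothing usable at that alignment.
 */
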
/*
 * Compute best start block and diff for "near" allocations.
 * freelen >= wantlen already checked by caller.
 */
STATIC xfs_extlen_t			/* difference value (absolute) */
xfs_alloc_compute_diff(
	xfs_agblock_t	wantbno,	/* target starting block */
	xfs_extlen_t	wantlen,	/* target length */
	xfs_extlen_t	alignment,	/* target alignment */
	char		userdata,	/* are we allocating data? */
	xfs_agblock_t	freebno,	/* freespace's starting block */
	xfs_extlen_t	freelen,	/* freespace's length */
	xfs_agblock_t	*newbnop)	/* result: best start block from free */
{
	xfs_agblock_t	freeend;	/* end of freespace extent */
	xfs_agblock_t	newbno1;	/* return block number */
	xfs_agblock_t	newbno2;	/* other new block number */
	xfs_extlen_t	newlen1=0;	/* length with newbno1 */
	xfs_extlen_t	newlen2=0;	/* length with newbno2 */
	xfs_agblock_t	wantend;	/* end of target extent */

	ASSERT(freelen >= wantlen);
	freeend = freebno + freelen;
	wantend = wantbno + wantlen;
	/*
	 * We want to allocate from the start of a free extent if it is past
	 * the desired block or if we are allocating user data and the free
	 * extent is before desired block.  The second case is there to allow
	 * for contiguous allocation from the remaining free space if the file
	 * grows in the short term.
	 */
	if (freebno >= wantbno || (userdata && freeend < wantend)) {
		if ((newbno1 = roundup(freebno, alignment)) >= freeend)
			newbno1 = NULLAGBLOCK;
	} else if (freeend >= wantend && alignment > 1) {
		newbno1 = roundup(wantbno, alignment);
		newbno2 = newbno1 - alignment;
		if (newbno1 >= freeend)
			newbno1 = NULLAGBLOCK;
		else
			newlen1 = XFS_EXTLEN_MIN(wantlen, freeend - newbno1);
		if (newbno2 < freebno)
			newbno2 = NULLAGBLOCK;
		else
			newlen2 = XFS_EXTLEN_MIN(wantlen, freeend - newbno2);
		if (newbno1 != NULLAGBLOCK && newbno2 != NULLAGBLOCK) {
			if (newlen1 < newlen2 ||
			    (newlen1 == newlen2 &&
			     XFS_ABSDIFF(newbno1, wantbno) >
			     XFS_ABSDIFF(newbno2, wantbno)))
				newbno1 = newbno2;
		} else if (newbno2 != NULLAGBLOCK)
			newbno1 = newbno2;
	} else if (freeend >= wantend) {
		newbno1 = wantbno;
	} else if (alignment > 1) {
		newbno1 = roundup(freeend - wantlen, alignment);
		if (newbno1 > freeend - wantlen &&
		    newbno1 - alignment >= freebno)
			newbno1 -= alignment;
		else if (newbno1 >= freeend)
			newbno1 = NULLAGBLOCK;
	} else
		newbno1 = freeend - wantlen;
	*newbnop = newbno1;
	return newbno1 == NULLAGBLOCK ? 0 : XFS_ABSDIFF(newbno1, wantbno);
}

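/*
 * Worked example (hypothetical numbers): wantbno 100, wantlen 10,
 * alignment 1, free extent [freebno 40, freelen 200].  freebno is below
 * wantbno and freeend (240) >= wantend (110), so newbno1 = wantbno = 100
 * and the returned diff is 0: the request fits exactly in place.  Had
 * the free extent been [120, 50], freebno >= wantbno applies instead,
 * so newbno1 = 120 and the diff is XFS_ABSDIFF(120, 100) = 20.
 */
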
/*
 * Fix up the length, based on mod and prod.
 * len should be k * prod + mod for some k.
 * If len is too small it is returned unchanged.
 * If len hits maxlen it is left alone.
 */
STATIC void
xfs_alloc_fix_len(
	xfs_alloc_arg_t	*args)		/* allocation argument structure */
{
	xfs_extlen_t	k;
	xfs_extlen_t	rlen;

	ASSERT(args->mod < args->prod);
	rlen = args->len;
	ASSERT(rlen >= args->minlen);
	ASSERT(rlen <= args->maxlen);
	if (args->prod <= 1 || rlen < args->mod || rlen == args->maxlen ||
	    (args->mod == 0 && rlen < args->prod))
		return;
	k = rlen % args->prod;
	if (k == args->mod)
		return;
	if (k > args->mod)
		rlen = rlen - (k - args->mod);
	else
		rlen = rlen - args->prod + (args->mod - k);
	/* casts to (int) catch length underflows */
	if ((int)rlen < (int)args->minlen)
		return;
	ASSERT(rlen >= args->minlen && rlen <= args->maxlen);
	ASSERT(rlen % args->prod == args->mod);
	args->len = rlen;
}

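/*
 * Worked example (hypothetical numbers): prod 4, mod 1, len 14.
 * k = 14 % 4 = 2 > mod, so len is trimmed by (k - mod) = 1 to 13,
 * which satisfies 13 = 3 * 4 + 1.  With len 12 instead, k = 0 < mod,
 * so len becomes 12 - 4 + 1 = 9 = 2 * 4 + 1; if either adjustment
 * dropped below minlen, len would be left untouched instead.
 */
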
/*
 * Fix up length if there is too little space left in the a.g.
 * Return 1 if ok, 0 if too little, should give up.
 */
STATIC int
xfs_alloc_fix_minleft(
	xfs_alloc_arg_t	*args)		/* allocation argument structure */
{
	xfs_agf_t	*agf;		/* a.g. freelist header */
	int		diff;		/* free space difference */

	if (args->minleft == 0)
		return 1;
	agf = XFS_BUF_TO_AGF(args->agbp);
	diff = be32_to_cpu(agf->agf_freeblks)
		- args->len - args->minleft;
	if (diff >= 0)
		return 1;
	args->len += diff;		/* shrink the allocated space */
	/* casts to (int) catch length underflows */
	if ((int)args->len >= (int)args->minlen)
		return 1;
	args->agbno = NULLAGBLOCK;
	return 0;
}

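/*
 * Worked example (hypothetical numbers): with 100 free blocks in the
 * AG, len 90 and minleft 20, diff = 100 - 90 - 20 = -10, so the
 * allocation is shrunk to 80 blocks.  That only succeeds if minlen is
 * still covered; otherwise agbno is set to NULLAGBLOCK and the caller
 * must give up on this AG.
 */
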
/*
 * Update the two btrees, logically removing from freespace the extent
 * starting at rbno, rlen blocks.  The extent is contained within the
 * actual (current) free extent fbno for flen blocks.
 * Flags are passed in indicating whether the cursors are set to the
 * relevant records.
 */
STATIC int				/* error code */
xfs_alloc_fixup_trees(
	xfs_btree_cur_t	*cnt_cur,	/* cursor for by-size btree */
	xfs_btree_cur_t	*bno_cur,	/* cursor for by-block btree */
	xfs_agblock_t	fbno,		/* starting block of free extent */
	xfs_extlen_t	flen,		/* length of free extent */
	xfs_agblock_t	rbno,		/* starting block of returned extent */
	xfs_extlen_t	rlen,		/* length of returned extent */
	int		flags)		/* flags, XFSA_FIXUP_... */
{
	int		error;		/* error code */
	int		i;		/* operation results */
	xfs_agblock_t	nfbno1;		/* first new free startblock */
	xfs_agblock_t	nfbno2;		/* second new free startblock */
	xfs_extlen_t	nflen1=0;	/* first new free length */
	xfs_extlen_t	nflen2=0;	/* second new free length */
	struct xfs_mount *mp;

	mp = cnt_cur->bc_mp;

	/*
	 * Look up the record in the by-size tree if necessary.
	 */
	if (flags & XFSA_FIXUP_CNT_OK) {
#ifdef DEBUG
		if ((error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(mp,
			i == 1 && nfbno1 == fbno && nflen1 == flen);
#endif
	} else {
		if ((error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
	}
	/*
	 * Look up the record in the by-block tree if necessary.
	 */
	if (flags & XFSA_FIXUP_BNO_OK) {
#ifdef DEBUG
		if ((error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(mp,
			i == 1 && nfbno1 == fbno && nflen1 == flen);
#endif
	} else {
		if ((error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
	}

#ifdef DEBUG
	if (bno_cur->bc_nlevels == 1 && cnt_cur->bc_nlevels == 1) {
		struct xfs_btree_block	*bnoblock;
		struct xfs_btree_block	*cntblock;

		bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_bufs[0]);
		cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_bufs[0]);

		XFS_WANT_CORRUPTED_RETURN(mp,
			bnoblock->bb_numrecs == cntblock->bb_numrecs);
	}
#endif

	/*
	 * Deal with all four cases: the allocated record is contained
	 * within the freespace record, so we can have new freespace
	 * at either (or both) end, or no freespace remaining.
	 */
	if (rbno == fbno && rlen == flen)
		nfbno1 = nfbno2 = NULLAGBLOCK;
	else if (rbno == fbno) {
		nfbno1 = rbno + rlen;
		nflen1 = flen - rlen;
		nfbno2 = NULLAGBLOCK;
	} else if (rbno + rlen == fbno + flen) {
		nfbno1 = fbno;
		nflen1 = flen - rlen;
		nfbno2 = NULLAGBLOCK;
	} else {
		nfbno1 = fbno;
		nflen1 = rbno - fbno;
		nfbno2 = rbno + rlen;
		nflen2 = (fbno + flen) - nfbno2;
	}
	/*
	 * Delete the entry from the by-size btree.
	 */
	if ((error = xfs_btree_delete(cnt_cur, &i)))
		return error;
	XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
	/*
	 * Add new by-size btree entry(s).
	 */
	if (nfbno1 != NULLAGBLOCK) {
		if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(mp, i == 0);
		if ((error = xfs_btree_insert(cnt_cur, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
	}
	if (nfbno2 != NULLAGBLOCK) {
		if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(mp, i == 0);
		if ((error = xfs_btree_insert(cnt_cur, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
	}
	/*
	 * Fix up the by-block btree entry(s).
	 */
	if (nfbno1 == NULLAGBLOCK) {
		/*
		 * No remaining freespace, just delete the by-block tree entry.
		 */
		if ((error = xfs_btree_delete(bno_cur, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
	} else {
		/*
		 * Update the by-block entry to start later|be shorter.
		 */
		if ((error = xfs_alloc_update(bno_cur, nfbno1, nflen1)))
			return error;
	}
	if (nfbno2 != NULLAGBLOCK) {
		/*
		 * 2 resulting free entries, need to add one.
		 */
		if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(mp, i == 0);
		if ((error = xfs_btree_insert(bno_cur, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
	}
	return 0;
}

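/*
 * Worked example (hypothetical numbers): allocating [rbno 110, rlen 20]
 * out of the middle of free record [fbno 100, flen 50] hits the fourth
 * case: two remainders survive, [100, 10] before the allocation and
 * [130, 20] after it.  The old by-size entry is deleted and entries for
 * lengths 10 and 20 inserted, while the by-block entry is updated in
 * place to [100, 10] and a new [130, 20] record is inserted.
 */
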
static bool
xfs_agfl_verify(
	struct xfs_buf	*bp)
{
	struct xfs_mount *mp = bp->b_target->bt_mount;
	struct xfs_agfl	*agfl = XFS_BUF_TO_AGFL(bp);
	int		i;

	if (!uuid_equal(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid))
		return false;
	if (be32_to_cpu(agfl->agfl_magicnum) != XFS_AGFL_MAGIC)
		return false;
	/*
	 * during growfs operations, the perag is not fully initialised,
	 * so we can't use it for any useful checking. growfs ensures we can't
	 * use it by using uncached buffers that don't have the perag attached
	 * so we can detect and avoid this problem.
	 */
	if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno)
		return false;

	for (i = 0; i < XFS_AGFL_SIZE(mp); i++) {
		if (be32_to_cpu(agfl->agfl_bno[i]) != NULLAGBLOCK &&
		    be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks)
			return false;
	}

	return true;
}

static void
xfs_agfl_read_verify(
	struct xfs_buf	*bp)
{
	struct xfs_mount *mp = bp->b_target->bt_mount;

	/*
	 * There is no verification of non-crc AGFLs because mkfs does not
	 * initialise the AGFL to zero or NULL. Hence the only valid part of the
	 * AGFL is what the AGF says is active. We can't get to the AGF, so we
	 * can't verify just those entries are valid.
	 */
	if (!xfs_sb_version_hascrc(&mp->m_sb))
		return;

	if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF))
		xfs_buf_ioerror(bp, -EFSBADCRC);
	else if (!xfs_agfl_verify(bp))
		xfs_buf_ioerror(bp, -EFSCORRUPTED);

	if (bp->b_error)
		xfs_verifier_error(bp);
}

static void
xfs_agfl_write_verify(
	struct xfs_buf	*bp)
{
	struct xfs_mount *mp = bp->b_target->bt_mount;
	struct xfs_buf_log_item *bip = bp->b_fspriv;

	/* no verification of non-crc AGFLs */
	if (!xfs_sb_version_hascrc(&mp->m_sb))
		return;

	if (!xfs_agfl_verify(bp)) {
		xfs_buf_ioerror(bp, -EFSCORRUPTED);
		xfs_verifier_error(bp);
		return;
	}

	if (bip)
		XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn);

	xfs_buf_update_cksum(bp, XFS_AGFL_CRC_OFF);
}

const struct xfs_buf_ops xfs_agfl_buf_ops = {
	.verify_read = xfs_agfl_read_verify,
	.verify_write = xfs_agfl_write_verify,
};

/*
 * Read in the allocation group free block array.
 */
STATIC int				/* error */
xfs_alloc_read_agfl(
	xfs_mount_t	*mp,		/* mount point structure */
	xfs_trans_t	*tp,		/* transaction pointer */
	xfs_agnumber_t	agno,		/* allocation group number */
	xfs_buf_t	**bpp)		/* buffer for the ag free block array */
{
	xfs_buf_t	*bp;		/* return value */
	int		error;

	ASSERT(agno != NULLAGNUMBER);
	error = xfs_trans_read_buf(
			mp, tp, mp->m_ddev_targp,
			XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)),
			XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_agfl_buf_ops);
	if (error)
		return error;
	xfs_buf_set_ref(bp, XFS_AGFL_REF);
	*bpp = bp;
	return 0;
}

STATIC int
xfs_alloc_update_counters(
	struct xfs_trans	*tp,
	struct xfs_perag	*pag,
	struct xfs_buf		*agbp,
	long			len)
{
	struct xfs_agf		*agf = XFS_BUF_TO_AGF(agbp);

	pag->pagf_freeblks += len;
	be32_add_cpu(&agf->agf_freeblks, len);

	xfs_trans_agblocks_delta(tp, len);
	if (unlikely(be32_to_cpu(agf->agf_freeblks) >
		     be32_to_cpu(agf->agf_length)))
		return -EFSCORRUPTED;

	xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS);
	return 0;
}

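/*
 * Example (illustrative sketch, not used by this file): once
 * xfs_alloc_read_agfl() has returned the buffer, the free block array
 * is addressed through XFS_BUF_TO_AGFL; only entries the AGF marks as
 * active are meaningful.  The helper name and the use of agf_flfirst
 * as the head index here are a hypothetical demonstration.
 */
static inline xfs_agblock_t
xfs_alloc_example_agfl_head(		/* hypothetical helper */
	struct xfs_buf	*agflbp,
	struct xfs_agf	*agf)
{
	struct xfs_agfl	*agfl = XFS_BUF_TO_AGFL(agflbp);

	/* agf_flfirst indexes the oldest active entry in the array */
	return be32_to_cpu(agfl->agfl_bno[be32_to_cpu(agf->agf_flfirst)]);
}
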
/*
 * Allocation group level functions.
 */

/*
 * Allocate a variable extent in the allocation group agno.
 * Type and bno are used to determine where in the allocation group the
 * extent will start.
 * Extent's length (returned in *len) will be between minlen and maxlen,
 * and of the form k * prod + mod unless there's nothing that large.
 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
 */
STATIC int			/* error */
xfs_alloc_ag_vextent(
	xfs_alloc_arg_t	*args)	/* argument structure for allocation */
{
	int		error=0;

	ASSERT(args->minlen > 0);
	ASSERT(args->maxlen > 0);
	ASSERT(args->minlen <= args->maxlen);
	ASSERT(args->mod < args->prod);
	ASSERT(args->alignment > 0);
	/*
	 * Branch to correct routine based on the type.
	 */
	args->wasfromfl = 0;
	switch (args->type) {
	case XFS_ALLOCTYPE_THIS_AG:
		error = xfs_alloc_ag_vextent_size(args);
		break;
	case XFS_ALLOCTYPE_NEAR_BNO:
		error = xfs_alloc_ag_vextent_near(args);
		break;
	case XFS_ALLOCTYPE_THIS_BNO:
		error = xfs_alloc_ag_vextent_exact(args);
		break;
	default:
		ASSERT(0);
		/* NOTREACHED */
	}

	if (error || args->agbno == NULLAGBLOCK)
		return error;

	ASSERT(args->len >= args->minlen);
	ASSERT(args->len <= args->maxlen);
	ASSERT(!args->wasfromfl || !args->isfl);
	ASSERT(args->agbno % args->alignment == 0);

	if (!args->wasfromfl) {
		error = xfs_alloc_update_counters(args->tp, args->pag,
						  args->agbp,
						  -((long)(args->len)));
		if (error)
			return error;

		ASSERT(!xfs_extent_busy_search(args->mp, args->agno,
					       args->agbno, args->len));
	}

	if (!args->isfl) {
		xfs_trans_mod_sb(args->tp, args->wasdel ?
				 XFS_TRANS_SB_RES_FDBLOCKS :
				 XFS_TRANS_SB_FDBLOCKS,
				 -((long)(args->len)));
	}

	XFS_STATS_INC(xs_allocx);
	XFS_STATS_ADD(xs_allocb, args->len);
	return error;
}

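/*
 * Example (illustrative sketch, not a caller in this file): internal
 * users fill in an xfs_alloc_arg_t and must distinguish "no space"
 * from failure, since a zero return with agbno == NULLAGBLOCK is not
 * an error.  The helper name and the -ENOSPC convention below are
 * hypothetical.
 */
static inline int
xfs_alloc_example_this_ag(		/* hypothetical helper */
	struct xfs_alloc_arg	*args)
{
	int	error;

	args->type = XFS_ALLOCTYPE_THIS_AG;	/* take any space in the AG */
	error = xfs_alloc_ag_vextent(args);
	if (error)
		return error;			/* real failure */
	if (args->agbno == NULLAGBLOCK)
		return -ENOSPC;			/* no space, not an error */
	return 0;
}
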
/*
 * Allocate a variable extent at exactly agno/bno.
 * Extent's length (returned in *len) will be between minlen and maxlen,
 * and of the form k * prod + mod unless there's nothing that large.
 * Return the starting a.g. block (bno), or NULLAGBLOCK if we can't do it.
 */
STATIC int			/* error */
xfs_alloc_ag_vextent_exact(
	xfs_alloc_arg_t	*args)	/* allocation argument structure */
{
	xfs_btree_cur_t	*bno_cur;/* by block-number btree cursor */
	xfs_btree_cur_t	*cnt_cur;/* by count btree cursor */
	int		error;
	xfs_agblock_t	fbno;	/* start block of found extent */
	xfs_extlen_t	flen;	/* length of found extent */
	xfs_agblock_t	tbno;	/* start block of trimmed extent */
	xfs_extlen_t	tlen;	/* length of trimmed extent */
	xfs_agblock_t	tend;	/* end block of trimmed extent */
	int		i;	/* success/failure of operation */

	ASSERT(args->alignment == 1);

	/*
	 * Allocate/initialize a cursor for the by-number freespace btree.
	 */
	bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
					  args->agno, XFS_BTNUM_BNO);

	/*
	 * Lookup bno and minlen in the btree (minlen is irrelevant, really).
	 * Look for the closest free block <= bno, it must contain bno
	 * if any free block does.
	 */
	error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i);
	if (error)
		goto error0;
	if (!i)
		goto not_found;

	/*
	 * Grab the freespace record.
	 */
	error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i);
	if (error)
		goto error0;
	XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
	ASSERT(fbno <= args->agbno);

	/*
	 * Check for overlapping busy extents.
	 */
	xfs_extent_busy_trim(args, fbno, flen, &tbno, &tlen);

	/*
	 * Give up if the start of the extent is busy, or the freespace isn't
	 * long enough for the minimum request.
	 */
	if (tbno > args->agbno)
		goto not_found;
	if (tlen < args->minlen)
		goto not_found;
	tend = tbno + tlen;
	if (tend < args->agbno + args->minlen)
		goto not_found;

	/*
	 * End of extent will be smaller of the freespace end and the
	 * maximal requested end.
	 *
	 * Fix the length according to mod and prod if given.
	 */
	args->len = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen)
						- args->agbno;
	xfs_alloc_fix_len(args);
	if (!xfs_alloc_fix_minleft(args))
		goto not_found;

	ASSERT(args->agbno + args->len <= tend);

	/*
	 * We are allocating agbno for args->len
	 * Allocate/initialize a cursor for the by-size btree.
	 */
	cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
		args->agno, XFS_BTNUM_CNT);
	ASSERT(args->agbno + args->len <=
		be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
	error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno,
				      args->len, XFSA_FIXUP_BNO_OK);
	if (error) {
		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
		goto error0;
	}

	xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);

	args->wasfromfl = 0;
	trace_xfs_alloc_exact_done(args);
	return 0;

not_found:
	/* Didn't find it, return null. */
	xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
	args->agbno = NULLAGBLOCK;
	trace_xfs_alloc_exact_notfound(args);
	return 0;

error0:
	xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
	trace_xfs_alloc_exact_error(args);
	return error;
}

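/*
 * Worked example (hypothetical numbers): asking for exactly agbno 200
 * with minlen 8, the lookup finds free record [190, 30].  If blocks
 * 190-204 are busy, the trimmed extent starts at 205 > 200, so the
 * request takes the not_found path and returns NULLAGBLOCK rather
 * than waiting for the busy extent to clear.
 */
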
/*
 * Search the btree in a given direction via the search cursor and compare
 * the records found against the good extent we've already found.
 */
STATIC int
xfs_alloc_find_best_extent(
	struct xfs_alloc_arg	*args,	/* allocation argument structure */
	struct xfs_btree_cur	**gcur,	/* good cursor */
	struct xfs_btree_cur	**scur,	/* searching cursor */
	xfs_agblock_t		gdiff,	/* difference for search comparison */
	xfs_agblock_t		*sbno,	/* extent found by search */
	xfs_extlen_t		*slen,	/* extent length */
	xfs_agblock_t		*sbnoa,	/* aligned extent found by search */
	xfs_extlen_t		*slena,	/* aligned extent length */
	int			dir)	/* 0 = search right, 1 = search left */
{
	xfs_agblock_t		new;
	xfs_agblock_t		sdiff;
	int			error;
	int			i;

	/* The good extent is perfect, no need to search. */
	if (!gdiff)
		goto out_use_good;

	/*
	 * Look until we find a better one, run out of space or run off the end.
	 */
	do {
		error = xfs_alloc_get_rec(*scur, sbno, slen, &i);
		if (error)
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
		xfs_alloc_compute_aligned(args, *sbno, *slen, sbnoa, slena);

		/*
		 * The good extent is closer than this one.
		 */
		if (!dir) {
			if (*sbnoa > args->max_agbno)
				goto out_use_good;
			if (*sbnoa >= args->agbno + gdiff)
				goto out_use_good;
		} else {
			if (*sbnoa < args->min_agbno)
				goto out_use_good;
			if (*sbnoa <= args->agbno - gdiff)
				goto out_use_good;
		}

		/*
		 * Same distance, compare length and pick the best.
		 */
		if (*slena >= args->minlen) {
			args->len = XFS_EXTLEN_MIN(*slena, args->maxlen);
			xfs_alloc_fix_len(args);

			sdiff = xfs_alloc_compute_diff(args->agbno, args->len,
						       args->alignment,
						       args->userdata, *sbnoa,
						       *slena, &new);

			/*
			 * Choose closer size and invalidate other cursor.
			 */
			if (sdiff < gdiff)
				goto out_use_search;
			goto out_use_good;
		}

		if (!dir)
			error = xfs_btree_increment(*scur, 0, &i);
		else
			error = xfs_btree_decrement(*scur, 0, &i);
		if (error)
			goto error0;
	} while (i);

out_use_good:
	xfs_btree_del_cursor(*scur, XFS_BTREE_NOERROR);
	*scur = NULL;
	return 0;

out_use_search:
	xfs_btree_del_cursor(*gcur, XFS_BTREE_NOERROR);
	*gcur = NULL;
	return 0;

error0:
	/* caller invalidates cursors */
	return error;
}

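/*
 * Worked example (hypothetical numbers): say the good extent sits
 * gdiff 16 blocks from args->agbno.  Searching right (dir 0), any
 * candidate whose aligned start is at or beyond args->agbno + 16 can
 * only be farther away, so the walk stops there and keeps the good
 * cursor; a candidate inside that window wins only if its computed
 * sdiff comes out strictly smaller than 16.
 */
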
/*
 * Allocate a variable extent near bno in the allocation group agno.
 * Extent's length (returned in len) will be between minlen and maxlen,
 * and of the form k * prod + mod unless there's nothing that large.
 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
 */
STATIC int				/* error */
xfs_alloc_ag_vextent_near(
	xfs_alloc_arg_t	*args)		/* allocation argument structure */
{
	xfs_btree_cur_t	*bno_cur_gt;	/* cursor for bno btree, right side */
	xfs_btree_cur_t	*bno_cur_lt;	/* cursor for bno btree, left side */
	xfs_btree_cur_t	*cnt_cur;	/* cursor for count btree */
	xfs_agblock_t	gtbno;		/* start bno of right side entry */
	xfs_agblock_t	gtbnoa;		/* aligned ... */
	xfs_extlen_t	gtdiff;		/* difference to right side entry */
	xfs_extlen_t	gtlen;		/* length of right side entry */
	xfs_extlen_t	gtlena;		/* aligned ... */
	xfs_agblock_t	gtnew;		/* useful start bno of right side */
	int		error;		/* error code */
	int		i;		/* result code, temporary */
	int		j;		/* result code, temporary */
	xfs_agblock_t	ltbno;		/* start bno of left side entry */
	xfs_agblock_t	ltbnoa;		/* aligned ... */
	xfs_extlen_t	ltdiff;		/* difference to left side entry */
	xfs_extlen_t	ltlen;		/* length of left side entry */
	xfs_extlen_t	ltlena;		/* aligned ... */
	xfs_agblock_t	ltnew;		/* useful start bno of left side */
	xfs_extlen_t	rlen;		/* length of returned extent */
	int		forced = 0;
#ifdef DEBUG
	/*
	 * Randomly don't execute the first algorithm.
	 */
	int		dofirst;	/* set to do first algorithm */

	dofirst = prandom_u32() & 1;
#endif

	/* handle uninitialized agbno range so caller doesn't have to */
	if (!args->min_agbno && !args->max_agbno)
		args->max_agbno = args->mp->m_sb.sb_agblocks - 1;
	ASSERT(args->min_agbno <= args->max_agbno);

	/* clamp agbno to the range if it's outside */
	if (args->agbno < args->min_agbno)
		args->agbno = args->min_agbno;
	if (args->agbno > args->max_agbno)
		args->agbno = args->max_agbno;

restart:
	bno_cur_lt = NULL;
	bno_cur_gt = NULL;
	ltlen = 0;
	gtlena = 0;
	ltlena = 0;

	/*
	 * Get a cursor for the by-size btree.
	 */
	cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
		args->agno, XFS_BTNUM_CNT);

	/*
	 * See if there are any free extents as big as maxlen.
	 */
	if ((error = xfs_alloc_lookup_ge(cnt_cur, 0, args->maxlen, &i)))
		goto error0;
	/*
	 * If none, then pick up the last entry in the tree unless the
	 * tree is empty.
	 */
	if (!i) {
		if ((error = xfs_alloc_ag_vextent_small(args, cnt_cur, &ltbno,
				&ltlen, &i)))
			goto error0;
		if (i == 0 || ltlen == 0) {
			xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
			trace_xfs_alloc_near_noentry(args);
			return 0;
		}
		ASSERT(i == 1);
	}
	args->wasfromfl = 0;

	/*
	 * First algorithm.
	 * If the requested extent is large wrt the freespaces available
	 * in this a.g., then the cursor will be pointing to a btree entry
	 * near the right edge of the tree.  If it's in the last btree leaf
	 * block, then we just examine all the entries in that block
	 * that are big enough, and pick the best one.
	 * This is written as a while loop so we can break out of it,
	 * but we never loop back to the top.
	 */
	while (xfs_btree_islastblock(cnt_cur, 0)) {
		xfs_extlen_t	bdiff;
		int		besti=0;
		xfs_extlen_t	blen=0;
		xfs_agblock_t	bnew=0;

#ifdef DEBUG
		if (dofirst)
			break;
#endif
		/*
		 * Start from the entry that lookup found, sequence through
		 * all larger free blocks.  If we're actually pointing at a
		 * record smaller than maxlen, go to the start of this block,
		 * and skip all those smaller than minlen.
		 */
		if (ltlen || args->alignment > 1) {
			cnt_cur->bc_ptrs[0] = 1;
			do {
				if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno,
						&ltlen, &i)))
					goto error0;
				XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
				if (ltlen >= args->minlen)
					break;
				if ((error = xfs_btree_increment(cnt_cur, 0, &i)))
					goto error0;
			} while (i);
			ASSERT(ltlen >= args->minlen);
			if (!i)
				break;
		}
		i = cnt_cur->bc_ptrs[0];
		for (j = 1, blen = 0, bdiff = 0;
		     !error && j && (blen < args->maxlen || bdiff > 0);
		     error = xfs_btree_increment(cnt_cur, 0, &j)) {
			/*
			 * For each entry, decide if it's better than
			 * the previous best entry.
			 */
			if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
				goto error0;
			XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
			xfs_alloc_compute_aligned(args, ltbno, ltlen,
						  &ltbnoa, &ltlena);
			if (ltlena < args->minlen)
				continue;
			if (ltbnoa < args->min_agbno || ltbnoa > args->max_agbno)
				continue;
			args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
			xfs_alloc_fix_len(args);
			ASSERT(args->len >= args->minlen);
			if (args->len < blen)
				continue;
			ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
				args->alignment, args->userdata, ltbnoa,
				ltlena, &ltnew);
			if (ltnew != NULLAGBLOCK &&
			    (args->len > blen || ltdiff < bdiff)) {
				bdiff = ltdiff;
				bnew = ltnew;
				blen = args->len;
				besti = cnt_cur->bc_ptrs[0];
			}
		}
		/*
		 * It didn't work.  We COULD be in a case where
		 * there's a good record somewhere, so try again.
		 */
		if (blen == 0)
			break;
		/*
		 * Point at the best entry, and retrieve it again.
		 */
		cnt_cur->bc_ptrs[0] = besti;
		if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
		ASSERT(ltbno + ltlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
		args->len = blen;
		if (!xfs_alloc_fix_minleft(args)) {
			xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
			trace_xfs_alloc_near_nominleft(args);
			return 0;
		}
		blen = args->len;
		/*
		 * We are allocating starting at bnew for blen blocks.
		 */
		args->agbno = bnew;
		ASSERT(bnew >= ltbno);
		ASSERT(bnew + blen <= ltbno + ltlen);
		/*
		 * Set up a cursor for the by-bno tree.
		 */
		bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp,
			args->agbp, args->agno, XFS_BTNUM_BNO);
		/*
		 * Fix up the btree entries.
		 */
		if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno,
				ltlen, bnew, blen, XFSA_FIXUP_CNT_OK)))
			goto error0;
		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
		xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);

		trace_xfs_alloc_near_first(args);
		return 0;
	}

	/*
	 * Second algorithm.
	 * Search in the by-bno tree to the left and to the right
	 * simultaneously, until in each case we find a space big enough,
	 * or run into the edge of the tree.  When we run into the edge,
	 * we deallocate that cursor.
	 * If both searches succeed, we compare the two spaces and pick
	 * the better one.
	 * With alignment, it's possible for both to fail; the upper
	 * level algorithm that picks allocation groups for allocations
	 * is not supposed to do this.
	 */
	/*
	 * Allocate and initialize the cursor for the leftward search.
	 */
	bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
		args->agno, XFS_BTNUM_BNO);
	/*
	 * Lookup <= bno to find the leftward search's starting point.
	 */
	if ((error = xfs_alloc_lookup_le(bno_cur_lt, args->agbno, args->maxlen, &i)))
		goto error0;
	if (!i) {
		/*
		 * Didn't find anything; use this cursor for the rightward
		 * search.
		 */
		bno_cur_gt = bno_cur_lt;
		bno_cur_lt = NULL;
	}
	/*
	 * Found something.  Duplicate the cursor for the rightward search.
	 */
	else if ((error = xfs_btree_dup_cursor(bno_cur_lt, &bno_cur_gt)))
		goto error0;
	/*
	 * Increment the cursor, so we will point at the entry just right
	 * of the leftward entry if any, or to the leftmost entry.
	 */
	if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
		goto error0;
	if (!i) {
		/*
		 * It failed, there are no rightward entries.
		 */
		xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_NOERROR);
		bno_cur_gt = NULL;
	}
	/*
	 * Loop going left with the leftward cursor, right with the
	 * rightward cursor, until either both directions give up or
	 * we find an entry at least as big as minlen.
	 */
	do {
		if (bno_cur_lt) {
			if ((error = xfs_alloc_get_rec(bno_cur_lt, &ltbno, &ltlen, &i)))
				goto error0;
			XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
			xfs_alloc_compute_aligned(args, ltbno, ltlen,
						  &ltbnoa, &ltlena);
			if (ltlena >= args->minlen && ltbnoa >= args->min_agbno)
				break;
			if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i)))
				goto error0;
			if (!i || ltbnoa < args->min_agbno) {
				xfs_btree_del_cursor(bno_cur_lt,
						     XFS_BTREE_NOERROR);
				bno_cur_lt = NULL;
			}
		}
		if (bno_cur_gt) {
			if ((error = xfs_alloc_get_rec(bno_cur_gt, &gtbno, &gtlen, &i)))
				goto error0;
			XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
			xfs_alloc_compute_aligned(args, gtbno, gtlen,
						  &gtbnoa, &gtlena);
			if (gtlena >= args->minlen && gtbnoa <= args->max_agbno)
				break;
			if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
				goto error0;
			if (!i || gtbnoa > args->max_agbno) {
				xfs_btree_del_cursor(bno_cur_gt,
						     XFS_BTREE_NOERROR);
				bno_cur_gt = NULL;
			}
		}
	} while (bno_cur_lt || bno_cur_gt);

	/*
	 * Got both cursors still active, need to find better entry.
	 */
	if (bno_cur_lt && bno_cur_gt) {
		if (ltlena >= args->minlen) {
			/*
			 * Left side is good, look for a right side entry.
			 */
			args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
			xfs_alloc_fix_len(args);
			ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
				args->alignment, args->userdata, ltbnoa,
				ltlena, &ltnew);

			error = xfs_alloc_find_best_extent(args,
						&bno_cur_lt, &bno_cur_gt,
						ltdiff, &gtbno, &gtlen,
						&gtbnoa, &gtlena,
						0 /* search right */);
		} else {
			ASSERT(gtlena >= args->minlen);

			/*
			 * Right side is good, look for a left side entry.
			 */
			args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen);
			xfs_alloc_fix_len(args);
			gtdiff = xfs_alloc_compute_diff(args->agbno, args->len,
				args->alignment, args->userdata, gtbnoa,
				gtlena, &gtnew);

			error = xfs_alloc_find_best_extent(args,
						&bno_cur_gt, &bno_cur_lt,
						gtdiff, &ltbno, &ltlen,
						&ltbnoa, &ltlena,
						1 /* search left */);
		}

		if (error)
			goto error0;
	}

	/*
	 * If we couldn't get anything, give up.
	 */
	if (bno_cur_lt == NULL && bno_cur_gt == NULL) {
		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);

		if (!forced++) {
			trace_xfs_alloc_near_busy(args);
			xfs_log_force(args->mp, XFS_LOG_SYNC);
			goto restart;
		}
		trace_xfs_alloc_size_neither(args);
		args->agbno = NULLAGBLOCK;
		return 0;
	}

	/*
	 * At this point we have selected a freespace entry, either to the
	 * left or to the right.  If it's on the right, copy all the
	 * useful variables to the "left" set so we only have one
	 * copy of this code.
	 */
	if (bno_cur_gt) {
		bno_cur_lt = bno_cur_gt;
		bno_cur_gt = NULL;
		ltbno = gtbno;
		ltbnoa = gtbnoa;
		ltlen = gtlen;
		ltlena = gtlena;
		j = 1;
	} else
		j = 0;

	/*
	 * Fix up the length and compute the useful address.
	 */
	args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
	xfs_alloc_fix_len(args);
	if (!xfs_alloc_fix_minleft(args)) {
		trace_xfs_alloc_near_nominleft(args);
		xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
		return 0;
	}
	rlen = args->len;
	(void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment,
				     args->userdata, ltbnoa, ltlena, &ltnew);
	ASSERT(ltnew >= ltbno);
	ASSERT(ltnew + rlen <= ltbnoa + ltlena);
	ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
	ASSERT(ltnew >= args->min_agbno && ltnew <= args->max_agbno);
	args->agbno = ltnew;

	if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen,
			ltnew, rlen, XFSA_FIXUP_BNO_OK)))
		goto error0;

	if (j)
		trace_xfs_alloc_near_greater(args);
	else
		trace_xfs_alloc_near_lesser(args);

	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
	xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
	return 0;

error0:
	trace_xfs_alloc_near_error(args);
	if (cnt_cur != NULL)
		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
	if (bno_cur_lt != NULL)
		xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_ERROR);
	if (bno_cur_gt != NULL)
		xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_ERROR);
	return error;
}

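/*
 * Worked example (hypothetical numbers): with args->agbno 500, the
 * leftward cursor walks records at 480, 410, ... while the rightward
 * cursor walks 530, 610, ...; each side stops at its first aligned
 * extent of at least minlen.  If both sides have candidates, the diffs
 * computed above decide between, say, 480 (diff 20) and 530 (diff 30)
 * in favour of the left entry.
 */
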
/*
 * Allocate a variable extent anywhere in the allocation group agno.
 * Extent's length (returned in len) will be between minlen and maxlen,
 * and of the form k * prod + mod unless there's nothing that large.
 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
 */
STATIC int				/* error */
xfs_alloc_ag_vextent_size(
	xfs_alloc_arg_t	*args)		/* allocation argument structure */
{
	xfs_btree_cur_t	*bno_cur;	/* cursor for bno btree */
	xfs_btree_cur_t	*cnt_cur;	/* cursor for cnt btree */
	int		error;		/* error result */
	xfs_agblock_t	fbno;		/* start of found freespace */
	xfs_extlen_t	flen;		/* length of found freespace */
	int		i;		/* temp status variable */
	xfs_agblock_t	rbno;		/* returned block number */
	xfs_extlen_t	rlen;		/* length of returned extent */
	int		forced = 0;

restart:
	/*
	 * Allocate and initialize a cursor for the by-size btree.
	 */
	cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
		args->agno, XFS_BTNUM_CNT);
	bno_cur = NULL;

	/*
	 * Look for an entry >= maxlen+alignment-1 blocks.
	 */
	if ((error = xfs_alloc_lookup_ge(cnt_cur, 0,
			args->maxlen + args->alignment - 1, &i)))
		goto error0;

	/*
	 * If none or we have busy extents that we cannot allocate from, then
	 * we have to settle for a smaller extent. In the case that there are
	 * no large extents, this will return the last entry in the tree unless
	 * the tree is empty. In the case that there are only busy large
	 * extents, this will return the largest small extent unless there
	 * are no smaller extents available.
	 */
	if (!i || forced > 1) {
		error = xfs_alloc_ag_vextent_small(args, cnt_cur,
						   &fbno, &flen, &i);
		if (error)
			goto error0;
		if (i == 0 || flen == 0) {
			xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
			trace_xfs_alloc_size_noentry(args);
			return 0;
		}
		ASSERT(i == 1);
		xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen);
	} else {
		/*
		 * Search for a non-busy extent that is large enough.
		 * If we are at low space, don't check, or if we fall off
		 * the end of the btree, turn off the busy check and
		 * restart.
		 */
		for (;;) {
			error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i);
			if (error)
				goto error0;
			XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);

			xfs_alloc_compute_aligned(args, fbno, flen,
						  &rbno, &rlen);

			if (rlen >= args->maxlen)
				break;

			error = xfs_btree_increment(cnt_cur, 0, &i);
			if (error)
				goto error0;
			if (i == 0) {
				/*
				 * Our only valid extents must have been busy.
				 * Make it unbusy by forcing the log out and
				 * retrying. If we've been here before, forcing
				 * the log isn't making the extents available,
				 * which means they have probably been freed in
				 * this transaction. In that case, we have to
				 * give up on them and we'll attempt a minlen
				 * allocation the next time around.
				 */
				xfs_btree_del_cursor(cnt_cur,
						     XFS_BTREE_NOERROR);
				trace_xfs_alloc_size_busy(args);
				if (!forced++)
					xfs_log_force(args->mp, XFS_LOG_SYNC);
				goto restart;
			}
		}
	}

	/*
	 * In the first case above, we got the last entry in the
	 * by-size btree.  Now we check to see if the space hits maxlen
	 * once aligned; if not, we search left for something better.
	 * This can't happen in the second case above.
	 */
	rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
	XFS_WANT_CORRUPTED_GOTO(args->mp, rlen == 0 ||
			(rlen <= flen && rbno + rlen <= fbno + flen), error0);
	if (rlen < args->maxlen) {
		xfs_agblock_t	bestfbno;
		xfs_extlen_t	bestflen;
		xfs_agblock_t	bestrbno;
		xfs_extlen_t	bestrlen;

		bestrlen = rlen;
		bestrbno = rbno;
		bestflen = flen;
		bestfbno = fbno;
		for (;;) {
			if ((error = xfs_btree_decrement(cnt_cur, 0, &i)))
				goto error0;
			if (i == 0)
				break;
			if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen,
					&i)))
				goto error0;
			XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
			if (flen < bestrlen)
				break;
			xfs_alloc_compute_aligned(args, fbno, flen,
						  &rbno, &rlen);
			rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
			XFS_WANT_CORRUPTED_GOTO(args->mp, rlen == 0 ||
				(rlen <= flen && rbno + rlen <= fbno + flen),
				error0);
			if (rlen > bestrlen) {
				bestrlen = rlen;
				bestrbno = rbno;
				bestflen = flen;
				bestfbno = fbno;
				if (rlen == args->maxlen)
					break;
			}
		}
		if ((error = xfs_alloc_lookup_eq(cnt_cur, bestfbno, bestflen,
				&i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
		rlen = bestrlen;
		rbno = bestrbno;
		flen = bestflen;
		fbno = bestfbno;
	}
	args->wasfromfl = 0;
	/*
	 * Fix up the length.
	 */
	args->len = rlen;
	if (rlen < args->minlen) {
		if (!forced++) {
			xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
			trace_xfs_alloc_size_busy(args);
			xfs_log_force(args->mp, XFS_LOG_SYNC);
			goto restart;
		}
		goto out_nominleft;
	}
	xfs_alloc_fix_len(args);

	if (!xfs_alloc_fix_minleft(args))
		goto out_nominleft;
	rlen = args->len;
	XFS_WANT_CORRUPTED_GOTO(args->mp, rlen <= flen, error0);
	/*
	 * Allocate and initialize a cursor for the by-block tree.
	 */
	bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
		args->agno, XFS_BTNUM_BNO);
	if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
			rbno, rlen, XFSA_FIXUP_CNT_OK)))
		goto error0;
	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
	xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
	cnt_cur = bno_cur = NULL;
	args->len = rlen;
	args->agbno = rbno;
	XFS_WANT_CORRUPTED_GOTO(args->mp,
		args->agbno + args->len <=
			be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
		error0);
	trace_xfs_alloc_size_done(args);
	return 0;

error0:
	trace_xfs_alloc_size_error(args);
	if (cnt_cur)
		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
	if (bno_cur)
		xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
	return error;

out_nominleft:
	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
	trace_xfs_alloc_size_nominleft(args);
	args->agbno = NULLAGBLOCK;
	return 0;
}

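/*
 * Worked example (hypothetical numbers): with maxlen 64 and alignment 8,
 * the initial lookup asks for a record of at least 64 + 8 - 1 = 71
 * blocks.  Any extent that long still contains a 64-block run starting
 * on an 8-block boundary no matter how the free extent itself is
 * aligned, so the aligned trim afterwards cannot push it below maxlen.
 */
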
/*
 * Deal with the case where only small freespaces remain.
 * Either return the contents of the last freespace record,
 * or allocate space from the freelist if there is nothing in the tree.
 */
STATIC int			/* error */
xfs_alloc_ag_vextent_small(
	xfs_alloc_arg_t	*args,	/* allocation argument structure */
	xfs_btree_cur_t	*ccur,	/* by-size cursor */
	xfs_agblock_t	*fbnop,	/* result block number */
	xfs_extlen_t	*flenp,	/* result length */
	int		*stat)	/* status: 0-freelist, 1-normal/none */
{
	int		error;
	xfs_agblock_t	fbno;
	xfs_extlen_t	flen;
	int		i;

	if ((error = xfs_btree_decrement(ccur, 0, &i)))
		goto error0;
	if (i) {
		if ((error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
	}
	/*
	 * Nothing in the btree, try the freelist.  Make sure
	 * to respect minleft even when pulling from the
	 * freelist.
	 */
	else if (args->minlen == 1 && args->alignment == 1 && !args->isfl &&
		 (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount)
		  > args->minleft)) {
		error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0);
		if (error)
			goto error0;
		if (fbno != NULLAGBLOCK) {
			xfs_extent_busy_reuse(args->mp, args->agno, fbno, 1,
					      args->userdata);

			if (args->userdata) {
				xfs_buf_t	*bp;

				bp = xfs_btree_get_bufs(args->mp, args->tp,
					args->agno, fbno, 0);
				xfs_trans_binval(args->tp, bp);
			}
			args->len = 1;
			args->agbno = fbno;
			XFS_WANT_CORRUPTED_GOTO(args->mp,
				args->agbno + args->len <=
				be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
				error0);
			args->wasfromfl = 1;
			trace_xfs_alloc_small_freelist(args);
			*stat = 0;
			return 0;
		}
		/*
		 * Nothing in the freelist.
		 */
		else
			flen = 0;
	}
	/*
	 * Can't allocate from the freelist for some reason.
	 */
	else {
		fbno = NULLAGBLOCK;
		flen = 0;
	}
	/*
	 * Can't do the allocation, give up.
	 */
	if (flen < args->minlen) {
		args->agbno = NULLAGBLOCK;
		trace_xfs_alloc_small_notenough(args);
		flen = 0;
	}
	*fbnop = fbno;
	*flenp = flen;
	*stat = 1;
	trace_xfs_alloc_small_done(args);
	return 0;

error0:
	trace_xfs_alloc_small_error(args);
	return error;
}

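/*
 * Example of the *stat convention (hypothetical caller): after a zero
 * return, stat == 0 means a single block already came off the freelist
 * and args->agbno/args->len are fully set up, while stat == 1 means the
 * caller still has to inspect *fbnop/*flenp, which hold either the last
 * by-size btree record or NULLAGBLOCK/0 if nothing was usable.
 */
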
/*
 * Free the extent starting at agno/bno for length.
 */
STATIC int			/* error */
xfs_free_ag_extent(
	xfs_trans_t	*tp,	/* transaction pointer */
	xfs_buf_t	*agbp,	/* buffer for a.g. freelist header */
	xfs_agnumber_t	agno,	/* allocation group number */
	xfs_agblock_t	bno,	/* starting block number */
	xfs_extlen_t	len,	/* length of extent */
	int		isfl)	/* set if is freelist blocks - no sb acctg */
{
	xfs_btree_cur_t	*bno_cur;	/* cursor for by-block btree */
	xfs_btree_cur_t	*cnt_cur;	/* cursor for by-size btree */
	int		error;		/* error return value */
	xfs_agblock_t	gtbno;		/* start of right neighbor block */
	xfs_extlen_t	gtlen;		/* length of right neighbor block */
	int		haveleft;	/* have a left neighbor block */
	int		haveright;	/* have a right neighbor block */
	int		i;		/* temp, result code */
	xfs_agblock_t	ltbno;		/* start of left neighbor block */
	xfs_extlen_t	ltlen;		/* length of left neighbor block */
	xfs_mount_t	*mp;		/* mount point struct for filesystem */
	xfs_agblock_t	nbno;		/* new starting block of freespace */
	xfs_extlen_t	nlen;		/* new length of freespace */
	xfs_perag_t	*pag;		/* per allocation group data */

	mp = tp->t_mountp;
	/*
	 * Allocate and initialize a cursor for the by-block btree.
	 */
	bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO);
	cnt_cur = NULL;
	/*
	 * Look for a neighboring block on the left (lower block numbers)
	 * that is contiguous with this space.
	 */
	if ((error = xfs_alloc_lookup_le(bno_cur, bno, len, &haveleft)))
		goto error0;
	if (haveleft) {
		/*
		 * There is a block to our left.
		 */
		if ((error = xfs_alloc_get_rec(bno_cur, &ltbno, &ltlen, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
		/*
		 * It's not contiguous, though.
		 */
		if (ltbno + ltlen < bno)
			haveleft = 0;
		else {
			/*
			 * If this failure happens the request to free this
			 * space was invalid, it's (partly) already free.
			 * Very bad.
			 */
			XFS_WANT_CORRUPTED_GOTO(mp,
						ltbno + ltlen <= bno, error0);
		}
	}
	/*
	 * Look for a neighboring block on the right (higher block numbers)
	 * that is contiguous with this space.
	 */
	if ((error = xfs_btree_increment(bno_cur, 0, &haveright)))
		goto error0;
	if (haveright) {
		/*
		 * There is a block to our right.
		 */
		if ((error = xfs_alloc_get_rec(bno_cur, &gtbno, &gtlen, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
		/*
		 * It's not contiguous, though.
		 */
		if (bno + len < gtbno)
			haveright = 0;
		else {
			/*
			 * If this failure happens the request to free this
			 * space was invalid, it's (partly) already free.
			 * Very bad.
			 */
			XFS_WANT_CORRUPTED_GOTO(mp, gtbno >= bno + len, error0);
		}
	}
	/*
	 * Now allocate and initialize a cursor for the by-size tree.
	 */
	cnt_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_CNT);
	/*
	 * Have both left and right contiguous neighbors.
	 * Merge all three into a single free block.
	 */
	if (haveleft && haveright) {
		/*
		 * Delete the old by-size entry on the left.
		 */
		if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
		if ((error = xfs_btree_delete(cnt_cur, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
		/*
		 * Delete the old by-size entry on the right.
		 */
		if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
		if ((error = xfs_btree_delete(cnt_cur, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
		/*
		 * Delete the old by-block entry for the right block.
		 */
		if ((error = xfs_btree_delete(bno_cur, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
		/*
		 * Move the by-block cursor back to the left neighbor.
		 */
		if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
#ifdef DEBUG
		/*
		 * Check that this is the right record: delete didn't
		 * mangle the cursor.
		 */
		{
			xfs_agblock_t	xxbno;
			xfs_extlen_t	xxlen;

			if ((error = xfs_alloc_get_rec(bno_cur, &xxbno, &xxlen,
					&i)))
				goto error0;
			XFS_WANT_CORRUPTED_GOTO(mp,
				i == 1 && xxbno == ltbno && xxlen == ltlen,
				error0);
		}
#endif
		/*
		 * Update remaining by-block entry to the new, joined block.
		 */
		nbno = ltbno;
		nlen = len + ltlen + gtlen;
		if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
			goto error0;
	}

	/*
	 * Have only a left contiguous neighbor.
	 * Merge it together with the new freespace.
	 */
	else if (haveleft) {
		/*
		 * Delete the old by-size entry on the left.
		 */
		if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
		if ((error = xfs_btree_delete(cnt_cur, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
		/*
		 * Back up the by-block cursor to the left neighbor, and
		 * update its length.
		 */
		if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
		nbno = ltbno;
		nlen = len + ltlen;
		if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
			goto error0;
	}
	/*
	 * Have only a right contiguous neighbor.
	 * Merge it together with the new freespace.
	 */
	else if (haveright) {
		/*
		 * Delete the old by-size entry on the right.
		 */
		if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
		if ((error = xfs_btree_delete(cnt_cur, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
		/*
		 * Update the starting block and length of the right
		 * neighbor in the by-block tree.
		 */
		nbno = bno;
		nlen = len + gtlen;
		if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
			goto error0;
	}
	/*
	 * No contiguous neighbors.
	 * Insert the new freespace into the by-block tree.
	 */
	else {
		nbno = bno;
		nlen = len;
		if ((error = xfs_btree_insert(bno_cur, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
	}
	xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
	bno_cur = NULL;
	/*
	 * In all cases we need to insert the new freespace in the by-size tree.
	 */
	if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i)))
		goto error0;
	XFS_WANT_CORRUPTED_GOTO(mp, i == 0, error0);
	if ((error = xfs_btree_insert(cnt_cur, &i)))
		goto error0;
	XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
	cnt_cur = NULL;

	/*
	 * Update the freespace totals in the ag and superblock.
	 */
	pag = xfs_perag_get(mp, agno);
	error = xfs_alloc_update_counters(tp, pag, agbp, len);
	xfs_perag_put(pag);
	if (error)
		goto error0;

	if (!isfl)
		xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (long)len);
	XFS_STATS_INC(xs_freex);
	XFS_STATS_ADD(xs_freeb, len);

	trace_xfs_free_extent(mp, agno, bno, len, isfl, haveleft, haveright);

	return 0;

error0:
	trace_xfs_free_extent(mp, agno, bno, len, isfl, -1, -1);
	if (bno_cur)
		xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
	if (cnt_cur)
		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
	return error;
}

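/*
 * Worked example (hypothetical numbers): freeing [100, 10] next to
 * existing free records [90, 10] and [110, 20] takes the both-neighbors
 * path: both old by-size entries are deleted, the right by-block entry
 * is deleted, the left by-block entry becomes [90, 40], and a single
 * new by-size entry for length 40 is inserted.
 */
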
/*
 * Visible (exported) allocation/free functions.
 * Some of these are used just by xfs_alloc_btree.c and this file.
 */

/*
 * Compute and fill in value of m_ag_maxlevels.
 */
void
xfs_alloc_compute_maxlevels(
	xfs_mount_t	*mp)	/* file system mount structure */
{
	int		level;
	uint		maxblocks;
	uint		maxleafents;
	int		minleafrecs;
	int		minnoderecs;

	maxleafents = (mp->m_sb.sb_agblocks + 1) / 2;
	minleafrecs = mp->m_alloc_mnr[0];
	minnoderecs = mp->m_alloc_mnr[1];
	maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
	for (level = 1; maxblocks > 1; level++)
		maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs;
	mp->m_ag_maxlevels = level;
}

/*
 * Find the length of the longest extent in an AG.
 */
xfs_extlen_t
xfs_alloc_longest_free_extent(
	struct xfs_mount	*mp,
	struct xfs_perag	*pag,
	xfs_extlen_t		need)
{
	xfs_extlen_t		delta = 0;

	if (need > pag->pagf_flcount)
		delta = need - pag->pagf_flcount;

	if (pag->pagf_longest > delta)
		return pag->pagf_longest - delta;
	return pag->pagf_flcount > 0 || pag->pagf_longest > 0;
}

unsigned int
xfs_alloc_min_freelist(
	struct xfs_mount	*mp,
	struct xfs_perag	*pag)
{
	unsigned int		min_free;

	/* space needed by-bno freespace btree */
	min_free = min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_BNOi] + 1,
			 mp->m_ag_maxlevels);
	/* space needed by-size freespace btree */
	min_free += min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_CNTi] + 1,
			  mp->m_ag_maxlevels);

	return min_free;
}

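/*
 * Worked example for the maxlevels computation (hypothetical numbers):
 * with sb_agblocks 1000000 the worst case is (1000000 + 1) / 2 = 500000
 * records (alternating free and allocated single blocks).  With
 * minleafrecs 250 that needs 2000 leaf blocks, and with minnoderecs 125
 * the loop shrinks 2000 -> 16 -> 1, giving m_ag_maxlevels = 3.
 */
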
	longest = xfs_alloc_longest_free_extent(args->mp, pag, min_free);
	if ((args->minlen + args->alignment + args->minalignslop - 1) > longest)
		return false;

	/* do we have enough free space remaining for the allocation? */
	available = (int)(pag->pagf_freeblks + pag->pagf_flcount -
			  min_free - args->total);
	if (available < (int)args->minleft)
		return false;

	return true;
}

/*
 * Decide whether to use this allocation group for this allocation.
 * If so, fix up the btree freelist's size.
 */
STATIC int			/* error */
xfs_alloc_fix_freelist(
	struct xfs_alloc_arg	*args,	/* allocation argument structure */
	int			flags)	/* XFS_ALLOC_FLAG_... */
{
	struct xfs_mount	*mp = args->mp;
	struct xfs_perag	*pag = args->pag;
	struct xfs_trans	*tp = args->tp;
	struct xfs_buf		*agbp = NULL;
	struct xfs_buf		*agflbp = NULL;
	struct xfs_alloc_arg	targs;	/* local allocation arguments */
	xfs_agblock_t		bno;	/* freelist block */
	xfs_extlen_t		need;	/* total blocks needed in freelist */
	int			error = 0;

	if (!pag->pagf_init) {
		error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp);
		if (error)
			goto out_no_agbp;
		if (!pag->pagf_init) {
			ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
			ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
			goto out_agbp_relse;
		}
	}

	/*
	 * If this is a metadata preferred pag and we are allocating user
	 * data, then try somewhere else unless we are being asked to try
	 * harder at this point.
	 */
	if (pag->pagf_metadata && args->userdata &&
	    (flags & XFS_ALLOC_FLAG_TRYLOCK)) {
		ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
		goto out_agbp_relse;
	}

	need = xfs_alloc_min_freelist(mp, pag);
	if (!xfs_alloc_space_available(args, need, flags))
		goto out_agbp_relse;

	/*
	 * Get the a.g. freespace buffer.
	 * Can fail if we're not blocking on locks, and it's held.
	 */
	if (!agbp) {
		error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp);
		if (error)
			goto out_no_agbp;
		if (!agbp) {
			ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
			ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
			goto out_no_agbp;
		}
	}

	/*
	 * If there isn't enough total space or a large enough single extent,
	 * reject this AG. Recheck now that we hold the AGF buffer.
	 */
	need = xfs_alloc_min_freelist(mp, pag);
	if (!xfs_alloc_space_available(args, need, flags))
		goto out_agbp_relse;

	/*
	 * Make the freelist shorter if it's too long.
	 *
	 * Note that from this point onwards, we will always release the agf and
	 * agfl buffers on error. This handles the case where we error out and
	 * the buffers are clean or may not have been joined to the transaction
	 * and hence need to be released manually. If they have been joined to
	 * the transaction, then xfs_trans_brelse() will handle them
	 * appropriately based on the recursion count and dirty state of the
	 * buffer.
	 *
	 * XXX (dgc): When we have lots of free space, does this buy us
	 * anything other than extra overhead when we need to put more blocks
	 * back on the free list? Maybe we should only do this when space is
	 * getting low or the AGFL is more than half full?
	 */
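	/*
	 * Each pass of the loop below pulls one block off the AGFL, frees it
	 * back into the free space btrees, and invalidates the buffer so a
	 * stale copy is never written back to disk.
	 */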
	while (pag->pagf_flcount > need) {
		struct xfs_buf	*bp;

		error = xfs_alloc_get_freelist(tp, agbp, &bno, 0);
		if (error)
			goto out_agbp_relse;
		error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1, 1);
		if (error)
			goto out_agbp_relse;
		bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0);
		xfs_trans_binval(tp, bp);
	}

	memset(&targs, 0, sizeof(targs));
	targs.tp = tp;
	targs.mp = mp;
	targs.agbp = agbp;
	targs.agno = args->agno;
	targs.alignment = targs.minlen = targs.prod = targs.isfl = 1;
	targs.type = XFS_ALLOCTYPE_THIS_AG;
	targs.pag = pag;
	error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp);
	if (error)
		goto out_agbp_relse;

	/* Make the freelist longer if it's too short. */
	while (pag->pagf_flcount < need) {
		targs.agbno = 0;
		targs.maxlen = need - pag->pagf_flcount;

		/* Allocate as many blocks as possible at once. */
		error = xfs_alloc_ag_vextent(&targs);
		if (error)
			goto out_agflbp_relse;

		/*
		 * Stop if we run out. Won't happen if callers are obeying
		 * the restrictions correctly. Can happen for free calls
		 * on a completely full ag.
		 */
		if (targs.agbno == NULLAGBLOCK) {
			if (flags & XFS_ALLOC_FLAG_FREEING)
				break;
			goto out_agflbp_relse;
		}
		/*
		 * Put each allocated block on the list.
		 */
		for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) {
			error = xfs_alloc_put_freelist(tp, agbp,
						       agflbp, bno, 0);
			if (error)
				goto out_agflbp_relse;
		}
	}
	xfs_trans_brelse(tp, agflbp);
	args->agbp = agbp;
	return 0;

out_agflbp_relse:
	xfs_trans_brelse(tp, agflbp);
out_agbp_relse:
	if (agbp)
		xfs_trans_brelse(tp, agbp);
out_no_agbp:
	args->agbp = NULL;
	return error;
}

/*
 * Get a block from the freelist.
 * Returns with the buffer for the block gotten.
 */
int				/* error */
xfs_alloc_get_freelist(
	xfs_trans_t	*tp,	/* transaction pointer */
	xfs_buf_t	*agbp,	/* buffer containing the agf structure */
	xfs_agblock_t	*bnop,	/* block address retrieved from freelist */
	int		btreeblk) /* destination is an AGF btree */
{
	xfs_agf_t	*agf;	/* a.g. freespace structure */
	xfs_buf_t	*agflbp;/* buffer for a.g. freelist structure */
	xfs_agblock_t	bno;	/* block number returned */
	__be32		*agfl_bno;
	int		error;
	int		logflags;
	xfs_mount_t	*mp = tp->t_mountp;
	xfs_perag_t	*pag;	/* per allocation group data */

	/*
	 * Freelist is empty, give up.
	 */
	agf = XFS_BUF_TO_AGF(agbp);
	if (!agf->agf_flcount) {
		*bnop = NULLAGBLOCK;
		return 0;
	}
	/*
	 * Read the array of free blocks.
	 */
	error = xfs_alloc_read_agfl(mp, tp, be32_to_cpu(agf->agf_seqno),
				    &agflbp);
	if (error)
		return error;

	/*
	 * Get the block number and update the data structures.
	 */
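	/*
	 * The AGFL is a fixed-size circular array: consuming an entry
	 * advances agf_flfirst, which wraps back to zero once it reaches
	 * XFS_AGFL_SIZE(mp).
	 */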
	agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
	bno = be32_to_cpu(agfl_bno[be32_to_cpu(agf->agf_flfirst)]);
	be32_add_cpu(&agf->agf_flfirst, 1);
	xfs_trans_brelse(tp, agflbp);
	if (be32_to_cpu(agf->agf_flfirst) == XFS_AGFL_SIZE(mp))
		agf->agf_flfirst = 0;

	pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
	be32_add_cpu(&agf->agf_flcount, -1);
	xfs_trans_agflist_delta(tp, -1);
	pag->pagf_flcount--;

	logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT;
	if (btreeblk) {
		be32_add_cpu(&agf->agf_btreeblks, 1);
		pag->pagf_btreeblks++;
		logflags |= XFS_AGF_BTREEBLKS;
	}
	xfs_perag_put(pag);

	xfs_alloc_log_agf(tp, agbp, logflags);
	*bnop = bno;

	return 0;
}

/*
 * Log the given fields from the agf structure.
 */
void
xfs_alloc_log_agf(
	xfs_trans_t	*tp,	/* transaction pointer */
	xfs_buf_t	*bp,	/* buffer for a.g. freelist header */
	int		fields)	/* mask of fields to be logged (XFS_AGF_...) */
{
	int	first;		/* first byte offset */
	int	last;		/* last byte offset */
	static const short	offsets[] = {
		offsetof(xfs_agf_t, agf_magicnum),
		offsetof(xfs_agf_t, agf_versionnum),
		offsetof(xfs_agf_t, agf_seqno),
		offsetof(xfs_agf_t, agf_length),
		offsetof(xfs_agf_t, agf_roots[0]),
		offsetof(xfs_agf_t, agf_levels[0]),
		offsetof(xfs_agf_t, agf_flfirst),
		offsetof(xfs_agf_t, agf_fllast),
		offsetof(xfs_agf_t, agf_flcount),
		offsetof(xfs_agf_t, agf_freeblks),
		offsetof(xfs_agf_t, agf_longest),
		offsetof(xfs_agf_t, agf_btreeblks),
		offsetof(xfs_agf_t, agf_uuid),
		sizeof(xfs_agf_t)
	};

	trace_xfs_agf(tp->t_mountp, XFS_BUF_TO_AGF(bp), fields, _RET_IP_);

	xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGF_BUF);

	xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last);
	xfs_trans_log_buf(tp, bp, (uint)first, (uint)last);
}

/*
 * Interface for inode allocation to force the pag data to be initialized.
 */
int					/* error */
xfs_alloc_pagf_init(
	xfs_mount_t		*mp,	/* file system mount structure */
	xfs_trans_t		*tp,	/* transaction pointer */
	xfs_agnumber_t		agno,	/* allocation group number */
	int			flags)	/* XFS_ALLOC_FLAGS_... */
{
	xfs_buf_t		*bp;
	int			error;

	if ((error = xfs_alloc_read_agf(mp, tp, agno, flags, &bp)))
		return error;
	if (bp)
		xfs_trans_brelse(tp, bp);
	return 0;
}

/*
 * Put the block on the freelist for the allocation group.
 */
int					/* error */
xfs_alloc_put_freelist(
	xfs_trans_t		*tp,	/* transaction pointer */
	xfs_buf_t		*agbp,	/* buffer for a.g. freelist header */
	xfs_buf_t		*agflbp,/* buffer for a.g. free block array */
	xfs_agblock_t		bno,	/* block being freed */
	int			btreeblk) /* block came from an AGF btree */
{
	xfs_agf_t		*agf;	/* a.g. freespace structure */
	__be32			*blockp;/* pointer to array entry */
	int			error;
	int			logflags;
	xfs_mount_t		*mp;	/* mount structure */
	xfs_perag_t		*pag;	/* per allocation group data */
	__be32			*agfl_bno;
	int			startoff;

	agf = XFS_BUF_TO_AGF(agbp);
	mp = tp->t_mountp;

	if (!agflbp && (error = xfs_alloc_read_agfl(mp, tp,
			be32_to_cpu(agf->agf_seqno), &agflbp)))
		return error;
	be32_add_cpu(&agf->agf_fllast, 1);
	if (be32_to_cpu(agf->agf_fllast) == XFS_AGFL_SIZE(mp))
		agf->agf_fllast = 0;

	pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
	be32_add_cpu(&agf->agf_flcount, 1);
	xfs_trans_agflist_delta(tp, 1);
	pag->pagf_flcount++;

	logflags = XFS_AGF_FLLAST | XFS_AGF_FLCOUNT;
	if (btreeblk) {
		be32_add_cpu(&agf->agf_btreeblks, -1);
		pag->pagf_btreeblks--;
		logflags |= XFS_AGF_BTREEBLKS;
	}
	xfs_perag_put(pag);

	xfs_alloc_log_agf(tp, agbp, logflags);

	ASSERT(be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp));

	agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp);
	blockp = &agfl_bno[be32_to_cpu(agf->agf_fllast)];
	*blockp = cpu_to_be32(bno);
	startoff = (char *)blockp - (char *)agflbp->b_addr;

	xfs_trans_buf_set_type(tp, agflbp, XFS_BLFT_AGFL_BUF);
	xfs_trans_log_buf(tp, agflbp, startoff,
			  startoff + sizeof(xfs_agblock_t) - 1);
	return 0;
}

static bool
xfs_agf_verify(
	struct xfs_mount	*mp,
	struct xfs_buf		*bp)
{
	struct xfs_agf		*agf = XFS_BUF_TO_AGF(bp);

	if (xfs_sb_version_hascrc(&mp->m_sb) &&
	    !uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid))
		return false;

	if (!(agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) &&
	      XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) &&
	      be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) &&
	      be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) &&
	      be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) &&
	      be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp)))
		return false;

	if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > XFS_BTREE_MAXLEVELS ||
	    be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) > XFS_BTREE_MAXLEVELS)
		return false;

	/*
	 * During growfs operations, the perag is not fully initialised,
	 * so we can't use it for any useful checking. growfs ensures we can't
	 * use it by using uncached buffers that don't have the perag attached
	 * so we can detect and avoid this problem.
	 */
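	/*
	 * (A buffer read through the normal cached path has b_pag attached,
	 * so the owner check below is only skipped for those uncached
	 * growfs buffers.)
	 */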
2283 */ 2284 if (bp->b_pag && be32_to_cpu(agf->agf_seqno) != bp->b_pag->pag_agno) 2285 return false; 2286 2287 if (xfs_sb_version_haslazysbcount(&mp->m_sb) && 2288 be32_to_cpu(agf->agf_btreeblks) > be32_to_cpu(agf->agf_length)) 2289 return false; 2290 2291 return true;; 2292 2293 } 2294 2295 static void 2296 xfs_agf_read_verify( 2297 struct xfs_buf *bp) 2298 { 2299 struct xfs_mount *mp = bp->b_target->bt_mount; 2300 2301 if (xfs_sb_version_hascrc(&mp->m_sb) && 2302 !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF)) 2303 xfs_buf_ioerror(bp, -EFSBADCRC); 2304 else if (XFS_TEST_ERROR(!xfs_agf_verify(mp, bp), mp, 2305 XFS_ERRTAG_ALLOC_READ_AGF, 2306 XFS_RANDOM_ALLOC_READ_AGF)) 2307 xfs_buf_ioerror(bp, -EFSCORRUPTED); 2308 2309 if (bp->b_error) 2310 xfs_verifier_error(bp); 2311 } 2312 2313 static void 2314 xfs_agf_write_verify( 2315 struct xfs_buf *bp) 2316 { 2317 struct xfs_mount *mp = bp->b_target->bt_mount; 2318 struct xfs_buf_log_item *bip = bp->b_fspriv; 2319 2320 if (!xfs_agf_verify(mp, bp)) { 2321 xfs_buf_ioerror(bp, -EFSCORRUPTED); 2322 xfs_verifier_error(bp); 2323 return; 2324 } 2325 2326 if (!xfs_sb_version_hascrc(&mp->m_sb)) 2327 return; 2328 2329 if (bip) 2330 XFS_BUF_TO_AGF(bp)->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn); 2331 2332 xfs_buf_update_cksum(bp, XFS_AGF_CRC_OFF); 2333 } 2334 2335 const struct xfs_buf_ops xfs_agf_buf_ops = { 2336 .verify_read = xfs_agf_read_verify, 2337 .verify_write = xfs_agf_write_verify, 2338 }; 2339 2340 /* 2341 * Read in the allocation group header (free/alloc section). 2342 */ 2343 int /* error */ 2344 xfs_read_agf( 2345 struct xfs_mount *mp, /* mount point structure */ 2346 struct xfs_trans *tp, /* transaction pointer */ 2347 xfs_agnumber_t agno, /* allocation group number */ 2348 int flags, /* XFS_BUF_ */ 2349 struct xfs_buf **bpp) /* buffer for the ag freelist header */ 2350 { 2351 int error; 2352 2353 trace_xfs_read_agf(mp, agno); 2354 2355 ASSERT(agno != NULLAGNUMBER); 2356 error = xfs_trans_read_buf( 2357 mp, tp, mp->m_ddev_targp, 2358 XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)), 2359 XFS_FSS_TO_BB(mp, 1), flags, bpp, &xfs_agf_buf_ops); 2360 if (error) 2361 return error; 2362 if (!*bpp) 2363 return 0; 2364 2365 ASSERT(!(*bpp)->b_error); 2366 xfs_buf_set_ref(*bpp, XFS_AGF_REF); 2367 return 0; 2368 } 2369 2370 /* 2371 * Read in the allocation group header (free/alloc section). 2372 */ 2373 int /* error */ 2374 xfs_alloc_read_agf( 2375 struct xfs_mount *mp, /* mount point structure */ 2376 struct xfs_trans *tp, /* transaction pointer */ 2377 xfs_agnumber_t agno, /* allocation group number */ 2378 int flags, /* XFS_ALLOC_FLAG_... */ 2379 struct xfs_buf **bpp) /* buffer for the ag freelist header */ 2380 { 2381 struct xfs_agf *agf; /* ag freelist header */ 2382 struct xfs_perag *pag; /* per allocation group data */ 2383 int error; 2384 2385 trace_xfs_alloc_read_agf(mp, agno); 2386 2387 ASSERT(agno != NULLAGNUMBER); 2388 error = xfs_read_agf(mp, tp, agno, 2389 (flags & XFS_ALLOC_FLAG_TRYLOCK) ? 
	if (error)
		return error;
	if (!*bpp)
		return 0;
	ASSERT(!(*bpp)->b_error);

	agf = XFS_BUF_TO_AGF(*bpp);
	pag = xfs_perag_get(mp, agno);
	if (!pag->pagf_init) {
		pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
		pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks);
		pag->pagf_flcount = be32_to_cpu(agf->agf_flcount);
		pag->pagf_longest = be32_to_cpu(agf->agf_longest);
		pag->pagf_levels[XFS_BTNUM_BNOi] =
			be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]);
		pag->pagf_levels[XFS_BTNUM_CNTi] =
			be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
		spin_lock_init(&pag->pagb_lock);
		pag->pagb_count = 0;
		pag->pagb_tree = RB_ROOT;
		pag->pagf_init = 1;
	}
#ifdef DEBUG
	else if (!XFS_FORCED_SHUTDOWN(mp)) {
		ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks));
		ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks));
		ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount));
		ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest));
		ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] ==
		       be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]));
		ASSERT(pag->pagf_levels[XFS_BTNUM_CNTi] ==
		       be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]));
	}
#endif
	xfs_perag_put(pag);
	return 0;
}

/*
 * Allocate an extent (variable-size).
 * Depending on the allocation type, we either look in a single allocation
 * group or loop over the allocation groups to find the result.
 */
int				/* error */
xfs_alloc_vextent(
	xfs_alloc_arg_t	*args)	/* allocation argument structure */
{
	xfs_agblock_t	agsize;	/* allocation group size */
	int		error;
	int		flags;	/* XFS_ALLOC_FLAG_... locking flags */
	xfs_extlen_t	minleft;/* minimum left value, temp copy */
	xfs_mount_t	*mp;	/* mount structure pointer */
	xfs_agnumber_t	sagno;	/* starting allocation group number */
	xfs_alloctype_t	type;	/* input allocation type */
	int		bump_rotor = 0;
	int		no_min = 0;
	xfs_agnumber_t	rotorstep = xfs_rotorstep; /* inode32 agf stepper */

	mp = args->mp;
	type = args->otype = args->type;
	args->agbno = NULLAGBLOCK;
	/*
	 * Just fix this up, for the case where the last a.g. is shorter
	 * (or there's only one a.g.) and the caller couldn't easily figure
	 * that out (xfs_bmap_alloc).
	 */
	agsize = mp->m_sb.sb_agblocks;
	if (args->maxlen > agsize)
		args->maxlen = agsize;
	if (args->alignment == 0)
		args->alignment = 1;
	ASSERT(XFS_FSB_TO_AGNO(mp, args->fsbno) < mp->m_sb.sb_agcount);
	ASSERT(XFS_FSB_TO_AGBNO(mp, args->fsbno) < agsize);
	ASSERT(args->minlen <= args->maxlen);
	ASSERT(args->minlen <= agsize);
	ASSERT(args->mod < args->prod);
	if (XFS_FSB_TO_AGNO(mp, args->fsbno) >= mp->m_sb.sb_agcount ||
	    XFS_FSB_TO_AGBNO(mp, args->fsbno) >= agsize ||
	    args->minlen > args->maxlen || args->minlen > agsize ||
	    args->mod >= args->prod) {
		args->fsbno = NULLFSBLOCK;
		trace_xfs_alloc_vextent_badargs(args);
		return 0;
	}
	minleft = args->minleft;

	switch (type) {
	case XFS_ALLOCTYPE_THIS_AG:
	case XFS_ALLOCTYPE_NEAR_BNO:
	case XFS_ALLOCTYPE_THIS_BNO:
		/*
		 * These three force us into a single a.g.
		 */
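		/*
		 * The AG comes from args->fsbno; if its freelist cannot be
		 * fixed up we fail this allocation rather than moving on to
		 * another group.
		 */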
		args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
		args->pag = xfs_perag_get(mp, args->agno);
		args->minleft = 0;
		error = xfs_alloc_fix_freelist(args, 0);
		args->minleft = minleft;
		if (error) {
			trace_xfs_alloc_vextent_nofix(args);
			goto error0;
		}
		if (!args->agbp) {
			trace_xfs_alloc_vextent_noagbp(args);
			break;
		}
		args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
		if ((error = xfs_alloc_ag_vextent(args)))
			goto error0;
		break;
	case XFS_ALLOCTYPE_START_BNO:
		/*
		 * Try near allocation first, then anywhere-in-ag after
		 * the first a.g. fails.
		 */
		if ((args->userdata == XFS_ALLOC_INITIAL_USER_DATA) &&
		    (mp->m_flags & XFS_MOUNT_32BITINODES)) {
			args->fsbno = XFS_AGB_TO_FSB(mp,
					((mp->m_agfrotor / rotorstep) %
					mp->m_sb.sb_agcount), 0);
			bump_rotor = 1;
		}
		args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
		args->type = XFS_ALLOCTYPE_NEAR_BNO;
		/* FALLTHROUGH */
	case XFS_ALLOCTYPE_ANY_AG:
	case XFS_ALLOCTYPE_START_AG:
	case XFS_ALLOCTYPE_FIRST_AG:
		/*
		 * Rotate through the allocation groups looking for a winner.
		 */
		if (type == XFS_ALLOCTYPE_ANY_AG) {
			/*
			 * Start with the last place we left off.
			 */
			args->agno = sagno = (mp->m_agfrotor / rotorstep) %
					mp->m_sb.sb_agcount;
			args->type = XFS_ALLOCTYPE_THIS_AG;
			flags = XFS_ALLOC_FLAG_TRYLOCK;
		} else if (type == XFS_ALLOCTYPE_FIRST_AG) {
			/*
			 * Start with allocation group given by bno.
			 */
			args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
			args->type = XFS_ALLOCTYPE_THIS_AG;
			sagno = 0;
			flags = 0;
		} else {
			if (type == XFS_ALLOCTYPE_START_AG)
				args->type = XFS_ALLOCTYPE_THIS_AG;
			/*
			 * Start with the given allocation group.
			 */
			args->agno = sagno = XFS_FSB_TO_AGNO(mp, args->fsbno);
			flags = XFS_ALLOC_FLAG_TRYLOCK;
		}
		/*
		 * Loop over allocation groups twice; first time with
		 * trylock set, second time without.
		 */
		for (;;) {
			args->pag = xfs_perag_get(mp, args->agno);
			if (no_min)
				args->minleft = 0;
			error = xfs_alloc_fix_freelist(args, flags);
			args->minleft = minleft;
			if (error) {
				trace_xfs_alloc_vextent_nofix(args);
				goto error0;
			}
			/*
			 * If we get a buffer back then the allocation will fly.
			 */
			if (args->agbp) {
				if ((error = xfs_alloc_ag_vextent(args)))
					goto error0;
				break;
			}

			trace_xfs_alloc_vextent_loopfailed(args);

			/*
			 * Didn't work, figure out the next iteration.
			 */
			if (args->agno == sagno &&
			    type == XFS_ALLOCTYPE_START_BNO)
				args->type = XFS_ALLOCTYPE_THIS_AG;
			/*
			 * For the first allocation, we can try any AG to get
			 * space. However, if we already have allocated a
			 * block, we don't want to try AGs whose number is below
			 * sagno. Otherwise, we may end up with out-of-order
			 * locking of AGF, which might cause deadlock.
			 */
			if (++(args->agno) == mp->m_sb.sb_agcount) {
				if (args->firstblock != NULLFSBLOCK)
					args->agno = sagno;
				else
					args->agno = 0;
			}
			/*
			 * Reached the starting a.g., must either be done
			 * or switch to non-trylock mode.
			 */
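			/*
			 * For the trylock cases the fallback order is: a
			 * trylock pass, then a blocking pass, then a blocking
			 * pass with minleft forced to zero; only when all of
			 * those fail do we give up.
			 */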
2593 */ 2594 if (args->agno == sagno) { 2595 if (no_min == 1) { 2596 args->agbno = NULLAGBLOCK; 2597 trace_xfs_alloc_vextent_allfailed(args); 2598 break; 2599 } 2600 if (flags == 0) { 2601 no_min = 1; 2602 } else { 2603 flags = 0; 2604 if (type == XFS_ALLOCTYPE_START_BNO) { 2605 args->agbno = XFS_FSB_TO_AGBNO(mp, 2606 args->fsbno); 2607 args->type = XFS_ALLOCTYPE_NEAR_BNO; 2608 } 2609 } 2610 } 2611 xfs_perag_put(args->pag); 2612 } 2613 if (bump_rotor || (type == XFS_ALLOCTYPE_ANY_AG)) { 2614 if (args->agno == sagno) 2615 mp->m_agfrotor = (mp->m_agfrotor + 1) % 2616 (mp->m_sb.sb_agcount * rotorstep); 2617 else 2618 mp->m_agfrotor = (args->agno * rotorstep + 1) % 2619 (mp->m_sb.sb_agcount * rotorstep); 2620 } 2621 break; 2622 default: 2623 ASSERT(0); 2624 /* NOTREACHED */ 2625 } 2626 if (args->agbno == NULLAGBLOCK) 2627 args->fsbno = NULLFSBLOCK; 2628 else { 2629 args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno); 2630 #ifdef DEBUG 2631 ASSERT(args->len >= args->minlen); 2632 ASSERT(args->len <= args->maxlen); 2633 ASSERT(args->agbno % args->alignment == 0); 2634 XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno), 2635 args->len); 2636 #endif 2637 } 2638 xfs_perag_put(args->pag); 2639 return 0; 2640 error0: 2641 xfs_perag_put(args->pag); 2642 return error; 2643 } 2644 2645 /* 2646 * Free an extent. 2647 * Just break up the extent address and hand off to xfs_free_ag_extent 2648 * after fixing up the freelist. 2649 */ 2650 int /* error */ 2651 xfs_free_extent( 2652 xfs_trans_t *tp, /* transaction pointer */ 2653 xfs_fsblock_t bno, /* starting block number of extent */ 2654 xfs_extlen_t len) /* length of extent */ 2655 { 2656 xfs_alloc_arg_t args; 2657 int error; 2658 2659 ASSERT(len != 0); 2660 memset(&args, 0, sizeof(xfs_alloc_arg_t)); 2661 args.tp = tp; 2662 args.mp = tp->t_mountp; 2663 2664 /* 2665 * validate that the block number is legal - the enables us to detect 2666 * and handle a silent filesystem corruption rather than crashing. 2667 */ 2668 args.agno = XFS_FSB_TO_AGNO(args.mp, bno); 2669 if (args.agno >= args.mp->m_sb.sb_agcount) 2670 return -EFSCORRUPTED; 2671 2672 args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno); 2673 if (args.agbno >= args.mp->m_sb.sb_agblocks) 2674 return -EFSCORRUPTED; 2675 2676 args.pag = xfs_perag_get(args.mp, args.agno); 2677 ASSERT(args.pag); 2678 2679 error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING); 2680 if (error) 2681 goto error0; 2682 2683 /* validate the extent size is legal now we have the agf locked */ 2684 if (args.agbno + len > 2685 be32_to_cpu(XFS_BUF_TO_AGF(args.agbp)->agf_length)) { 2686 error = -EFSCORRUPTED; 2687 goto error0; 2688 } 2689 2690 error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0); 2691 if (!error) 2692 xfs_extent_busy_insert(tp, args.agno, args.agbno, len, 0); 2693 error0: 2694 xfs_perag_put(args.pag); 2695 return error; 2696 } 2697