/*
 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_bmap_btree.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
#include "xfs_error.h"
#include "xfs_trans.h"
#include "xfs_trans_space.h"
#include "xfs_iomap.h"
#include "xfs_trace.h"
#include "xfs_icache.h"
#include "xfs_quota.h"
#include "xfs_dquot_item.h"
#include "xfs_dquot.h"


#define XFS_WRITEIO_ALIGN(mp,off)	(((off) >> mp->m_writeio_log) \
						<< mp->m_writeio_log)
#define XFS_WRITE_IMAPS		XFS_BMAP_MAX_NMAP

STATIC int
xfs_iomap_eof_align_last_fsb(
	xfs_mount_t	*mp,
	xfs_inode_t	*ip,
	xfs_extlen_t	extsize,
	xfs_fileoff_t	*last_fsb)
{
	xfs_extlen_t	align = 0;
	int		eof, error;

	if (!XFS_IS_REALTIME_INODE(ip)) {
		/*
		 * Round up the allocation request to a stripe unit
		 * (m_dalign) boundary if the file size is >= stripe unit
		 * size, and we are allocating past the allocation eof.
		 *
		 * If mounted with the "-o swalloc" option the alignment is
		 * increased from the stripe unit size to the stripe width.
		 */
		if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC))
			align = mp->m_swidth;
		else if (mp->m_dalign)
			align = mp->m_dalign;

		if (align && XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, align))
			align = 0;
	}

	/*
	 * Always round up the allocation request to an extent boundary
	 * (when file on a real-time subvolume or has di_extsize hint).
	 */
	if (extsize) {
		if (align)
			align = roundup_64(align, extsize);
		else
			align = extsize;
	}
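
	/*
	 * Worked example with illustrative values (not from any particular
	 * configuration): a stripe unit of 16 fsbs combined with an extent
	 * size hint of 6 fsbs gives align = roundup_64(16, 6) = 18, so a
	 * *last_fsb of 100 is rounded up to 108 below, but only if the
	 * rounded-up block is still beyond the allocation eof.
	 */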

	if (align) {
		xfs_fileoff_t	new_last_fsb = roundup_64(*last_fsb, align);
		error = xfs_bmap_eof(ip, new_last_fsb, XFS_DATA_FORK, &eof);
		if (error)
			return error;
		if (eof)
			*last_fsb = new_last_fsb;
	}
	return 0;
}

STATIC int
xfs_alert_fsblock_zero(
	xfs_inode_t	*ip,
	xfs_bmbt_irec_t	*imap)
{
	xfs_alert_tag(ip->i_mount, XFS_PTAG_FSBLOCK_ZERO,
			"Access to block zero in inode %llu "
			"start_block: %llx start_off: %llx "
			"blkcnt: %llx extent-state: %x",
		(unsigned long long)ip->i_ino,
		(unsigned long long)imap->br_startblock,
		(unsigned long long)imap->br_startoff,
		(unsigned long long)imap->br_blockcount,
		imap->br_state);
	return -EFSCORRUPTED;
}

int
xfs_iomap_write_direct(
	xfs_inode_t	*ip,
	xfs_off_t	offset,
	size_t		count,
	xfs_bmbt_irec_t *imap,
	int		nmaps)
{
	xfs_mount_t	*mp = ip->i_mount;
	xfs_fileoff_t	offset_fsb;
	xfs_fileoff_t	last_fsb;
	xfs_filblks_t	count_fsb, resaligned;
	xfs_fsblock_t	firstfsb;
	xfs_extlen_t	extsz, temp;
	int		nimaps;
	int		quota_flag;
	int		rt;
	xfs_trans_t	*tp;
	xfs_bmap_free_t free_list;
	uint		qblocks, resblks, resrtextents;
	int		committed;
	int		error;
	int		lockmode;
	int		bmapi_flags = XFS_BMAPI_PREALLOC;

	rt = XFS_IS_REALTIME_INODE(ip);
	extsz = xfs_get_extsz_hint(ip);
	lockmode = XFS_ILOCK_SHARED;	/* locked by caller */

	ASSERT(xfs_isilocked(ip, lockmode));

	offset_fsb = XFS_B_TO_FSBT(mp, offset);
	last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
	if ((offset + count) > XFS_ISIZE(ip)) {
		/*
		 * Assert that the in-core extent list is present since this
		 * can call xfs_iread_extents() and we only have the ilock
		 * shared. This should be safe because the lock was held
		 * around a bmapi call in the caller and we only need it to
		 * access the in-core list.
		 */
		ASSERT(XFS_IFORK_PTR(ip, XFS_DATA_FORK)->if_flags &
								XFS_IFEXTENTS);
		error = xfs_iomap_eof_align_last_fsb(mp, ip, extsz, &last_fsb);
		if (error)
			goto out_unlock;
	} else {
		if (nmaps && (imap->br_startblock == HOLESTARTBLOCK))
			last_fsb = MIN(last_fsb, (xfs_fileoff_t)
					imap->br_blockcount +
					imap->br_startoff);
	}
	count_fsb = last_fsb - offset_fsb;
	ASSERT(count_fsb > 0);

	resaligned = count_fsb;
	if (unlikely(extsz)) {
		if ((temp = do_mod(offset_fsb, extsz)))
			resaligned += temp;
		if ((temp = do_mod(resaligned, extsz)))
			resaligned += extsz - temp;
	}

	if (unlikely(rt)) {
		resrtextents = qblocks = resaligned;
		resrtextents /= mp->m_sb.sb_rextsize;
		resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
		quota_flag = XFS_QMOPT_RES_RTBLKS;
	} else {
		resrtextents = 0;
		resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned);
		quota_flag = XFS_QMOPT_RES_REGBLKS;
	}
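
	/*
	 * Worked example for the rounding above (illustrative numbers): a
	 * write covering fsbs [10, 17) with extsz = 4 must be allocated as
	 * the aligned range [8, 20).  resaligned starts at count_fsb = 7,
	 * grows by do_mod(10, 4) = 2 for the front of the range and by
	 * 4 - do_mod(9, 4) = 3 for the back, giving 12 blocks to reserve
	 * quota and transaction space against.
	 */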

	/*
	 * Drop the shared lock acquired by the caller, attach the dquot if
	 * necessary and move on to transaction setup.
	 */
	xfs_iunlock(ip, lockmode);
	error = xfs_qm_dqattach(ip, 0);
	if (error)
		return error;

	/*
	 * Allocate and setup the transaction
	 */
	tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);

	/*
	 * For DAX, we do not allocate unwritten extents, but instead we zero
	 * the block before we commit the transaction.  Ideally we'd like to
	 * do this outside the transaction context, but if we commit and then
	 * crash we may not have zeroed the blocks and this will be exposed
	 * on recovery of the allocation. Hence we must zero before commit.
	 * Further, if we are mapping unwritten extents here, we need to zero
	 * and convert them to written so that we don't need an unwritten
	 * extent callback for DAX. This also means that we need to be able
	 * to dip into the reserve block pool if there is no space left but
	 * we need to do unwritten extent conversion.
	 */
	if (IS_DAX(VFS_I(ip))) {
		bmapi_flags = XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO;
		tp->t_flags |= XFS_TRANS_RESERVE;
	}
	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
				  resblks, resrtextents);
	/*
	 * Check for running out of space; we hold no locks at this point,
	 * so just cancel the transaction and return the error.
	 */
	if (error) {
		xfs_trans_cancel(tp);
		return error;
	}

	lockmode = XFS_ILOCK_EXCL;
	xfs_ilock(ip, lockmode);

	error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks, 0, quota_flag);
	if (error)
		goto out_trans_cancel;

	xfs_trans_ijoin(tp, ip, 0);

	/*
	 * From this point onwards we overwrite the imap pointer that the
	 * caller gave to us.
	 */
	xfs_bmap_init(&free_list, &firstfsb);
	nimaps = 1;
	error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb,
				bmapi_flags, &firstfsb, resblks, imap,
				&nimaps, &free_list);
	if (error)
		goto out_bmap_cancel;

	/*
	 * Complete the transaction
	 */
	error = xfs_bmap_finish(&tp, &free_list, &committed);
	if (error)
		goto out_bmap_cancel;

	error = xfs_trans_commit(tp);
	if (error)
		goto out_unlock;

	/*
	 * If bmapi returned no mappings, nothing was allocated; report it
	 * as ENOSPC.  Otherwise sanity check the mapping we got back.
	 */
	if (nimaps == 0) {
		error = -ENOSPC;
		goto out_unlock;
	}

	if (!(imap->br_startblock || XFS_IS_REALTIME_INODE(ip)))
		error = xfs_alert_fsblock_zero(ip, imap);

out_unlock:
	xfs_iunlock(ip, lockmode);
	return error;

out_bmap_cancel:
	xfs_bmap_cancel(&free_list);
	xfs_trans_unreserve_quota_nblks(tp, ip, (long)qblocks, 0, quota_flag);
out_trans_cancel:
	xfs_trans_cancel(tp);
	goto out_unlock;
}
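
/*
 * A sketch of the calling convention asserted above, not lifted from any
 * actual caller: the caller maps the range with the ilock held shared and
 * only calls xfs_iomap_write_direct() when it finds a hole.  Note that the
 * function drops the ilock internally and returns with it unlocked on all
 * paths, so the caller must not unlock again after a call:
 *
 *	xfs_ilock(ip, XFS_ILOCK_SHARED);
 *	nimaps = 1;
 *	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
 *			       &imap, &nimaps, 0);
 *	if (!error && imap.br_startblock == HOLESTARTBLOCK)
 *		error = xfs_iomap_write_direct(ip, offset, count,
 *					       &imap, nimaps);
 *	else
 *		xfs_iunlock(ip, XFS_ILOCK_SHARED);
 */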

/*
 * If the caller is doing a write at the end of the file, then extend the
 * allocation out to the file system's write iosize.  We clean up any extra
 * space left over when the file is closed in xfs_inactive().
 *
 * If we find we already have delalloc preallocation beyond EOF, don't do
 * more preallocation as it is not needed.
 */
STATIC int
xfs_iomap_eof_want_preallocate(
	xfs_mount_t	*mp,
	xfs_inode_t	*ip,
	xfs_off_t	offset,
	size_t		count,
	xfs_bmbt_irec_t *imap,
	int		nimaps,
	int		*prealloc)
{
	xfs_fileoff_t   start_fsb;
	xfs_filblks_t   count_fsb;
	int		n, error, imaps;
	int		found_delalloc = 0;

	*prealloc = 0;
	if (offset + count <= XFS_ISIZE(ip))
		return 0;

	/*
	 * If the file is smaller than the minimum prealloc and we are using
	 * dynamic preallocation, don't do any preallocation at all as it is
	 * likely this is the only write to the file that is going to be done.
	 */
	if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) &&
	    XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, mp->m_writeio_blocks))
		return 0;

	/*
	 * If there are any real blocks past eof, then don't
	 * do any speculative allocation.
	 */
	start_fsb = XFS_B_TO_FSBT(mp, ((xfs_ufsize_t)(offset + count - 1)));
	count_fsb = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
	while (count_fsb > 0) {
		imaps = nimaps;
		error = xfs_bmapi_read(ip, start_fsb, count_fsb, imap, &imaps,
				       0);
		if (error)
			return error;
		for (n = 0; n < imaps; n++) {
			if ((imap[n].br_startblock != HOLESTARTBLOCK) &&
			    (imap[n].br_startblock != DELAYSTARTBLOCK))
				return 0;
			start_fsb += imap[n].br_blockcount;
			count_fsb -= imap[n].br_blockcount;

			if (imap[n].br_startblock == DELAYSTARTBLOCK)
				found_delalloc = 1;
		}
	}
	if (!found_delalloc)
		*prealloc = 1;
	return 0;
}
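
/*
 * Illustrative cases for the checks above (numbers are made up): appending
 * to a 4k file with dynamic preallocation and a 64k m_writeio_blocks
 * returns 0 immediately because the file is below the minimum prealloc
 * size; a write entirely below XFS_ISIZE() never preallocates; and a file
 * that already has delalloc blocks past EOF leaves *prealloc at 0 because
 * the earlier speculative allocation still covers the region.
 */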

/*
 * Determine the initial size of the preallocation. We are beyond the current
 * EOF here, but we need to take into account whether this is a sparse write
 * or an extending write when determining the preallocation size.  Hence we
 * need to look up the extent that ends at the current write offset and use
 * the result to determine the preallocation size.
 *
 * If the extent is a hole, then preallocation is essentially disabled.
 * Otherwise we take the size of the preceding data extent as the basis for
 * the preallocation size. If the size of the extent is greater than half the
 * maximum extent length, then use the current offset as the basis. This
 * ensures that for large files the preallocation size always extends to
 * MAXEXTLEN rather than falling short due to things like stripe unit/width
 * alignment of real extents.
 */
STATIC xfs_fsblock_t
xfs_iomap_eof_prealloc_initial_size(
	struct xfs_mount	*mp,
	struct xfs_inode	*ip,
	xfs_off_t		offset,
	xfs_bmbt_irec_t		*imap,
	int			nimaps)
{
	xfs_fileoff_t   start_fsb;
	int		imaps = 1;
	int		error;

	ASSERT(nimaps >= imaps);

	/* if we are using a specific prealloc size, return now */
	if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)
		return 0;

	/* If the file is small, then use the minimum prealloc */
	if (XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, mp->m_dalign))
		return 0;

	/*
	 * As we write multiple pages, the offset will always align to the
	 * start of a page and hence point to a hole at EOF. i.e. if the size
	 * is 4096 bytes, we only have one block at FSB 0, but
	 * XFS_B_TO_FSB(4096) will return FSB 1. Hence if there are blocks in
	 * the file, we want to point to the block prior to the EOF block and
	 * not the hole that maps directly at @offset.
	 */
	start_fsb = XFS_B_TO_FSB(mp, offset);
	if (start_fsb)
		start_fsb--;
	error = xfs_bmapi_read(ip, start_fsb, 1, imap, &imaps, XFS_BMAPI_ENTIRE);
	if (error)
		return 0;

	ASSERT(imaps == 1);
	if (imap[0].br_startblock == HOLESTARTBLOCK)
		return 0;
	if (imap[0].br_blockcount <= (MAXEXTLEN >> 1))
		return imap[0].br_blockcount << 1;
	return XFS_B_TO_FSB(mp, offset);
}
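
/*
 * Worked example of the doubling heuristic above (illustrative sizes): if
 * the extent ending at the write offset is 48 blocks, the initial prealloc
 * is 96 blocks, so steady appends keep doubling the speculative allocation.
 * Once the preceding extent exceeds MAXEXTLEN / 2, the file offset itself
 * is used instead, so the prealloc can still reach MAXEXTLEN even after
 * stripe alignment has trimmed the real extents.
 */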

STATIC bool
xfs_quota_need_throttle(
	struct xfs_inode *ip,
	int type,
	xfs_fsblock_t alloc_blocks)
{
	struct xfs_dquot *dq = xfs_inode_dquot(ip, type);

	if (!dq || !xfs_this_quota_on(ip->i_mount, type))
		return false;

	/* no hi watermark, no throttle */
	if (!dq->q_prealloc_hi_wmark)
		return false;

	/* under the lo watermark, no throttle */
	if (dq->q_res_bcount + alloc_blocks < dq->q_prealloc_lo_wmark)
		return false;

	return true;
}

STATIC void
xfs_quota_calc_throttle(
	struct xfs_inode *ip,
	int type,
	xfs_fsblock_t *qblocks,
	int *qshift,
	int64_t	*qfreesp)
{
	int64_t freesp;
	int shift = 0;
	struct xfs_dquot *dq = xfs_inode_dquot(ip, type);

	/* no dq, or over hi wmark, squash the prealloc completely */
	if (!dq || dq->q_res_bcount >= dq->q_prealloc_hi_wmark) {
		*qblocks = 0;
		*qfreesp = 0;
		return;
	}

	freesp = dq->q_prealloc_hi_wmark - dq->q_res_bcount;
	if (freesp < dq->q_low_space[XFS_QLOWSP_5_PCNT]) {
		shift = 2;
		if (freesp < dq->q_low_space[XFS_QLOWSP_3_PCNT])
			shift += 2;
		if (freesp < dq->q_low_space[XFS_QLOWSP_1_PCNT])
			shift += 2;
	}

	if (freesp < *qfreesp)
		*qfreesp = freesp;

	/* only overwrite the throttle values if we are more aggressive */
	if ((freesp >> shift) < (*qblocks >> *qshift)) {
		*qblocks = freesp;
		*qshift = shift;
	}
}
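
/*
 * Worked example of the quota shifts above (thresholds are illustrative):
 * freesp below the dquot's 5% low space threshold throttles by shift = 2
 * (prealloc / 4), below the 3% threshold by shift = 4 (prealloc / 16), and
 * below the 1% threshold by shift = 6 (prealloc / 64).  Across the user,
 * group and project dquots the most aggressive block count/shift
 * combination wins, as only a smaller effective prealloc overwrites the
 * values already accumulated.
 */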

/*
 * If we don't have a user specified preallocation size, dynamically increase
 * the preallocation size as the size of the file grows. Cap the maximum size
 * at a single extent or less if the filesystem is near full. The closer the
 * filesystem is to full, the smaller the maximum preallocation.
 */
STATIC xfs_fsblock_t
xfs_iomap_prealloc_size(
	struct xfs_mount	*mp,
	struct xfs_inode	*ip,
	xfs_off_t		offset,
	struct xfs_bmbt_irec	*imap,
	int			nimaps)
{
	xfs_fsblock_t		alloc_blocks = 0;
	int			shift = 0;
	int64_t			freesp;
	xfs_fsblock_t		qblocks;
	int			qshift = 0;

	alloc_blocks = xfs_iomap_eof_prealloc_initial_size(mp, ip, offset,
							   imap, nimaps);
	if (!alloc_blocks)
		goto check_writeio;
	qblocks = alloc_blocks;

	/*
	 * MAXEXTLEN is not a power of two value but we round the prealloc
	 * down to the nearest power of two value after throttling. To
	 * prevent the round down from unconditionally reducing the maximum
	 * supported prealloc size, we round up first, apply appropriate
	 * throttling, round down and cap the value to MAXEXTLEN.
	 */
	alloc_blocks = XFS_FILEOFF_MIN(roundup_pow_of_two(MAXEXTLEN),
				       alloc_blocks);

	freesp = percpu_counter_read_positive(&mp->m_fdblocks);
	if (freesp < mp->m_low_space[XFS_LOWSP_5_PCNT]) {
		shift = 2;
		if (freesp < mp->m_low_space[XFS_LOWSP_4_PCNT])
			shift++;
		if (freesp < mp->m_low_space[XFS_LOWSP_3_PCNT])
			shift++;
		if (freesp < mp->m_low_space[XFS_LOWSP_2_PCNT])
			shift++;
		if (freesp < mp->m_low_space[XFS_LOWSP_1_PCNT])
			shift++;
	}

	/*
	 * Check each quota to cap the prealloc size, provide a shift value
	 * to throttle with and adjust amount of available space.
	 */
	if (xfs_quota_need_throttle(ip, XFS_DQ_USER, alloc_blocks))
		xfs_quota_calc_throttle(ip, XFS_DQ_USER, &qblocks, &qshift,
					&freesp);
	if (xfs_quota_need_throttle(ip, XFS_DQ_GROUP, alloc_blocks))
		xfs_quota_calc_throttle(ip, XFS_DQ_GROUP, &qblocks, &qshift,
					&freesp);
	if (xfs_quota_need_throttle(ip, XFS_DQ_PROJ, alloc_blocks))
		xfs_quota_calc_throttle(ip, XFS_DQ_PROJ, &qblocks, &qshift,
					&freesp);

	/*
	 * The final prealloc size is set to the minimum of free space
	 * available in each of the quotas and the overall filesystem.
	 *
	 * The shift throttle value is set to the maximum value as determined
	 * by the global low free space values and per-quota low free space
	 * values.
	 */
	alloc_blocks = MIN(alloc_blocks, qblocks);
	shift = MAX(shift, qshift);

	if (shift)
		alloc_blocks >>= shift;
	/*
	 * rounddown_pow_of_two() returns an undefined result if we pass in
	 * alloc_blocks = 0.
	 */
	if (alloc_blocks)
		alloc_blocks = rounddown_pow_of_two(alloc_blocks);
	if (alloc_blocks > MAXEXTLEN)
		alloc_blocks = MAXEXTLEN;

	/*
	 * If we are still trying to allocate more space than is
	 * available, squash the prealloc hard. This can happen if we
	 * have a large file on a small filesystem and the above
	 * lowspace thresholds are smaller than MAXEXTLEN.
	 */
	while (alloc_blocks && alloc_blocks >= freesp)
		alloc_blocks >>= 4;

check_writeio:
	if (alloc_blocks < mp->m_writeio_blocks)
		alloc_blocks = mp->m_writeio_blocks;

	trace_xfs_iomap_prealloc_size(ip, alloc_blocks, shift,
				      mp->m_writeio_blocks);

	return alloc_blocks;
}
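
/*
 * End-to-end example of the throttling above (made-up numbers): an initial
 * prealloc of 8192 blocks on a filesystem under its 3% low space threshold
 * (global shift = 4) with a user quota under its 5% threshold (qshift = 2)
 * is shifted by max(4, 2) = 4 down to 512 blocks, then rounded down to a
 * power of two (already one here), capped at MAXEXTLEN, and finally
 * squashed further if it still exceeds the free space remaining.
 */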

int
xfs_iomap_write_delay(
	xfs_inode_t	*ip,
	xfs_off_t	offset,
	size_t		count,
	xfs_bmbt_irec_t *ret_imap)
{
	xfs_mount_t	*mp = ip->i_mount;
	xfs_fileoff_t	offset_fsb;
	xfs_fileoff_t	last_fsb;
	xfs_off_t	aligned_offset;
	xfs_fileoff_t	ioalign;
	xfs_extlen_t	extsz;
	int		nimaps;
	xfs_bmbt_irec_t imap[XFS_WRITE_IMAPS];
	int		prealloc;
	int		error;

	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));

	/*
	 * Make sure that the dquots are there. This doesn't hold
	 * the ilock across a disk read.
	 */
	error = xfs_qm_dqattach_locked(ip, 0);
	if (error)
		return error;

	extsz = xfs_get_extsz_hint(ip);
	offset_fsb = XFS_B_TO_FSBT(mp, offset);

	error = xfs_iomap_eof_want_preallocate(mp, ip, offset, count,
				imap, XFS_WRITE_IMAPS, &prealloc);
	if (error)
		return error;

retry:
	if (prealloc) {
		xfs_fsblock_t	alloc_blocks;

		alloc_blocks = xfs_iomap_prealloc_size(mp, ip, offset, imap,
						       XFS_WRITE_IMAPS);

		aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1));
		ioalign = XFS_B_TO_FSBT(mp, aligned_offset);
		last_fsb = ioalign + alloc_blocks;
	} else {
		last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
	}

	if (prealloc || extsz) {
		error = xfs_iomap_eof_align_last_fsb(mp, ip, extsz, &last_fsb);
		if (error)
			return error;
	}

	/*
	 * Make sure preallocation does not create extents beyond the range
	 * we actually support in this filesystem.
	 */
	if (last_fsb > XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes))
		last_fsb = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);

	ASSERT(last_fsb > offset_fsb);

	nimaps = XFS_WRITE_IMAPS;
	error = xfs_bmapi_delay(ip, offset_fsb, last_fsb - offset_fsb,
				imap, &nimaps, XFS_BMAPI_ENTIRE);
	switch (error) {
	case 0:
	case -ENOSPC:
	case -EDQUOT:
		break;
	default:
		return error;
	}

	/*
	 * If bmapi returned us nothing, we got either ENOSPC or EDQUOT. Retry
	 * without EOF preallocation.
	 */
	if (nimaps == 0) {
		trace_xfs_delalloc_enospc(ip, offset, count);
		if (prealloc) {
			prealloc = 0;
			error = 0;
			goto retry;
		}
		return error ? error : -ENOSPC;
	}

	if (!(imap[0].br_startblock || XFS_IS_REALTIME_INODE(ip)))
		return xfs_alert_fsblock_zero(ip, &imap[0]);

	/*
	 * Tag the inode as speculatively preallocated so we can reclaim this
	 * space on demand, if necessary.
	 */
	if (prealloc)
		xfs_inode_set_eofblocks_tag(ip);

	*ret_imap = imap[0];
	return 0;
}
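
/*
 * Example of the prealloc placement arithmetic above (illustrative values):
 * with m_writeio_log = 16 (a 64k write iosize) and 4k blocks, a write
 * ending at byte 0x12345 has XFS_WRITEIO_ALIGN() round it down to 0x10000,
 * so ioalign is fsb 16 and the delalloc reservation runs from there out to
 * ioalign + alloc_blocks, subject to the extent size alignment and the
 * s_maxbytes clamp applied afterwards.
 */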

/*
 * Pass in a delayed allocate extent, convert it to real extents;
 * return to the caller the extent we create which maps on top of
 * the originating caller's request.
 *
 * Called without a lock on the inode.
 *
 * We no longer bother to look at the incoming map - all we have to
 * guarantee is that whatever we allocate fills the required range.
 */
int
xfs_iomap_write_allocate(
	xfs_inode_t	*ip,
	xfs_off_t	offset,
	xfs_bmbt_irec_t *imap)
{
	xfs_mount_t	*mp = ip->i_mount;
	xfs_fileoff_t	offset_fsb, last_block;
	xfs_fileoff_t	end_fsb, map_start_fsb;
	xfs_fsblock_t	first_block;
	xfs_bmap_free_t	free_list;
	xfs_filblks_t	count_fsb;
	xfs_trans_t	*tp;
	int		nimaps, committed;
	int		error = 0;
	int		nres;

	/*
	 * Make sure that the dquots are there.
	 */
	error = xfs_qm_dqattach(ip, 0);
	if (error)
		return error;

	offset_fsb = XFS_B_TO_FSBT(mp, offset);
	count_fsb = imap->br_blockcount;
	map_start_fsb = imap->br_startoff;

	XFS_STATS_ADD(mp, xs_xstrat_bytes, XFS_FSB_TO_B(mp, count_fsb));

	while (count_fsb != 0) {
		/*
		 * Set up a transaction with which to allocate the
		 * backing store for the file.  Do allocations in a
		 * loop until we get some space in the range we are
		 * interested in.  The other space that might be allocated
		 * is in the delayed allocation extent on which we sit
		 * but before our buffer starts.
		 */
		nimaps = 0;
		while (nimaps == 0) {
			tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE);
			tp->t_flags |= XFS_TRANS_RESERVE;
			nres = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK);
			error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
						  nres, 0);
			if (error) {
				xfs_trans_cancel(tp);
				return error;
			}
			xfs_ilock(ip, XFS_ILOCK_EXCL);
			xfs_trans_ijoin(tp, ip, 0);

			xfs_bmap_init(&free_list, &first_block);

			/*
			 * it is possible that the extents have changed since
			 * we did the read call as we dropped the ilock for a
			 * while. We have to be careful about truncates or
			 * hole punches here - we are not allowed to allocate
			 * non-delalloc blocks here.
			 *
			 * The only protection against truncation is the pages
			 * for the range we are being asked to convert are
			 * locked and hence a truncate will block on them
			 * first.
			 *
			 * As a result, if we go beyond the range we really
			 * need and hit a delalloc extent boundary followed by
			 * a hole while we have excess blocks in the map, we
			 * will fill the hole incorrectly and overrun the
			 * transaction reservation.
			 *
			 * Using a single map prevents this as we are forced
			 * to check each map we get for overlap with the
			 * desired range and abort as soon as we find it.
			 * Also, given that we only return a single map,
			 * having one beyond what we can return is probably
			 * a bit silly.
			 *
			 * We also need to check that we don't go beyond EOF;
			 * this is a truncate optimisation as a truncate sets
			 * the new file size before blocking on the pages we
			 * currently have locked under writeback. Because they
			 * are about to be tossed, we don't need to write them
			 * back....
			 */
			nimaps = 1;
			end_fsb = XFS_B_TO_FSB(mp, XFS_ISIZE(ip));
			error = xfs_bmap_last_offset(ip, &last_block,
							XFS_DATA_FORK);
			if (error)
				goto trans_cancel;

			last_block = XFS_FILEOFF_MAX(last_block, end_fsb);
			if ((map_start_fsb + count_fsb) > last_block) {
				count_fsb = last_block - map_start_fsb;
				if (count_fsb == 0) {
					error = -EAGAIN;
					goto trans_cancel;
				}
			}

			/*
			 * From this point onwards we overwrite the imap
			 * pointer that the caller gave to us.
			 */
			error = xfs_bmapi_write(tp, ip, map_start_fsb,
						count_fsb, 0, &first_block,
						nres, imap, &nimaps,
						&free_list);
			if (error)
				goto trans_cancel;

			error = xfs_bmap_finish(&tp, &free_list, &committed);
			if (error)
				goto trans_cancel;

			error = xfs_trans_commit(tp);
			if (error)
				goto error0;

			xfs_iunlock(ip, XFS_ILOCK_EXCL);
		}

		/*
		 * See if we were able to allocate an extent that
		 * covers at least part of the caller's request
		 */
		if (!(imap->br_startblock || XFS_IS_REALTIME_INODE(ip)))
			return xfs_alert_fsblock_zero(ip, imap);

		if ((offset_fsb >= imap->br_startoff) &&
		    (offset_fsb < (imap->br_startoff +
				   imap->br_blockcount))) {
			XFS_STATS_INC(mp, xs_xstrat_quick);
			return 0;
		}

		/*
		 * So far we have not mapped the requested part of the
		 * file, just surrounding data, try again.
		 */
		count_fsb -= imap->br_blockcount;
		map_start_fsb = imap->br_startoff + imap->br_blockcount;
	}

trans_cancel:
	xfs_bmap_cancel(&free_list);
	xfs_trans_cancel(tp);
error0:
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;
}
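
/*
 * Sketch of how the loop above converges (hypothetical extents): asked to
 * convert a delalloc range starting at fsb 100 for 50 blocks with the write
 * offset at fsb 120, a first allocation might return only [100, 115).  That
 * does not cover offset_fsb, so count_fsb drops to 35, map_start_fsb moves
 * to 115, and the next iteration allocates from there, repeating until the
 * returned extent overlaps fsb 120 and the function returns 0.
 */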

int
xfs_iomap_write_unwritten(
	xfs_inode_t	*ip,
	xfs_off_t	offset,
	xfs_off_t	count)
{
	xfs_mount_t	*mp = ip->i_mount;
	xfs_fileoff_t	offset_fsb;
	xfs_filblks_t	count_fsb;
	xfs_filblks_t	numblks_fsb;
	xfs_fsblock_t	firstfsb;
	int		nimaps;
	xfs_trans_t	*tp;
	xfs_bmbt_irec_t imap;
	xfs_bmap_free_t free_list;
	xfs_fsize_t	i_size;
	uint		resblks;
	int		committed;
	int		error;

	trace_xfs_unwritten_convert(ip, offset, count);

	offset_fsb = XFS_B_TO_FSBT(mp, offset);
	count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
	count_fsb = (xfs_filblks_t)(count_fsb - offset_fsb);

	/*
	 * Reserve enough blocks in this transaction for two complete extent
	 * btree splits.  We may be converting the middle part of an unwritten
	 * extent and in this case we will insert two new extents in the btree
	 * each of which could cause a full split.
	 *
	 * This reservation amount will be used in the first call to
	 * xfs_bmbt_split() to select an AG with enough space to satisfy the
	 * rest of the operation.
	 */
	resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1;

	do {
		/*
		 * Set up a transaction to convert the range of extents
		 * from unwritten to real. Do allocations in a loop until
		 * we have covered the range passed in.
		 *
		 * Note that we open code the transaction allocation here
		 * to pass KM_NOFS; we can't risk recursing back into
		 * the filesystem here, as we might be asked to write out
		 * the same inode that we complete here and deadlock
		 * on the iolock.
		 */
		sb_start_intwrite(mp->m_super);
		tp = _xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE, KM_NOFS);
		tp->t_flags |= XFS_TRANS_RESERVE | XFS_TRANS_FREEZE_PROT;
		error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
					  resblks, 0);
		if (error) {
			xfs_trans_cancel(tp);
			return error;
		}

		xfs_ilock(ip, XFS_ILOCK_EXCL);
		xfs_trans_ijoin(tp, ip, 0);

		/*
		 * Modify the unwritten extent state of the buffer.
		 */
		xfs_bmap_init(&free_list, &firstfsb);
		nimaps = 1;
		error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb,
					XFS_BMAPI_CONVERT, &firstfsb, resblks,
					&imap, &nimaps, &free_list);
		if (error)
			goto error_on_bmapi_transaction;

		/*
		 * Log the updated inode size as we go.  We have to be careful
		 * to only log it up to the actual write offset if it is
		 * halfway into a block.
		 */
		i_size = XFS_FSB_TO_B(mp, offset_fsb + count_fsb);
		if (i_size > offset + count)
			i_size = offset + count;

		i_size = xfs_new_eof(ip, i_size);
		if (i_size) {
			ip->i_d.di_size = i_size;
			xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
		}

		error = xfs_bmap_finish(&tp, &free_list, &committed);
		if (error)
			goto error_on_bmapi_transaction;

		error = xfs_trans_commit(tp);
		xfs_iunlock(ip, XFS_ILOCK_EXCL);
		if (error)
			return error;

		if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip)))
			return xfs_alert_fsblock_zero(ip, &imap);

		if ((numblks_fsb = imap.br_blockcount) == 0) {
			/*
			 * A zero br_blockcount means we made no progress
			 * and would spin here forever, so assert and bail
			 * out of the loop.
			 */
			ASSERT(imap.br_blockcount);
			break;
		}
		offset_fsb += numblks_fsb;
		count_fsb -= numblks_fsb;
	} while (count_fsb > 0);

	return 0;

error_on_bmapi_transaction:
	xfs_bmap_cancel(&free_list);
	xfs_trans_cancel(tp);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;
}
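
/*
 * Hypothetical walk through the conversion loop above: converting bytes
 * [0, 1M) on a 4k-block filesystem starts with count_fsb = 256.  If the
 * first pass converts an unwritten extent of 100 blocks, the on-disk size
 * is logged up to min(400k, offset + count), offset_fsb advances to 100
 * and count_fsb drops to 156; the loop then repeats until the whole range
 * has been converted to written extents.
 */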