/*
 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include "xfs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_inode_item.h"
#include "xfs_alloc.h"
#include "xfs_error.h"
#include "xfs_iomap.h"
#include "xfs_trace.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
#include "xfs_bmap_btree.h"
#include "xfs_reflink.h"
#include <linux/gfp.h>
#include <linux/mpage.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>

/*
 * structure owned by writepages passed to individual writepage calls
 */
struct xfs_writepage_ctx {
	struct xfs_bmbt_irec	imap;
	bool			imap_valid;
	unsigned int		io_type;
	struct xfs_ioend	*ioend;
	sector_t		last_block;
};

void
xfs_count_page_state(
	struct page		*page,
	int			*delalloc,
	int			*unwritten)
{
	struct buffer_head	*bh, *head;

	*delalloc = *unwritten = 0;

	bh = head = page_buffers(page);
	do {
		if (buffer_unwritten(bh))
			(*unwritten) = 1;
		else if (buffer_delay(bh))
			(*delalloc) = 1;
	} while ((bh = bh->b_this_page) != head);
}

struct block_device *
xfs_find_bdev_for_inode(
	struct inode		*inode)
{
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;

	if (XFS_IS_REALTIME_INODE(ip))
		return mp->m_rtdev_targp->bt_bdev;
	else
		return mp->m_ddev_targp->bt_bdev;
}

/*
 * We're now finished for good with this page.  Update the page state via the
 * associated buffer_heads, paying attention to the start and end offsets that
 * we need to process on the page.
 *
 * Landmine Warning: bh->b_end_io() will call end_page_writeback() on the last
 * buffer in the IO. Once it does this, it is unsafe to access the bufferhead or
 * the page at all, as we may be racing with memory reclaim and it can free both
 * the bufferhead chain and the page as it will see the page as clean and
 * unused.
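 * IOWs, everything needed from the bufferhead chain (the block size and the
 * next buffer) must be read before b_end_io() is called on the last buffer.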
 */
static void
xfs_finish_page_writeback(
	struct inode		*inode,
	struct bio_vec		*bvec,
	int			error)
{
	unsigned int		end = bvec->bv_offset + bvec->bv_len - 1;
	struct buffer_head	*head, *bh, *next;
	unsigned int		off = 0;
	unsigned int		bsize;

	ASSERT(bvec->bv_offset < PAGE_SIZE);
	ASSERT((bvec->bv_offset & (i_blocksize(inode) - 1)) == 0);
	ASSERT(end < PAGE_SIZE);
	ASSERT((bvec->bv_len & (i_blocksize(inode) - 1)) == 0);

	bh = head = page_buffers(bvec->bv_page);

	bsize = bh->b_size;
	do {
		if (off > end)
			break;
		next = bh->b_this_page;
		if (off < bvec->bv_offset)
			goto next_bh;
		bh->b_end_io(bh, !error);
next_bh:
		off += bsize;
	} while ((bh = next) != head);
}

/*
 * We're now finished for good with this ioend structure.  Update the page
 * state, release holds on bios, and finally free up memory.  Do not use the
 * ioend after this.
 */
STATIC void
xfs_destroy_ioend(
	struct xfs_ioend	*ioend,
	int			error)
{
	struct inode		*inode = ioend->io_inode;
	struct bio		*last = ioend->io_bio;
	struct bio		*bio, *next;

	for (bio = &ioend->io_inline_bio; bio; bio = next) {
		struct bio_vec	*bvec;
		int		i;

		/*
		 * For the last bio, bi_private points to the ioend, so we
		 * need to explicitly end the iteration here.
		 */
		if (bio == last)
			next = NULL;
		else
			next = bio->bi_private;

		/* walk each page on bio, ending page IO on them */
		bio_for_each_segment_all(bvec, bio, i)
			xfs_finish_page_writeback(inode, bvec, error);

		bio_put(bio);
	}
}

/*
 * Fast and loose check if this write could update the on-disk inode size.
 */
static inline bool xfs_ioend_is_append(struct xfs_ioend *ioend)
{
	return ioend->io_offset + ioend->io_size >
		XFS_I(ioend->io_inode)->i_d.di_size;
}

STATIC int
xfs_setfilesize_trans_alloc(
	struct xfs_ioend	*ioend)
{
	struct xfs_mount	*mp = XFS_I(ioend->io_inode)->i_mount;
	struct xfs_trans	*tp;
	int			error;

	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp);
	if (error)
		return error;

	ioend->io_append_trans = tp;

	/*
	 * We may pass freeze protection with a transaction.  So tell lockdep
	 * we released it.
	 */
	__sb_writers_release(ioend->io_inode->i_sb, SB_FREEZE_FS);
	/*
	 * We hand off the transaction to the completion thread now, so
	 * clear the flag here.
	 */
	current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
	return 0;
}

/*
 * Update on-disk file size now that data has been written to disk.
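 * The on-disk size is only ever moved forwards here: if the I/O does not
 * extend past the current di_size, the transaction is cancelled and nothing
 * is logged.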
 */
STATIC int
__xfs_setfilesize(
	struct xfs_inode	*ip,
	struct xfs_trans	*tp,
	xfs_off_t		offset,
	size_t			size)
{
	xfs_fsize_t		isize;

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	isize = xfs_new_eof(ip, offset + size);
	if (!isize) {
		xfs_iunlock(ip, XFS_ILOCK_EXCL);
		xfs_trans_cancel(tp);
		return 0;
	}

	trace_xfs_setfilesize(ip, offset, size);

	ip->i_d.di_size = isize;
	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);

	return xfs_trans_commit(tp);
}

int
xfs_setfilesize(
	struct xfs_inode	*ip,
	xfs_off_t		offset,
	size_t			size)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_trans	*tp;
	int			error;

	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp);
	if (error)
		return error;

	return __xfs_setfilesize(ip, tp, offset, size);
}

STATIC int
xfs_setfilesize_ioend(
	struct xfs_ioend	*ioend,
	int			error)
{
	struct xfs_inode	*ip = XFS_I(ioend->io_inode);
	struct xfs_trans	*tp = ioend->io_append_trans;

	/*
	 * The transaction may have been allocated in the I/O submission thread,
	 * thus we need to mark ourselves as being in a transaction manually.
	 * Similarly for freeze protection.
	 */
	current_set_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
	__sb_writers_acquired(VFS_I(ip)->i_sb, SB_FREEZE_FS);

	/* we abort the update if there was an IO error */
	if (error) {
		xfs_trans_cancel(tp);
		return error;
	}

	return __xfs_setfilesize(ip, tp, ioend->io_offset, ioend->io_size);
}

/*
 * IO write completion.
 */
STATIC void
xfs_end_io(
	struct work_struct	*work)
{
	struct xfs_ioend	*ioend =
		container_of(work, struct xfs_ioend, io_work);
	struct xfs_inode	*ip = XFS_I(ioend->io_inode);
	xfs_off_t		offset = ioend->io_offset;
	size_t			size = ioend->io_size;
	int			error;

	/*
	 * Just clean up the in-memory structures if the fs has been shut down.
	 */
	if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
		error = -EIO;
		goto done;
	}

	/*
	 * Clean up any COW blocks on an I/O error.
	 */
	error = blk_status_to_errno(ioend->io_bio->bi_status);
	if (unlikely(error)) {
		switch (ioend->io_type) {
		case XFS_IO_COW:
			xfs_reflink_cancel_cow_range(ip, offset, size, true);
			break;
		}

		goto done;
	}

	/*
	 * Success: commit the COW or unwritten blocks if needed.
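	 * Overwrites need no conversion here; at most the on-disk file size
	 * is updated below.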
	 */
	switch (ioend->io_type) {
	case XFS_IO_COW:
		error = xfs_reflink_end_cow(ip, offset, size);
		break;
	case XFS_IO_UNWRITTEN:
		error = xfs_iomap_write_unwritten(ip, offset, size);
		break;
	default:
		ASSERT(!xfs_ioend_is_append(ioend) || ioend->io_append_trans);
		break;
	}

done:
	if (ioend->io_append_trans)
		error = xfs_setfilesize_ioend(ioend, error);
	xfs_destroy_ioend(ioend, error);
}

STATIC void
xfs_end_bio(
	struct bio		*bio)
{
	struct xfs_ioend	*ioend = bio->bi_private;
	struct xfs_mount	*mp = XFS_I(ioend->io_inode)->i_mount;

	if (ioend->io_type == XFS_IO_UNWRITTEN || ioend->io_type == XFS_IO_COW)
		queue_work(mp->m_unwritten_workqueue, &ioend->io_work);
	else if (ioend->io_append_trans)
		queue_work(mp->m_data_workqueue, &ioend->io_work);
	else
		xfs_destroy_ioend(ioend, blk_status_to_errno(bio->bi_status));
}

STATIC int
xfs_map_blocks(
	struct inode		*inode,
	loff_t			offset,
	struct xfs_bmbt_irec	*imap,
	int			type)
{
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	ssize_t			count = i_blocksize(inode);
	xfs_fileoff_t		offset_fsb, end_fsb;
	int			error = 0;
	int			bmapi_flags = XFS_BMAPI_ENTIRE;
	int			nimaps = 1;

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	ASSERT(type != XFS_IO_COW);
	if (type == XFS_IO_UNWRITTEN)
		bmapi_flags |= XFS_BMAPI_IGSTATE;

	xfs_ilock(ip, XFS_ILOCK_SHARED);
	ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
	       (ip->i_df.if_flags & XFS_IFEXTENTS));
	ASSERT(offset <= mp->m_super->s_maxbytes);

	if (offset + count > mp->m_super->s_maxbytes)
		count = mp->m_super->s_maxbytes - offset;
	end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
	offset_fsb = XFS_B_TO_FSBT(mp, offset);
	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
				imap, &nimaps, bmapi_flags);
	/*
	 * Truncate an overwrite extent if there's a pending CoW
	 * reservation before the end of this extent.  This forces us
	 * to come back to writepage to take care of the CoW.
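	 * (xfs_reflink_trim_irec_to_next_cow() shortens the mapping so that
	 * it ends where the next CoW reservation starts.)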
	 */
	if (nimaps && type == XFS_IO_OVERWRITE)
		xfs_reflink_trim_irec_to_next_cow(ip, offset_fsb, imap);
	xfs_iunlock(ip, XFS_ILOCK_SHARED);

	if (error)
		return error;

	if (type == XFS_IO_DELALLOC &&
	    (!nimaps || isnullstartblock(imap->br_startblock))) {
		error = xfs_iomap_write_allocate(ip, XFS_DATA_FORK, offset,
								imap);
		if (!error)
			trace_xfs_map_blocks_alloc(ip, offset, count, type, imap);
		return error;
	}

#ifdef DEBUG
	if (type == XFS_IO_UNWRITTEN) {
		ASSERT(nimaps);
		ASSERT(imap->br_startblock != HOLESTARTBLOCK);
		ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
	}
#endif
	if (nimaps)
		trace_xfs_map_blocks_found(ip, offset, count, type, imap);
	return 0;
}

STATIC bool
xfs_imap_valid(
	struct inode		*inode,
	struct xfs_bmbt_irec	*imap,
	xfs_off_t		offset)
{
	offset >>= inode->i_blkbits;

	return offset >= imap->br_startoff &&
		offset < imap->br_startoff + imap->br_blockcount;
}

STATIC void
xfs_start_buffer_writeback(
	struct buffer_head	*bh)
{
	ASSERT(buffer_mapped(bh));
	ASSERT(buffer_locked(bh));
	ASSERT(!buffer_delay(bh));
	ASSERT(!buffer_unwritten(bh));

	mark_buffer_async_write(bh);
	set_buffer_uptodate(bh);
	clear_buffer_dirty(bh);
}

STATIC void
xfs_start_page_writeback(
	struct page		*page,
	int			clear_dirty)
{
	ASSERT(PageLocked(page));
	ASSERT(!PageWriteback(page));

	/*
	 * if the page was not fully cleaned, we need to ensure that the higher
	 * layers come back to it correctly.  That means we need to keep the page
	 * dirty, and for WB_SYNC_ALL writeback we need to ensure the
	 * PAGECACHE_TAG_TOWRITE index mark is not removed so another attempt to
	 * write this page in this writeback sweep will be made.
	 */
	if (clear_dirty) {
		clear_page_dirty_for_io(page);
		set_page_writeback(page);
	} else
		set_page_writeback_keepwrite(page);

	unlock_page(page);
}

static inline int xfs_bio_add_buffer(struct bio *bio, struct buffer_head *bh)
{
	return bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
}

/*
 * Submit the bio for an ioend. We are passed an ioend with a bio attached to
 * it, and we submit that bio. The ioend may be used for multiple bio
 * submissions, so we only want to allocate an append transaction for the ioend
 * once. In the case of multiple bio submission, each bio will take an IO
 * reference to the ioend to ensure that the ioend completion is only done once
 * all bios have been submitted and the ioend is really done.
 *
 * If @status is non-zero, it means that we have a situation where some part of
 * the submission process has failed after we have marked pages for writeback
 * and unlocked them. In this situation, we need to fail the bio and ioend
 * rather than submit it to IO. This typically only happens on a filesystem
 * shutdown.
 */
STATIC int
xfs_submit_ioend(
	struct writeback_control *wbc,
	struct xfs_ioend	*ioend,
	int			status)
{
	/* Convert CoW extents to regular */
	if (!status && ioend->io_type == XFS_IO_COW) {
		status = xfs_reflink_convert_cow(XFS_I(ioend->io_inode),
				ioend->io_offset, ioend->io_size);
	}

	/* Reserve log space if we might write beyond the on-disk inode size. */
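	/*
	 * (Unwritten extent conversion updates the on-disk size itself, which
	 * is why XFS_IO_UNWRITTEN is excluded from the check below.)
	 */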
	if (!status &&
	    ioend->io_type != XFS_IO_UNWRITTEN &&
	    xfs_ioend_is_append(ioend) &&
	    !ioend->io_append_trans)
		status = xfs_setfilesize_trans_alloc(ioend);

	ioend->io_bio->bi_private = ioend;
	ioend->io_bio->bi_end_io = xfs_end_bio;
	ioend->io_bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);

	/*
	 * If we are failing the IO now, just mark the ioend with an
	 * error and finish it.  This will run IO completion immediately
	 * as there is only one reference to the ioend at this point in
	 * time.
	 */
	if (status) {
		ioend->io_bio->bi_status = errno_to_blk_status(status);
		bio_endio(ioend->io_bio);
		return status;
	}

	ioend->io_bio->bi_write_hint = ioend->io_inode->i_write_hint;
	submit_bio(ioend->io_bio);
	return 0;
}

static void
xfs_init_bio_from_bh(
	struct bio		*bio,
	struct buffer_head	*bh)
{
	bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
	bio->bi_bdev = bh->b_bdev;
}

static struct xfs_ioend *
xfs_alloc_ioend(
	struct inode		*inode,
	unsigned int		type,
	xfs_off_t		offset,
	struct buffer_head	*bh)
{
	struct xfs_ioend	*ioend;
	struct bio		*bio;

	bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, xfs_ioend_bioset);
	xfs_init_bio_from_bh(bio, bh);

	ioend = container_of(bio, struct xfs_ioend, io_inline_bio);
	INIT_LIST_HEAD(&ioend->io_list);
	ioend->io_type = type;
	ioend->io_inode = inode;
	ioend->io_size = 0;
	ioend->io_offset = offset;
	INIT_WORK(&ioend->io_work, xfs_end_io);
	ioend->io_append_trans = NULL;
	ioend->io_bio = bio;
	return ioend;
}

/*
 * Allocate a new bio, and chain the old bio to the new one.
 *
 * Note that we have to perform the chaining in this unintuitive order
 * so that the bi_private linkage is set up in the right direction for the
 * traversal in xfs_destroy_ioend().
 */
static void
xfs_chain_bio(
	struct xfs_ioend	*ioend,
	struct writeback_control *wbc,
	struct buffer_head	*bh)
{
	struct bio		*new;

	new = bio_alloc(GFP_NOFS, BIO_MAX_PAGES);
	xfs_init_bio_from_bh(new, bh);

	bio_chain(ioend->io_bio, new);
	bio_get(ioend->io_bio);		/* for xfs_destroy_ioend */
	ioend->io_bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
	ioend->io_bio->bi_write_hint = ioend->io_inode->i_write_hint;
	submit_bio(ioend->io_bio);
	ioend->io_bio = new;
}

/*
 * Test to see if we've been building up a completion structure for
 * earlier buffers -- if so, we try to append to this ioend if we
 * can, otherwise we finish off any current ioend and start another.
 * Return the ioend we finished off so that the caller can submit it
 * once it has finished processing the dirty page.
 */
STATIC void
xfs_add_to_ioend(
	struct inode		*inode,
	struct buffer_head	*bh,
	xfs_off_t		offset,
	struct xfs_writepage_ctx *wpc,
	struct writeback_control *wbc,
	struct list_head	*iolist)
{
	if (!wpc->ioend || wpc->io_type != wpc->ioend->io_type ||
	    bh->b_blocknr != wpc->last_block + 1 ||
	    offset != wpc->ioend->io_offset + wpc->ioend->io_size) {
		if (wpc->ioend)
			list_add(&wpc->ioend->io_list, iolist);
		wpc->ioend = xfs_alloc_ioend(inode, wpc->io_type, offset, bh);
	}

	/*
	 * If the buffer doesn't fit into the bio we need to allocate a new
	 * one.  This shouldn't happen more than once for a given buffer.
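	 * (A freshly chained bio is empty, so the retry always succeeds.)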
	 */
	while (xfs_bio_add_buffer(wpc->ioend->io_bio, bh) != bh->b_size)
		xfs_chain_bio(wpc->ioend, wbc, bh);

	wpc->ioend->io_size += bh->b_size;
	wpc->last_block = bh->b_blocknr;
	xfs_start_buffer_writeback(bh);
}

STATIC void
xfs_map_buffer(
	struct inode		*inode,
	struct buffer_head	*bh,
	struct xfs_bmbt_irec	*imap,
	xfs_off_t		offset)
{
	sector_t		bn;
	struct xfs_mount	*m = XFS_I(inode)->i_mount;
	xfs_off_t		iomap_offset = XFS_FSB_TO_B(m, imap->br_startoff);
	xfs_daddr_t		iomap_bn = xfs_fsb_to_db(XFS_I(inode), imap->br_startblock);

	ASSERT(imap->br_startblock != HOLESTARTBLOCK);
	ASSERT(imap->br_startblock != DELAYSTARTBLOCK);

	bn = (iomap_bn >> (inode->i_blkbits - BBSHIFT)) +
	      ((offset - iomap_offset) >> inode->i_blkbits);

	ASSERT(bn || XFS_IS_REALTIME_INODE(XFS_I(inode)));

	bh->b_blocknr = bn;
	set_buffer_mapped(bh);
}

STATIC void
xfs_map_at_offset(
	struct inode		*inode,
	struct buffer_head	*bh,
	struct xfs_bmbt_irec	*imap,
	xfs_off_t		offset)
{
	ASSERT(imap->br_startblock != HOLESTARTBLOCK);
	ASSERT(imap->br_startblock != DELAYSTARTBLOCK);

	xfs_map_buffer(inode, bh, imap, offset);
	set_buffer_mapped(bh);
	clear_buffer_delay(bh);
	clear_buffer_unwritten(bh);
}

/*
 * Test if a given page contains at least one buffer of a given @type.
 * If @check_all_buffers is true, then we walk all the buffers in the page to
 * try to find one of the type passed in. If it is not set, then the caller only
 * needs to check the first buffer on the page for a match.
 */
STATIC bool
xfs_check_page_type(
	struct page		*page,
	unsigned int		type,
	bool			check_all_buffers)
{
	struct buffer_head	*bh;
	struct buffer_head	*head;

	if (PageWriteback(page))
		return false;
	if (!page->mapping)
		return false;
	if (!page_has_buffers(page))
		return false;

	bh = head = page_buffers(page);
	do {
		if (buffer_unwritten(bh)) {
			if (type == XFS_IO_UNWRITTEN)
				return true;
		} else if (buffer_delay(bh)) {
			if (type == XFS_IO_DELALLOC)
				return true;
		} else if (buffer_dirty(bh) && buffer_mapped(bh)) {
			if (type == XFS_IO_OVERWRITE)
				return true;
		}

		/* If we are only checking the first buffer, we are done now. */
		if (!check_all_buffers)
			break;
	} while ((bh = bh->b_this_page) != head);

	return false;
}

STATIC void
xfs_vm_invalidatepage(
	struct page		*page,
	unsigned int		offset,
	unsigned int		length)
{
	trace_xfs_invalidatepage(page->mapping->host, page, offset,
				 length);
	block_invalidatepage(page, offset, length);
}

/*
 * If the page has delalloc buffers on it, we need to punch them out before we
 * invalidate the page.  If we don't, we leave a stale delalloc mapping on the
 * inode that can trip a BUG() in xfs_get_blocks() later on if a direct IO read
 * is done on that same region - the delalloc extent is returned when none is
 * supposed to be there.
 *
 * We prevent this by truncating away the delalloc regions on the page before
 * invalidating it. Because they are delalloc, we can do this without needing a
 * transaction. Indeed - if we get ENOSPC errors, we have to be able to do this
 * truncation without a transaction as there is no space left for block
 * reservation (typically why we see an ENOSPC in writeback).
 *
 * This is not a performance critical path, so for now just do the punching a
 * buffer head at a time.
 */
STATIC void
xfs_aops_discard_page(
	struct page		*page)
{
	struct inode		*inode = page->mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	struct buffer_head	*bh, *head;
	loff_t			offset = page_offset(page);

	if (!xfs_check_page_type(page, XFS_IO_DELALLOC, true))
		goto out_invalidate;

	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
		goto out_invalidate;

	xfs_alert(ip->i_mount,
		"page discard on page %p, inode 0x%llx, offset %llu.",
			page, ip->i_ino, offset);

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	bh = head = page_buffers(page);
	do {
		int		error;
		xfs_fileoff_t	start_fsb;

		if (!buffer_delay(bh))
			goto next_buffer;

		start_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
		error = xfs_bmap_punch_delalloc_range(ip, start_fsb, 1);
		if (error) {
			/* something screwed, just bail */
			if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
				xfs_alert(ip->i_mount,
			"page discard unable to remove delalloc mapping.");
			}
			break;
		}
next_buffer:
		offset += i_blocksize(inode);

	} while ((bh = bh->b_this_page) != head);

	xfs_iunlock(ip, XFS_ILOCK_EXCL);
out_invalidate:
	xfs_vm_invalidatepage(page, 0, PAGE_SIZE);
	return;
}

static int
xfs_map_cow(
	struct xfs_writepage_ctx *wpc,
	struct inode		*inode,
	loff_t			offset,
	unsigned int		*new_type)
{
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_bmbt_irec	imap;
	bool			is_cow = false;
	int			error;

	/*
	 * If we already have a valid COW mapping keep using it.
	 */
	if (wpc->io_type == XFS_IO_COW) {
		wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap, offset);
		if (wpc->imap_valid) {
			*new_type = XFS_IO_COW;
			return 0;
		}
	}

	/*
	 * Else we need to check if there is a COW mapping at this offset.
	 */
	xfs_ilock(ip, XFS_ILOCK_SHARED);
	is_cow = xfs_reflink_find_cow_mapping(ip, offset, &imap);
	xfs_iunlock(ip, XFS_ILOCK_SHARED);

	if (!is_cow)
		return 0;

	/*
	 * And if the COW mapping has a delayed extent here we need to
	 * allocate real space for it now.
	 */
	if (isnullstartblock(imap.br_startblock)) {
		error = xfs_iomap_write_allocate(ip, XFS_COW_FORK, offset,
				&imap);
		if (error)
			return error;
	}

	wpc->io_type = *new_type = XFS_IO_COW;
	wpc->imap_valid = true;
	wpc->imap = imap;
	return 0;
}

/*
 * We implement an immediate ioend submission policy here to avoid needing to
 * chain multiple ioends and hence nest mempool allocations which can violate
 * forward progress guarantees we need to provide. The current ioend we are
 * adding buffers to is cached on the writepage context, and if the new buffer
 * does not append to the cached ioend it will create a new ioend and cache that
 * instead.
 *
 * If a new ioend is created and cached, the old ioend is returned and queued
 * locally for submission once the entire page is processed or an error has been
 * detected.  While ioends are submitted immediately after they are completed,
 * batching optimisations are provided by higher level block plugging.
 *
 * At the end of a writeback pass, there will be a cached ioend remaining on the
 * writepage context that the caller will need to submit.
 */
static int
xfs_writepage_map(
	struct xfs_writepage_ctx *wpc,
	struct writeback_control *wbc,
	struct inode		*inode,
	struct page		*page,
	loff_t			offset,
	uint64_t		end_offset)
{
	LIST_HEAD(submit_list);
	struct xfs_ioend	*ioend, *next;
	struct buffer_head	*bh, *head;
	ssize_t			len = i_blocksize(inode);
	int			error = 0;
	int			count = 0;
	int			uptodate = 1;
	unsigned int		new_type;

	bh = head = page_buffers(page);
	offset = page_offset(page);
	do {
		if (offset >= end_offset)
			break;
		if (!buffer_uptodate(bh))
			uptodate = 0;

		/*
		 * set_page_dirty dirties all buffers in a page, independent
		 * of their state.  The dirty state however is entirely
		 * meaningless for holes (!mapped && uptodate), so skip
		 * buffers covering holes here.
		 */
		if (!buffer_mapped(bh) && buffer_uptodate(bh)) {
			wpc->imap_valid = false;
			continue;
		}

		if (buffer_unwritten(bh))
			new_type = XFS_IO_UNWRITTEN;
		else if (buffer_delay(bh))
			new_type = XFS_IO_DELALLOC;
		else if (buffer_uptodate(bh))
			new_type = XFS_IO_OVERWRITE;
		else {
			if (PageUptodate(page))
				ASSERT(buffer_mapped(bh));
			/*
			 * This buffer is not uptodate and will not be
			 * written to disk.  Ensure that we will put any
			 * subsequent writeable buffers into a new
			 * ioend.
			 */
			wpc->imap_valid = false;
			continue;
		}

		if (xfs_is_reflink_inode(XFS_I(inode))) {
			error = xfs_map_cow(wpc, inode, offset, &new_type);
			if (error)
				goto out;
		}

		if (wpc->io_type != new_type) {
			wpc->io_type = new_type;
			wpc->imap_valid = false;
		}

		if (wpc->imap_valid)
			wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap,
							 offset);
		if (!wpc->imap_valid) {
			error = xfs_map_blocks(inode, offset, &wpc->imap,
					       wpc->io_type);
			if (error)
				goto out;
			wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap,
							 offset);
		}
		if (wpc->imap_valid) {
			lock_buffer(bh);
			if (wpc->io_type != XFS_IO_OVERWRITE)
				xfs_map_at_offset(inode, bh, &wpc->imap, offset);
			xfs_add_to_ioend(inode, bh, offset, wpc, wbc, &submit_list);
			count++;
		}

	} while (offset += len, ((bh = bh->b_this_page) != head));

	if (uptodate && bh == head)
		SetPageUptodate(page);

	ASSERT(wpc->ioend || list_empty(&submit_list));

out:
	/*
	 * On error, we have to fail the ioend here because we have locked
	 * buffers in the ioend. If we don't do this, we'll deadlock
	 * invalidating the page as that tries to lock the buffers on the page.
	 * Also, because we may have set pages under writeback, we have to make
	 * sure we run IO completion to mark the error state of the IO
	 * appropriately, so we can't cancel the ioend directly here.  That means
	 * we have to mark this page as under writeback if we included any
	 * buffers from it in the ioend chain so that completion treats it
	 * correctly.
	 *
	 * If we didn't include the page in the ioend, then on error we can
	 * simply discard and unlock it as there are no other users of the page
	 * or its buffers right now.  The caller will still need to trigger
	 * submission of outstanding ioends on the writepage context so they are
	 * treated correctly on error.
	 */
	if (count) {
		xfs_start_page_writeback(page, !error);

		/*
		 * Preserve the original error if there was one, otherwise catch
		 * submission errors here and propagate into subsequent ioend
		 * submissions.
		 */
		list_for_each_entry_safe(ioend, next, &submit_list, io_list) {
			int error2;

			list_del_init(&ioend->io_list);
			error2 = xfs_submit_ioend(wbc, ioend, error);
			if (error2 && !error)
				error = error2;
		}
	} else if (error) {
		xfs_aops_discard_page(page);
		ClearPageUptodate(page);
		unlock_page(page);
	} else {
		/*
		 * We can end up here with no error and nothing to write if we
		 * race with a partial page truncate on a sub-page block sized
		 * filesystem. In that case we need to mark the page clean.
		 */
		xfs_start_page_writeback(page, 1);
		end_page_writeback(page);
	}

	mapping_set_error(page->mapping, error);
	return error;
}

/*
 * Write out a dirty page.
 *
 * For delalloc space on the page we need to allocate space and flush it.
 * For unwritten space on the page we need to start the conversion to
 * regular allocated space.
 * For any other dirty buffer heads on the page we should flush them.
 */
STATIC int
xfs_do_writepage(
	struct page		*page,
	struct writeback_control *wbc,
	void			*data)
{
	struct xfs_writepage_ctx *wpc = data;
	struct inode		*inode = page->mapping->host;
	loff_t			offset;
	uint64_t		end_offset;
	pgoff_t			end_index;

	trace_xfs_writepage(inode, page, 0, 0);

	ASSERT(page_has_buffers(page));

	/*
	 * Refuse to write the page out if we are called from reclaim context.
	 *
	 * This avoids stack overflows when called from deeply used stacks in
	 * random callers for direct reclaim or memcg reclaim.  We explicitly
	 * allow reclaim from kswapd as the stack usage there is relatively low.
	 *
	 * This should never happen except in the case of a VM regression so
	 * warn about it.
	 */
	if (WARN_ON_ONCE((current->flags & (PF_MEMALLOC|PF_KSWAPD)) ==
			PF_MEMALLOC))
		goto redirty;

	/*
	 * Given that we do not allow direct reclaim to call us, we should
	 * never be called while in a filesystem transaction.
	 */
	if (WARN_ON_ONCE(current->flags & PF_MEMALLOC_NOFS))
		goto redirty;

	/*
	 * Is this page beyond the end of the file?
	 *
	 * The page index is less than the end_index, adjust the end_offset
	 * to the highest offset that this page should represent.
	 * -----------------------------------------------------
	 * |			file mapping	      | <EOF> |
	 * -----------------------------------------------------
	 * | Page ... | Page N-2 | Page N-1 |  Page N |       |
	 * ^--------------------------------^----------|--------
	 * |     desired writeback range    |      see else    |
	 * ---------------------------------^------------------|
	 */
	offset = i_size_read(inode);
	end_index = offset >> PAGE_SHIFT;
	if (page->index < end_index)
		end_offset = (xfs_off_t)(page->index + 1) << PAGE_SHIFT;
	else {
		/*
		 * Check whether the page to write out is beyond or straddles
		 * i_size or not.
		 * -------------------------------------------------------
		 * |		file mapping		       | <EOF>  |
		 * -------------------------------------------------------
		 * | Page ... | Page N-2 | Page N-1 |  Page N  | Beyond |
		 * ^--------------------------------^-----------|---------
		 * |				    |      Straddles     |
		 * ---------------------------------^-----------|--------|
		 */
		unsigned offset_into_page = offset & (PAGE_SIZE - 1);

		/*
		 * Skip the page if it is fully outside i_size, e.g. due to a
		 * truncate operation that is in progress.  We must redirty the
		 * page so that reclaim stops reclaiming it.  Otherwise
		 * xfs_vm_releasepage() is called on it and gets confused.
		 *
		 * Note that the end_index is unsigned long, it would overflow
		 * if the given offset is greater than 16TB on 32-bit system
		 * and if we do check the page is fully outside i_size or not
		 * via "if (page->index >= end_index + 1)" as "end_index + 1"
		 * will be evaluated to 0.  Hence this page will be redirtied
		 * and be written out repeatedly which would result in an
		 * infinite loop, the user program that performs this operation
		 * will hang.  Instead, we can verify this situation by checking
		 * if the page to write is totally beyond the i_size or if its
		 * offset is just equal to the EOF.
		 */
		if (page->index > end_index ||
		    (page->index == end_index && offset_into_page == 0))
			goto redirty;

		/*
		 * The page straddles i_size.  It must be zeroed out on each
		 * and every writepage invocation because it may be mmapped.
		 * "A file is mapped in multiples of the page size.  For a file
		 * that is not a multiple of the page size, the remaining
		 * memory is zeroed when mapped, and writes to that region are
		 * not written out to the file."
		 */
		zero_user_segment(page, offset_into_page, PAGE_SIZE);

		/* Adjust the end_offset to the end of file */
		end_offset = offset;
	}

	return xfs_writepage_map(wpc, wbc, inode, page, offset, end_offset);

redirty:
	redirty_page_for_writepage(wbc, page);
	unlock_page(page);
	return 0;
}

STATIC int
xfs_vm_writepage(
	struct page		*page,
	struct writeback_control *wbc)
{
	struct xfs_writepage_ctx wpc = {
		.io_type = XFS_IO_INVALID,
	};
	int			ret;

	ret = xfs_do_writepage(page, wbc, &wpc);
	if (wpc.ioend)
		ret = xfs_submit_ioend(wbc, wpc.ioend, ret);
	return ret;
}

STATIC int
xfs_vm_writepages(
	struct address_space	*mapping,
	struct writeback_control *wbc)
{
	struct xfs_writepage_ctx wpc = {
		.io_type = XFS_IO_INVALID,
	};
	int			ret;

	xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
	if (dax_mapping(mapping))
		return dax_writeback_mapping_range(mapping,
				xfs_find_bdev_for_inode(mapping->host), wbc);

	ret = write_cache_pages(mapping, wbc, xfs_do_writepage, &wpc);
	if (wpc.ioend)
		ret = xfs_submit_ioend(wbc, wpc.ioend, ret);
	return ret;
}

/*
 * Called to move a page into cleanable state - and from there
 * to be released. The page should already be clean. We always
 * have buffer heads in this call.
 *
 * Returns 1 if the page is ok to release, 0 otherwise.
 */
STATIC int
xfs_vm_releasepage(
	struct page		*page,
	gfp_t			gfp_mask)
{
	int			delalloc, unwritten;

	trace_xfs_releasepage(page->mapping->host, page, 0, 0);

	/*
	 * mm accommodates an old ext3 case where clean pages might not have had
	 * the dirty bit cleared.  Thus, it can send actual dirty pages to
	 * ->releasepage() via shrink_active_list().  Conversely,
	 * block_invalidatepage() can send pages that are still marked dirty
	 * but otherwise have invalidated buffers.
	 *
	 * We want to release the latter to avoid unnecessary buildup of the
	 * LRU, skip the former and warn if we've left any lingering
	 * delalloc/unwritten buffers on clean pages.  Skip pages with delalloc
	 * or unwritten buffers and warn if the page is not dirty.  Otherwise
	 * try to release the buffers.
	 */
	xfs_count_page_state(page, &delalloc, &unwritten);

	if (delalloc) {
		WARN_ON_ONCE(!PageDirty(page));
		return 0;
	}
	if (unwritten) {
		WARN_ON_ONCE(!PageDirty(page));
		return 0;
	}

	return try_to_free_buffers(page);
}

/*
 * If this is O_DIRECT or the mpage code calling, tell them how large the
 * mapping is, so that we can avoid repeated get_blocks calls.
 *
 * If the mapping spans EOF, then we have to break the mapping up as the mapping
 * for blocks beyond EOF must be marked new so that sub block regions can be
 * correctly zeroed. We can't do this for mappings within EOF unless the mapping
 * was just allocated or is unwritten, otherwise the callers would overwrite
 * existing data with zeros. Hence we have to split the mapping into a range up
 * to and including EOF, and a second mapping for beyond EOF.
 */
static void
xfs_map_trim_size(
	struct inode		*inode,
	sector_t		iblock,
	struct buffer_head	*bh_result,
	struct xfs_bmbt_irec	*imap,
	xfs_off_t		offset,
	ssize_t			size)
{
	xfs_off_t		mapping_size;

	mapping_size = imap->br_startoff + imap->br_blockcount - iblock;
	mapping_size <<= inode->i_blkbits;

	ASSERT(mapping_size > 0);
	if (mapping_size > size)
		mapping_size = size;
	if (offset < i_size_read(inode) &&
	    offset + mapping_size >= i_size_read(inode)) {
		/* limit mapping to block that spans EOF */
		mapping_size = roundup_64(i_size_read(inode) - offset,
					  i_blocksize(inode));
	}
	if (mapping_size > LONG_MAX)
		mapping_size = LONG_MAX;

	bh_result->b_size = mapping_size;
}

static int
xfs_get_blocks(
	struct inode		*inode,
	sector_t		iblock,
	struct buffer_head	*bh_result,
	int			create)
{
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	xfs_fileoff_t		offset_fsb, end_fsb;
	int			error = 0;
	int			lockmode = 0;
	struct xfs_bmbt_irec	imap;
	int			nimaps = 1;
	xfs_off_t		offset;
	ssize_t			size;

	BUG_ON(create);

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	offset = (xfs_off_t)iblock << inode->i_blkbits;
	ASSERT(bh_result->b_size >= i_blocksize(inode));
	size = bh_result->b_size;

	if (offset >= i_size_read(inode))
		return 0;

	/*
	 * Direct I/O is usually done on preallocated files, so try getting
	 * a block mapping without an exclusive lock first.
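	 * (xfs_ilock_data_map_shared() only takes the ILOCK exclusively if
	 * the extent list still has to be read in.)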
	 */
	lockmode = xfs_ilock_data_map_shared(ip);

	ASSERT(offset <= mp->m_super->s_maxbytes);
	if (offset + size > mp->m_super->s_maxbytes)
		size = mp->m_super->s_maxbytes - offset;
	end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size);
	offset_fsb = XFS_B_TO_FSBT(mp, offset);

	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
			       &imap, &nimaps, XFS_BMAPI_ENTIRE);
	if (error)
		goto out_unlock;

	if (nimaps) {
		trace_xfs_get_blocks_found(ip, offset, size,
			imap.br_state == XFS_EXT_UNWRITTEN ?
				XFS_IO_UNWRITTEN : XFS_IO_OVERWRITE, &imap);
		xfs_iunlock(ip, lockmode);
	} else {
		trace_xfs_get_blocks_notfound(ip, offset, size);
		goto out_unlock;
	}

	/* trim mapping down to size requested */
	xfs_map_trim_size(inode, iblock, bh_result, &imap, offset, size);

	/*
	 * For unwritten extents do not report a disk address in the buffered
	 * read case (treat as if we're reading into a hole).
	 */
	if (xfs_bmap_is_real_extent(&imap))
		xfs_map_buffer(inode, bh_result, &imap, offset);

	/*
	 * If this is a realtime file, data may be on a different device
	 * to that pointed to from the buffer_head b_bdev currently.
	 */
	bh_result->b_bdev = xfs_find_bdev_for_inode(inode);
	return 0;

out_unlock:
	xfs_iunlock(ip, lockmode);
	return error;
}

STATIC ssize_t
xfs_vm_direct_IO(
	struct kiocb		*iocb,
	struct iov_iter		*iter)
{
	/*
	 * We just need the method present so that open/fcntl allow direct I/O.
	 */
	return -EINVAL;
}

STATIC sector_t
xfs_vm_bmap(
	struct address_space	*mapping,
	sector_t		block)
{
	struct inode		*inode = (struct inode *)mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);

	trace_xfs_vm_bmap(XFS_I(inode));

	/*
	 * The swap code (ab-)uses ->bmap to get a block mapping and then
	 * bypasses the file system for actual I/O.  We really can't allow
	 * that on reflink inodes, so we have to skip out here.  And yes,
	 * 0 is the magic code for a bmap error.
	 *
	 * Since we don't pass back blockdev info, we can't return bmap
	 * information for rt files either.
	 */
	if (xfs_is_reflink_inode(ip) || XFS_IS_REALTIME_INODE(ip))
		return 0;

	filemap_write_and_wait(mapping);
	return generic_block_bmap(mapping, block, xfs_get_blocks);
}

STATIC int
xfs_vm_readpage(
	struct file		*unused,
	struct page		*page)
{
	trace_xfs_vm_readpage(page->mapping->host, 1);
	return mpage_readpage(page, xfs_get_blocks);
}

STATIC int
xfs_vm_readpages(
	struct file		*unused,
	struct address_space	*mapping,
	struct list_head	*pages,
	unsigned		nr_pages)
{
	trace_xfs_vm_readpages(mapping->host, nr_pages);
	return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks);
}

/*
 * This is basically a copy of __set_page_dirty_buffers() with one
 * small tweak: buffers beyond EOF do not get marked dirty. If we mark them
 * dirty, we'll never be able to clean them because we don't write buffers
 * beyond EOF, and that means we can't invalidate pages that span EOF
 * that have been marked dirty. Further, the dirty state can leak into
 * the file interior if the file is extended, resulting in all sorts of
 * bad things happening as the state does not match the underlying data.
 *
 * XXX: this really indicates that bufferheads in XFS need to die. Warts like
 * this only exist because of bufferheads and how the generic code manages them.
 */
STATIC int
xfs_vm_set_page_dirty(
	struct page		*page)
{
	struct address_space	*mapping = page->mapping;
	struct inode		*inode = mapping->host;
	loff_t			end_offset;
	loff_t			offset;
	int			newly_dirty;

	if (unlikely(!mapping))
		return !TestSetPageDirty(page);

	end_offset = i_size_read(inode);
	offset = page_offset(page);

	spin_lock(&mapping->private_lock);
	if (page_has_buffers(page)) {
		struct buffer_head *head = page_buffers(page);
		struct buffer_head *bh = head;

		do {
			if (offset < end_offset)
				set_buffer_dirty(bh);
			bh = bh->b_this_page;
			offset += i_blocksize(inode);
		} while (bh != head);
	}
	/*
	 * Lock out page->mem_cgroup migration to keep PageDirty
	 * synchronized with per-memcg dirty page counters.
	 */
	lock_page_memcg(page);
	newly_dirty = !TestSetPageDirty(page);
	spin_unlock(&mapping->private_lock);

	if (newly_dirty) {
		/* sigh - __set_page_dirty() is static, so copy it here, too */
		unsigned long flags;

		spin_lock_irqsave(&mapping->tree_lock, flags);
		if (page->mapping) {	/* Race with truncate? */
			WARN_ON_ONCE(!PageUptodate(page));
			account_page_dirtied(page, mapping);
			radix_tree_tag_set(&mapping->page_tree,
					page_index(page), PAGECACHE_TAG_DIRTY);
		}
		spin_unlock_irqrestore(&mapping->tree_lock, flags);
	}
	unlock_page_memcg(page);
	if (newly_dirty)
		__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
	return newly_dirty;
}

const struct address_space_operations xfs_address_space_operations = {
	.readpage		= xfs_vm_readpage,
	.readpages		= xfs_vm_readpages,
	.writepage		= xfs_vm_writepage,
	.writepages		= xfs_vm_writepages,
	.set_page_dirty		= xfs_vm_set_page_dirty,
	.releasepage		= xfs_vm_releasepage,
	.invalidatepage		= xfs_vm_invalidatepage,
	.bmap			= xfs_vm_bmap,
	.direct_IO		= xfs_vm_direct_IO,
	.migratepage		= buffer_migrate_page,
	.is_partially_uptodate	= block_is_partially_uptodate,
	.error_remove_page	= generic_error_remove_page,
};