1 /* handling of writes to regular files and writing back to the server 2 * 3 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. 4 * Written by David Howells (dhowells@redhat.com) 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License 8 * as published by the Free Software Foundation; either version 9 * 2 of the License, or (at your option) any later version. 10 */ 11 #include <linux/backing-dev.h> 12 #include <linux/slab.h> 13 #include <linux/fs.h> 14 #include <linux/pagemap.h> 15 #include <linux/writeback.h> 16 #include <linux/pagevec.h> 17 #include "internal.h" 18 19 static int afs_write_back_from_locked_page(struct afs_writeback *wb, 20 struct page *page); 21 22 /* 23 * mark a page as having been made dirty and thus needing writeback 24 */ 25 int afs_set_page_dirty(struct page *page) 26 { 27 _enter(""); 28 return __set_page_dirty_nobuffers(page); 29 } 30 31 /* 32 * unlink a writeback record because its usage has reached zero 33 * - must be called with the wb->vnode->writeback_lock held 34 */ 35 static void afs_unlink_writeback(struct afs_writeback *wb) 36 { 37 struct afs_writeback *front; 38 struct afs_vnode *vnode = wb->vnode; 39 40 list_del_init(&wb->link); 41 if (!list_empty(&vnode->writebacks)) { 42 /* if an fsync rises to the front of the queue then wake it 43 * up */ 44 front = list_entry(vnode->writebacks.next, 45 struct afs_writeback, link); 46 if (front->state == AFS_WBACK_SYNCING) { 47 _debug("wake up sync"); 48 front->state = AFS_WBACK_COMPLETE; 49 wake_up(&front->waitq); 50 } 51 } 52 } 53 54 /* 55 * free a writeback record 56 */ 57 static void afs_free_writeback(struct afs_writeback *wb) 58 { 59 _enter(""); 60 key_put(wb->key); 61 kfree(wb); 62 } 63 64 /* 65 * dispose of a reference to a writeback record 66 */ 67 void afs_put_writeback(struct afs_writeback *wb) 68 { 69 struct afs_vnode *vnode = wb->vnode; 70 71 _enter("{%d}", wb->usage); 72 73 spin_lock(&vnode->writeback_lock); 74 if (--wb->usage == 0) 75 afs_unlink_writeback(wb); 76 else 77 wb = NULL; 78 spin_unlock(&vnode->writeback_lock); 79 if (wb) 80 afs_free_writeback(wb); 81 } 82 83 /* 84 * partly or wholly fill a page that's under preparation for writing 85 */ 86 static int afs_fill_page(struct afs_vnode *vnode, struct key *key, 87 loff_t pos, unsigned int len, struct page *page) 88 { 89 struct afs_read *req; 90 int ret; 91 92 _enter(",,%llu", (unsigned long long)pos); 93 94 req = kzalloc(sizeof(struct afs_read) + sizeof(struct page *), 95 GFP_KERNEL); 96 if (!req) 97 return -ENOMEM; 98 99 atomic_set(&req->usage, 1); 100 req->pos = pos; 101 req->len = len; 102 req->nr_pages = 1; 103 req->pages[0] = page; 104 get_page(page); 105 106 ret = afs_vnode_fetch_data(vnode, key, req); 107 afs_put_read(req); 108 if (ret < 0) { 109 if (ret == -ENOENT) { 110 _debug("got NOENT from server" 111 " - marking file deleted and stale"); 112 set_bit(AFS_VNODE_DELETED, &vnode->flags); 113 ret = -ESTALE; 114 } 115 } 116 117 _leave(" = %d", ret); 118 return ret; 119 } 120 121 /* 122 * prepare to perform part of a write to a page 123 */ 124 int afs_write_begin(struct file *file, struct address_space *mapping, 125 loff_t pos, unsigned len, unsigned flags, 126 struct page **pagep, void **fsdata) 127 { 128 struct afs_writeback *candidate, *wb; 129 struct afs_vnode *vnode = AFS_FS_I(file_inode(file)); 130 struct page *page; 131 struct key *key = file->private_data; 132 unsigned from = pos & (PAGE_SIZE - 1); 133 unsigned to = from + len; 134 pgoff_t index = pos >> PAGE_SHIFT; 135 int ret; 136 137 _enter("{%x:%u},{%lx},%u,%u", 138 vnode->fid.vid, vnode->fid.vnode, index, from, to); 139 140 candidate = kzalloc(sizeof(*candidate), GFP_KERNEL); 141 if (!candidate) 142 return -ENOMEM; 143 candidate->vnode = vnode; 144 candidate->first = candidate->last = index; 145 candidate->offset_first = from; 146 candidate->to_last = to; 147 INIT_LIST_HEAD(&candidate->link); 148 candidate->usage = 1; 149 candidate->state = AFS_WBACK_PENDING; 150 init_waitqueue_head(&candidate->waitq); 151 152 page = grab_cache_page_write_begin(mapping, index, flags); 153 if (!page) { 154 kfree(candidate); 155 return -ENOMEM; 156 } 157 *pagep = page; 158 /* page won't leak in error case: it eventually gets cleaned off LRU */ 159 160 if (!PageUptodate(page) && len != PAGE_SIZE) { 161 ret = afs_fill_page(vnode, key, pos & PAGE_MASK, PAGE_SIZE, page); 162 if (ret < 0) { 163 kfree(candidate); 164 _leave(" = %d [prep]", ret); 165 return ret; 166 } 167 SetPageUptodate(page); 168 } 169 170 try_again: 171 spin_lock(&vnode->writeback_lock); 172 173 /* see if this page is already pending a writeback under a suitable key 174 * - if so we can just join onto that one */ 175 wb = (struct afs_writeback *) page_private(page); 176 if (wb) { 177 if (wb->key == key && wb->state == AFS_WBACK_PENDING) 178 goto subsume_in_current_wb; 179 goto flush_conflicting_wb; 180 } 181 182 if (index > 0) { 183 /* see if we can find an already pending writeback that we can 184 * append this page to */ 185 list_for_each_entry(wb, &vnode->writebacks, link) { 186 if (wb->last == index - 1 && wb->key == key && 187 wb->state == AFS_WBACK_PENDING) 188 goto append_to_previous_wb; 189 } 190 } 191 192 list_add_tail(&candidate->link, &vnode->writebacks); 193 candidate->key = key_get(key); 194 spin_unlock(&vnode->writeback_lock); 195 SetPagePrivate(page); 196 set_page_private(page, (unsigned long) candidate); 197 _leave(" = 0 [new]"); 198 return 0; 199 200 subsume_in_current_wb: 201 _debug("subsume"); 202 ASSERTRANGE(wb->first, <=, index, <=, wb->last); 203 if (index == wb->first && from < wb->offset_first) 204 wb->offset_first = from; 205 if (index == wb->last && to > wb->to_last) 206 wb->to_last = to; 207 spin_unlock(&vnode->writeback_lock); 208 kfree(candidate); 209 _leave(" = 0 [sub]"); 210 return 0; 211 212 append_to_previous_wb: 213 _debug("append into %lx-%lx", wb->first, wb->last); 214 wb->usage++; 215 wb->last++; 216 wb->to_last = to; 217 spin_unlock(&vnode->writeback_lock); 218 SetPagePrivate(page); 219 set_page_private(page, (unsigned long) wb); 220 kfree(candidate); 221 _leave(" = 0 [app]"); 222 return 0; 223 224 /* the page is currently bound to another context, so if it's dirty we 225 * need to flush it before we can use the new context */ 226 flush_conflicting_wb: 227 _debug("flush conflict"); 228 if (wb->state == AFS_WBACK_PENDING) 229 wb->state = AFS_WBACK_CONFLICTING; 230 spin_unlock(&vnode->writeback_lock); 231 if (PageDirty(page)) { 232 ret = afs_write_back_from_locked_page(wb, page); 233 if (ret < 0) { 234 afs_put_writeback(candidate); 235 _leave(" = %d", ret); 236 return ret; 237 } 238 } 239 240 /* the page holds a ref on the writeback record */ 241 afs_put_writeback(wb); 242 set_page_private(page, 0); 243 ClearPagePrivate(page); 244 goto try_again; 245 } 246 247 /* 248 * finalise part of a write to a page 249 */ 250 int afs_write_end(struct file *file, struct address_space *mapping, 251 loff_t pos, unsigned len, unsigned copied, 252 struct page *page, void *fsdata) 253 { 254 struct afs_vnode *vnode = AFS_FS_I(file_inode(file)); 255 struct key *key = file->private_data; 256 loff_t i_size, maybe_i_size; 257 int ret; 258 259 _enter("{%x:%u},{%lx}", 260 vnode->fid.vid, vnode->fid.vnode, page->index); 261 262 maybe_i_size = pos + copied; 263 264 i_size = i_size_read(&vnode->vfs_inode); 265 if (maybe_i_size > i_size) { 266 spin_lock(&vnode->writeback_lock); 267 i_size = i_size_read(&vnode->vfs_inode); 268 if (maybe_i_size > i_size) 269 i_size_write(&vnode->vfs_inode, maybe_i_size); 270 spin_unlock(&vnode->writeback_lock); 271 } 272 273 if (!PageUptodate(page)) { 274 if (copied < len) { 275 /* Try and load any missing data from the server. The 276 * unmarshalling routine will take care of clearing any 277 * bits that are beyond the EOF. 278 */ 279 ret = afs_fill_page(vnode, key, pos + copied, 280 len - copied, page); 281 if (ret < 0) 282 return ret; 283 } 284 SetPageUptodate(page); 285 } 286 287 set_page_dirty(page); 288 if (PageDirty(page)) 289 _debug("dirtied"); 290 unlock_page(page); 291 put_page(page); 292 293 return copied; 294 } 295 296 /* 297 * kill all the pages in the given range 298 */ 299 static void afs_kill_pages(struct afs_vnode *vnode, bool error, 300 pgoff_t first, pgoff_t last) 301 { 302 struct pagevec pv; 303 unsigned count, loop; 304 305 _enter("{%x:%u},%lx-%lx", 306 vnode->fid.vid, vnode->fid.vnode, first, last); 307 308 pagevec_init(&pv, 0); 309 310 do { 311 _debug("kill %lx-%lx", first, last); 312 313 count = last - first + 1; 314 if (count > PAGEVEC_SIZE) 315 count = PAGEVEC_SIZE; 316 pv.nr = find_get_pages_contig(vnode->vfs_inode.i_mapping, 317 first, count, pv.pages); 318 ASSERTCMP(pv.nr, ==, count); 319 320 for (loop = 0; loop < count; loop++) { 321 ClearPageUptodate(pv.pages[loop]); 322 if (error) 323 SetPageError(pv.pages[loop]); 324 end_page_writeback(pv.pages[loop]); 325 } 326 327 __pagevec_release(&pv); 328 } while (first < last); 329 330 _leave(""); 331 } 332 333 /* 334 * synchronously write back the locked page and any subsequent non-locked dirty 335 * pages also covered by the same writeback record 336 */ 337 static int afs_write_back_from_locked_page(struct afs_writeback *wb, 338 struct page *primary_page) 339 { 340 struct page *pages[8], *page; 341 unsigned long count; 342 unsigned n, offset, to; 343 pgoff_t start, first, last; 344 int loop, ret; 345 346 _enter(",%lx", primary_page->index); 347 348 count = 1; 349 if (!clear_page_dirty_for_io(primary_page)) 350 BUG(); 351 if (test_set_page_writeback(primary_page)) 352 BUG(); 353 354 /* find all consecutive lockable dirty pages, stopping when we find a 355 * page that is not immediately lockable, is not dirty or is missing, 356 * or we reach the end of the range */ 357 start = primary_page->index; 358 if (start >= wb->last) 359 goto no_more; 360 start++; 361 do { 362 _debug("more %lx [%lx]", start, count); 363 n = wb->last - start + 1; 364 if (n > ARRAY_SIZE(pages)) 365 n = ARRAY_SIZE(pages); 366 n = find_get_pages_contig(wb->vnode->vfs_inode.i_mapping, 367 start, n, pages); 368 _debug("fgpc %u", n); 369 if (n == 0) 370 goto no_more; 371 if (pages[0]->index != start) { 372 do { 373 put_page(pages[--n]); 374 } while (n > 0); 375 goto no_more; 376 } 377 378 for (loop = 0; loop < n; loop++) { 379 page = pages[loop]; 380 if (page->index > wb->last) 381 break; 382 if (!trylock_page(page)) 383 break; 384 if (!PageDirty(page) || 385 page_private(page) != (unsigned long) wb) { 386 unlock_page(page); 387 break; 388 } 389 if (!clear_page_dirty_for_io(page)) 390 BUG(); 391 if (test_set_page_writeback(page)) 392 BUG(); 393 unlock_page(page); 394 put_page(page); 395 } 396 count += loop; 397 if (loop < n) { 398 for (; loop < n; loop++) 399 put_page(pages[loop]); 400 goto no_more; 401 } 402 403 start += loop; 404 } while (start <= wb->last && count < 65536); 405 406 no_more: 407 /* we now have a contiguous set of dirty pages, each with writeback set 408 * and the dirty mark cleared; the first page is locked and must remain 409 * so, all the rest are unlocked */ 410 first = primary_page->index; 411 last = first + count - 1; 412 413 offset = (first == wb->first) ? wb->offset_first : 0; 414 to = (last == wb->last) ? wb->to_last : PAGE_SIZE; 415 416 _debug("write back %lx[%u..] to %lx[..%u]", first, offset, last, to); 417 418 ret = afs_vnode_store_data(wb, first, last, offset, to); 419 if (ret < 0) { 420 switch (ret) { 421 case -EDQUOT: 422 case -ENOSPC: 423 mapping_set_error(wb->vnode->vfs_inode.i_mapping, -ENOSPC); 424 break; 425 case -EROFS: 426 case -EIO: 427 case -EREMOTEIO: 428 case -EFBIG: 429 case -ENOENT: 430 case -ENOMEDIUM: 431 case -ENXIO: 432 afs_kill_pages(wb->vnode, true, first, last); 433 mapping_set_error(wb->vnode->vfs_inode.i_mapping, -EIO); 434 break; 435 case -EACCES: 436 case -EPERM: 437 case -ENOKEY: 438 case -EKEYEXPIRED: 439 case -EKEYREJECTED: 440 case -EKEYREVOKED: 441 afs_kill_pages(wb->vnode, false, first, last); 442 break; 443 default: 444 break; 445 } 446 } else { 447 ret = count; 448 } 449 450 _leave(" = %d", ret); 451 return ret; 452 } 453 454 /* 455 * write a page back to the server 456 * - the caller locked the page for us 457 */ 458 int afs_writepage(struct page *page, struct writeback_control *wbc) 459 { 460 struct afs_writeback *wb; 461 int ret; 462 463 _enter("{%lx},", page->index); 464 465 wb = (struct afs_writeback *) page_private(page); 466 ASSERT(wb != NULL); 467 468 ret = afs_write_back_from_locked_page(wb, page); 469 unlock_page(page); 470 if (ret < 0) { 471 _leave(" = %d", ret); 472 return 0; 473 } 474 475 wbc->nr_to_write -= ret; 476 477 _leave(" = 0"); 478 return 0; 479 } 480 481 /* 482 * write a region of pages back to the server 483 */ 484 static int afs_writepages_region(struct address_space *mapping, 485 struct writeback_control *wbc, 486 pgoff_t index, pgoff_t end, pgoff_t *_next) 487 { 488 struct afs_writeback *wb; 489 struct page *page; 490 int ret, n; 491 492 _enter(",,%lx,%lx,", index, end); 493 494 do { 495 n = find_get_pages_tag(mapping, &index, PAGECACHE_TAG_DIRTY, 496 1, &page); 497 if (!n) 498 break; 499 500 _debug("wback %lx", page->index); 501 502 if (page->index > end) { 503 *_next = index; 504 put_page(page); 505 _leave(" = 0 [%lx]", *_next); 506 return 0; 507 } 508 509 /* at this point we hold neither mapping->tree_lock nor lock on 510 * the page itself: the page may be truncated or invalidated 511 * (changing page->mapping to NULL), or even swizzled back from 512 * swapper_space to tmpfs file mapping 513 */ 514 lock_page(page); 515 516 if (page->mapping != mapping) { 517 unlock_page(page); 518 put_page(page); 519 continue; 520 } 521 522 if (wbc->sync_mode != WB_SYNC_NONE) 523 wait_on_page_writeback(page); 524 525 if (PageWriteback(page) || !PageDirty(page)) { 526 unlock_page(page); 527 put_page(page); 528 continue; 529 } 530 531 wb = (struct afs_writeback *) page_private(page); 532 ASSERT(wb != NULL); 533 534 spin_lock(&wb->vnode->writeback_lock); 535 wb->state = AFS_WBACK_WRITING; 536 spin_unlock(&wb->vnode->writeback_lock); 537 538 ret = afs_write_back_from_locked_page(wb, page); 539 unlock_page(page); 540 put_page(page); 541 if (ret < 0) { 542 _leave(" = %d", ret); 543 return ret; 544 } 545 546 wbc->nr_to_write -= ret; 547 548 cond_resched(); 549 } while (index < end && wbc->nr_to_write > 0); 550 551 *_next = index; 552 _leave(" = 0 [%lx]", *_next); 553 return 0; 554 } 555 556 /* 557 * write some of the pending data back to the server 558 */ 559 int afs_writepages(struct address_space *mapping, 560 struct writeback_control *wbc) 561 { 562 pgoff_t start, end, next; 563 int ret; 564 565 _enter(""); 566 567 if (wbc->range_cyclic) { 568 start = mapping->writeback_index; 569 end = -1; 570 ret = afs_writepages_region(mapping, wbc, start, end, &next); 571 if (start > 0 && wbc->nr_to_write > 0 && ret == 0) 572 ret = afs_writepages_region(mapping, wbc, 0, start, 573 &next); 574 mapping->writeback_index = next; 575 } else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) { 576 end = (pgoff_t)(LLONG_MAX >> PAGE_SHIFT); 577 ret = afs_writepages_region(mapping, wbc, 0, end, &next); 578 if (wbc->nr_to_write > 0) 579 mapping->writeback_index = next; 580 } else { 581 start = wbc->range_start >> PAGE_SHIFT; 582 end = wbc->range_end >> PAGE_SHIFT; 583 ret = afs_writepages_region(mapping, wbc, start, end, &next); 584 } 585 586 _leave(" = %d", ret); 587 return ret; 588 } 589 590 /* 591 * completion of write to server 592 */ 593 void afs_pages_written_back(struct afs_vnode *vnode, struct afs_call *call) 594 { 595 struct afs_writeback *wb = call->wb; 596 struct pagevec pv; 597 unsigned count, loop; 598 pgoff_t first = call->first, last = call->last; 599 bool free_wb; 600 601 _enter("{%x:%u},{%lx-%lx}", 602 vnode->fid.vid, vnode->fid.vnode, first, last); 603 604 ASSERT(wb != NULL); 605 606 pagevec_init(&pv, 0); 607 608 do { 609 _debug("done %lx-%lx", first, last); 610 611 count = last - first + 1; 612 if (count > PAGEVEC_SIZE) 613 count = PAGEVEC_SIZE; 614 pv.nr = find_get_pages_contig(call->mapping, first, count, 615 pv.pages); 616 ASSERTCMP(pv.nr, ==, count); 617 618 spin_lock(&vnode->writeback_lock); 619 for (loop = 0; loop < count; loop++) { 620 struct page *page = pv.pages[loop]; 621 end_page_writeback(page); 622 if (page_private(page) == (unsigned long) wb) { 623 set_page_private(page, 0); 624 ClearPagePrivate(page); 625 wb->usage--; 626 } 627 } 628 free_wb = false; 629 if (wb->usage == 0) { 630 afs_unlink_writeback(wb); 631 free_wb = true; 632 } 633 spin_unlock(&vnode->writeback_lock); 634 first += count; 635 if (free_wb) { 636 afs_free_writeback(wb); 637 wb = NULL; 638 } 639 640 __pagevec_release(&pv); 641 } while (first <= last); 642 643 _leave(""); 644 } 645 646 /* 647 * write to an AFS file 648 */ 649 ssize_t afs_file_write(struct kiocb *iocb, struct iov_iter *from) 650 { 651 struct afs_vnode *vnode = AFS_FS_I(file_inode(iocb->ki_filp)); 652 ssize_t result; 653 size_t count = iov_iter_count(from); 654 655 _enter("{%x.%u},{%zu},", 656 vnode->fid.vid, vnode->fid.vnode, count); 657 658 if (IS_SWAPFILE(&vnode->vfs_inode)) { 659 printk(KERN_INFO 660 "AFS: Attempt to write to active swap file!\n"); 661 return -EBUSY; 662 } 663 664 if (!count) 665 return 0; 666 667 result = generic_file_write_iter(iocb, from); 668 669 _leave(" = %zd", result); 670 return result; 671 } 672 673 /* 674 * flush the vnode to the fileserver 675 */ 676 int afs_writeback_all(struct afs_vnode *vnode) 677 { 678 struct address_space *mapping = vnode->vfs_inode.i_mapping; 679 struct writeback_control wbc = { 680 .sync_mode = WB_SYNC_ALL, 681 .nr_to_write = LONG_MAX, 682 .range_cyclic = 1, 683 }; 684 int ret; 685 686 _enter(""); 687 688 ret = mapping->a_ops->writepages(mapping, &wbc); 689 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); 690 691 _leave(" = %d", ret); 692 return ret; 693 } 694 695 /* 696 * flush any dirty pages for this process, and check for write errors. 697 * - the return status from this call provides a reliable indication of 698 * whether any write errors occurred for this process. 699 */ 700 int afs_fsync(struct file *file, loff_t start, loff_t end, int datasync) 701 { 702 struct inode *inode = file_inode(file); 703 struct afs_writeback *wb, *xwb; 704 struct afs_vnode *vnode = AFS_FS_I(inode); 705 int ret; 706 707 _enter("{%x:%u},{n=%pD},%d", 708 vnode->fid.vid, vnode->fid.vnode, file, 709 datasync); 710 711 ret = filemap_write_and_wait_range(inode->i_mapping, start, end); 712 if (ret) 713 return ret; 714 inode_lock(inode); 715 716 /* use a writeback record as a marker in the queue - when this reaches 717 * the front of the queue, all the outstanding writes are either 718 * completed or rejected */ 719 wb = kzalloc(sizeof(*wb), GFP_KERNEL); 720 if (!wb) { 721 ret = -ENOMEM; 722 goto out; 723 } 724 wb->vnode = vnode; 725 wb->first = 0; 726 wb->last = -1; 727 wb->offset_first = 0; 728 wb->to_last = PAGE_SIZE; 729 wb->usage = 1; 730 wb->state = AFS_WBACK_SYNCING; 731 init_waitqueue_head(&wb->waitq); 732 733 spin_lock(&vnode->writeback_lock); 734 list_for_each_entry(xwb, &vnode->writebacks, link) { 735 if (xwb->state == AFS_WBACK_PENDING) 736 xwb->state = AFS_WBACK_CONFLICTING; 737 } 738 list_add_tail(&wb->link, &vnode->writebacks); 739 spin_unlock(&vnode->writeback_lock); 740 741 /* push all the outstanding writebacks to the server */ 742 ret = afs_writeback_all(vnode); 743 if (ret < 0) { 744 afs_put_writeback(wb); 745 _leave(" = %d [wb]", ret); 746 goto out; 747 } 748 749 /* wait for the preceding writes to actually complete */ 750 ret = wait_event_interruptible(wb->waitq, 751 wb->state == AFS_WBACK_COMPLETE || 752 vnode->writebacks.next == &wb->link); 753 afs_put_writeback(wb); 754 _leave(" = %d", ret); 755 out: 756 inode_unlock(inode); 757 return ret; 758 } 759 760 /* 761 * notification that a previously read-only page is about to become writable 762 * - if it returns an error, the caller will deliver a bus error signal 763 */ 764 int afs_page_mkwrite(struct vm_area_struct *vma, struct page *page) 765 { 766 struct afs_vnode *vnode = AFS_FS_I(vma->vm_file->f_mapping->host); 767 768 _enter("{{%x:%u}},{%lx}", 769 vnode->fid.vid, vnode->fid.vnode, page->index); 770 771 /* wait for the page to be written to the cache before we allow it to 772 * be modified */ 773 #ifdef CONFIG_AFS_FSCACHE 774 fscache_wait_on_page_write(vnode->cache, page); 775 #endif 776 777 _leave(" = 0"); 778 return 0; 779 } 780